From 6fbfdd971ed162a2e3c2d99e15a5bb8d5089a20c Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 14:00:08 -0700 Subject: [PATCH 01/23] Add a sample resource locking workflow --- resource_locking/README.md | 31 ++++++++ resource_locking/__init__.py | 0 resource_locking/load_workflow.py | 80 ++++++++++++++++++++ resource_locking/sem_workflow.py | 120 ++++++++++++++++++++++++++++++ resource_locking/starter.py | 49 ++++++++++++ resource_locking/worker.py | 28 +++++++ 6 files changed, 308 insertions(+) create mode 100644 resource_locking/README.md create mode 100644 resource_locking/__init__.py create mode 100644 resource_locking/load_workflow.py create mode 100644 resource_locking/sem_workflow.py create mode 100644 resource_locking/starter.py create mode 100644 resource_locking/worker.py diff --git a/resource_locking/README.md b/resource_locking/README.md new file mode 100644 index 00000000..66f7d160 --- /dev/null +++ b/resource_locking/README.md @@ -0,0 +1,31 @@ +# Semaphore Sample + +This sample shows how to use a long-lived `semaphore_workflow` to ensure that each `resource` is used by at most one +`load_workflow` at a time. `load_workflow` runs several activities while it has ownership of a resource. + +Run the following from this directory to start the worker: + + poetry run python worker.py + +This will start the worker. Then, in another terminal, run the following to execute several load workflows: + + poetry run python starter.py + +You should see output indicating that the semaphore workflow serialized access to each resource. + +You can query the set of current lock holders with: + + tctl wf query -w semaphore --qt get_current_holders + +# Caveats + +This sample uses true locking (not leasing!) to avoid complexity and scaling concerns associated with heartbeating via +signals. Locking carries the risk of a "leak" (failure to unlock) permanently removing a resource from the pool. 
With +Temporal's durabile execution guarantees, this can only happen if: + +- A LoadWorkflow times out (prohibited in the sample code) +- You shut down your workers, and never restart them (unhandled, but probably irrelevant) + +If a leak were to happen in the wild, you could discover the identity of the leaker using the query above, then: + + ~/Code/tctl/tctl wf signal -w semaphore --name release_resource --input '{ "resource": "the resource", "workflow_id": "holder workflow id", "run_id": "holder run id" }' \ No newline at end of file diff --git a/resource_locking/__init__.py b/resource_locking/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/resource_locking/load_workflow.py b/resource_locking/load_workflow.py new file mode 100644 index 00000000..ec00c369 --- /dev/null +++ b/resource_locking/load_workflow.py @@ -0,0 +1,80 @@ +import asyncio +from dataclasses import dataclass +from datetime import timedelta +from typing import Optional + +from temporalio import activity, workflow +from temporalio.client import Client + +from resource_locking.sem_workflow import AssignedResource, SEMAPHORE_WORKFLOW_ID, \ + ReleaseRequest, AcquireRequest, SemaphoreWorkflowInput, SEMAPHORE_WORKFLOW_TYPE + + +@dataclass +class LoadActivityInput: + resource: str + iteration: str + +@activity.defn +async def load(input: LoadActivityInput) -> None: + workflow_id = activity.info().workflow_id + print(f"Workflow {workflow_id} starts using {input.resource} the {input.iteration} time") + await asyncio.sleep(5) + print(f"Workflow {workflow_id} finishes using {input.resource} the {input.iteration} time") + +@dataclass +class LoadWorkflowInput: + iteration_to_fail_after: Optional[str] + +class FailWorkflowException(Exception): + pass + +MAX_RESOURCE_WAIT_TIME = timedelta(minutes=5) + +@workflow.defn( + failure_exception_types=[FailWorkflowException] +) +class LoadWorkflow: + + def __init__(self): + self.assigned_resource = None + + @workflow.signal(name="assign_resource") 
+ def handle_assign_resource(self, input: AssignedResource): + self.assigned_resource = input.resource + + @workflow.run + async def run(self, input: LoadWorkflowInput): + if workflow.info().run_timeout is not None: + # See "locking" comment below for rationale + raise FailWorkflowException(f"LoadWorkflow cannot have a run_timeout") + if workflow.info().execution_timeout is not None: + raise FailWorkflowException(f"LoadWorkflow cannot have an execution_timeout") + + sem_handle = workflow.get_external_workflow_handle(SEMAPHORE_WORKFLOW_ID) + + # Ask for a resource... + info = workflow.info() + await sem_handle.signal("acquire_resource", AcquireRequest(info.workflow_id, info.run_id)) + + # ...and wait for the answer + await workflow.wait_condition(lambda: self.assigned_resource is not None, timeout=MAX_RESOURCE_WAIT_TIME) + if self.assigned_resource is None: + raise FailWorkflowException(f"No resource was assigned after {MAX_RESOURCE_WAIT_TIME}") + + # From this point forward, we own the resource. Note that this is a lock, not a lease! Our finally block needs + # to run to free up the resource if an activity fails. This is why we asserted the lack of workflow-level + # timeouts above - they would prevent the finally block from running if there was a timeout. 
+ try: + for iteration in ["first", "second", "third"]: + await workflow.execute_activity( + load, + LoadActivityInput(self.assigned_resource, iteration), + start_to_close_timeout=timedelta(seconds=10), + ) + + if iteration == input.iteration_to_fail_after: + workflow.logger.info(f"Failing after iteration {input.iteration_to_fail_after}") + raise FailWorkflowException() + finally: + await sem_handle.signal("release_resource", ReleaseRequest(self.assigned_resource, info.workflow_id, info.run_id)) \ No newline at end of file diff --git a/resource_locking/sem_workflow.py b/resource_locking/sem_workflow.py new file mode 100644 index 00000000..56cf8c69 --- /dev/null +++ b/resource_locking/sem_workflow.py @@ -0,0 +1,120 @@ +from dataclasses import dataclass +from datetime import datetime, timedelta + +from temporalio import workflow + +@dataclass +class AssignedResource: + resource: str + +@dataclass +class AcquireRequest: + workflow_id: str + run_id: str + +@dataclass +class ReleaseRequest: + resource: str + workflow_id: str + run_id: str + +@dataclass +class HandoffRequest: + resource: str + workflow_id: str + old_run_id: str + new_run_id: str + +SEMAPHORE_WORKFLOW_TYPE = "semaphore-wf" +SEMAPHORE_WORKFLOW_ID = "semaphore" + +@dataclass +class SemaphoreWorkflowInput: + # Key is resource, value is users of the resource. The first item in each list is the current holder of the lease + # on that resource. 
+ resource_queues: dict[str, list[AcquireRequest]] + +@workflow.defn( + name=SEMAPHORE_WORKFLOW_TYPE, +) +class SemaphoreWorkflow: + def __init__(self): + self.resource_queues: dict[str, list[AcquireRequest]] = {} + + @workflow.signal + async def acquire_resource(self, request: AcquireRequest): + for resource in self.resource_queues: + # Naively give out the first free resource, if we have one + if len(self.resource_queues[resource]) == 0: + self.resource_queues[resource].append(request) + requester = workflow.get_external_workflow_handle(request.workflow_id, run_id=request.run_id) + await requester.signal("assign_resource", AssignedResource(resource)) + return + + # Otherwise put this resource in a random queue + resource = workflow.random().choice(list(self.resource_queues.keys())) + self.resource_queues[resource].append(request) + + @workflow.signal + async def release_resource(self, request: ReleaseRequest): + queue = self.resource_queues[request.resource] + if queue is None: + workflow.logger.warning(f"Ignoring request from {request.workflow_id} to release non-existent resource: {request.resource}") + return + + if len(queue) == 0: + workflow.logger.warning(f"Ignoring request from {request.workflow_id} to release resource that is not held: {request.resource}") + return + + holder = queue[0] + if not (holder.workflow_id == request.workflow_id and holder.run_id == request.run_id): + workflow.logger.warning(f"Ignoring request from non-holder to release resource {request.resource}") + workflow.logger.warning(f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}") + workflow.logger.warning(f"request was from wf_id={request.workflow_id} run_id={request.run_id}") + return + + # Remove the current holder from the head of the queue + queue = queue[1:] + self.resource_queues[request.resource] = queue + + # If there are queued requests, assign the resource to the next one + if len(queue) > 0: + next_holder = queue[0] + requester = 
workflow.get_external_workflow_handle(next_holder.workflow_id, run_id=next_holder.run_id) + await requester.signal("assign_resource", AssignedResource(request.resource)) + + @workflow.signal + async def handoff_resource(self, request: HandoffRequest): + queue = self.resource_queues[request.resource] + if queue is None: + workflow.logger.warning(f"Ignoring request from {request.workflow_id} to hand off non-existent resource: {request.resource}") + return + + if len(queue) == 0: + workflow.logger.warning(f"Ignoring request from {request.workflow_id} to hand off resource that is not held: {request.resource}") + return + + holder = queue[0] + if not (holder.workflow_id == request.workflow_id and holder.run_id == request.old_run_id): + workflow.logger.warning(f"Ignoring request from non-holder to hand off resource {request.resource}") + workflow.logger.warning(f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}") + workflow.logger.warning(f"request was from wf_id={request.workflow_id} run_id={request.old_run_id}") + return + + queue[0] = AcquireRequest(request.workflow_id, request.new_run_id) + + @workflow.query + def get_current_holders(self) -> dict[str, AcquireRequest]: + return { k: v[0] if v else None for k, v in self.resource_queues.items() } + + @workflow.run + async def run(self, input: SemaphoreWorkflowInput) -> None: + self.resource_queues = input.resource_queues + + # Continue as new either when temporal tells us to, or every 12 hours (so it occurs semi-frequently) + await workflow.wait_condition( + lambda: workflow.info().is_continue_as_new_suggested(), + timeout=timedelta(hours=12), + ) + + workflow.continue_as_new(SemaphoreWorkflowInput(self.resource_queues)) \ No newline at end of file diff --git a/resource_locking/starter.py b/resource_locking/starter.py new file mode 100644 index 00000000..2e7004f5 --- /dev/null +++ b/resource_locking/starter.py @@ -0,0 +1,49 @@ +import asyncio +from typing import Optional, Any + +from 
temporalio.client import Client, WorkflowHandle, WorkflowFailureError + +from resource_locking.load_workflow import LoadWorkflow, LoadWorkflowInput, FailWorkflowException +from resource_locking.sem_workflow import SemaphoreWorkflow, SemaphoreWorkflowInput, SEMAPHORE_WORKFLOW_ID + + +async def main(): + # Connect client + client = await Client.connect("localhost:7233") + + # Run the semaphore workflow + sem_handle = await client.start_workflow( + workflow=SemaphoreWorkflow.run, + arg=SemaphoreWorkflowInput({ + "resource_a": [], + "resource_b": [], + }), + id=SEMAPHORE_WORKFLOW_ID, + task_queue="default", + ) + + load_handles: list[WorkflowHandle[Any, Any]] = [] + for i in range(0, 4): + input = LoadWorkflowInput(iteration_to_fail_after=None) + if i == 1: + input = LoadWorkflowInput(iteration_to_fail_after="first") + + load_handle = await client.start_workflow( + workflow=LoadWorkflow.run, + arg=input, + id=f"load-workflow-{i}", + task_queue="default", + ) + load_handles.append(load_handle) + + for load_handle in load_handles: + try: + await load_handle.result() + except WorkflowFailureError: + pass + + await sem_handle.terminate() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/resource_locking/worker.py b/resource_locking/worker.py new file mode 100644 index 00000000..8e72b9a7 --- /dev/null +++ b/resource_locking/worker.py @@ -0,0 +1,28 @@ +import asyncio +import logging + +from temporalio.client import Client +from temporalio.worker import Worker + +from resource_locking.load_workflow import LoadWorkflow, load +from resource_locking.sem_workflow import SemaphoreWorkflow + +async def main(): + # Uncomment the line below to see logging + logging.basicConfig(level=logging.INFO) + + # Start client + client = await Client.connect("localhost:7233") + + # Run a worker for the workflow + worker = Worker( + client, + task_queue="default", + workflows=[SemaphoreWorkflow, LoadWorkflow], + activities=[load], + ) + + await worker.run() + +if __name__ == 
"__main__": + asyncio.run(main()) From 00c1d4d8a58e8a46e17a8c4399167717f623d2df Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 14:03:38 -0700 Subject: [PATCH 02/23] Fix a code snippet in README.md --- resource_locking/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource_locking/README.md b/resource_locking/README.md index 66f7d160..d8bed74e 100644 --- a/resource_locking/README.md +++ b/resource_locking/README.md @@ -28,4 +28,4 @@ Temporal's durabile execution guarantees, this can only happen if: If a leak were to happen in the wild, you could discover the identity of the leaker using the query above, then: - ~/Code/tctl/tctl wf signal -w semaphore --name release_resource --input '{ "resource": "the resource", "workflow_id": "holder workflow id", "run_id": "holder run id" }' \ No newline at end of file + tctl wf signal -w semaphore --name release_resource --input '{ "resource": "the resource", "workflow_id": "holder workflow id", "run_id": "holder run id" }' \ No newline at end of file From cd31a6114c1129c5640166aa18f00cd6bd74a93b Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 16:09:31 -0700 Subject: [PATCH 03/23] Clean up imports, exercise continue_as_new behavior --- resource_locking/load_workflow.py | 57 +++++++++++++++++++++++-------- resource_locking/sem_workflow.py | 17 +++++++-- resource_locking/starter.py | 10 +++--- 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/resource_locking/load_workflow.py b/resource_locking/load_workflow.py index ec00c369..00f3d707 100644 --- a/resource_locking/load_workflow.py +++ b/resource_locking/load_workflow.py @@ -4,10 +4,9 @@ from typing import Optional from temporalio import activity, workflow -from temporalio.client import Client from resource_locking.sem_workflow import AssignedResource, SEMAPHORE_WORKFLOW_ID, \ - ReleaseRequest, AcquireRequest, SemaphoreWorkflowInput, SEMAPHORE_WORKFLOW_TYPE + ReleaseRequest, AcquireRequest, 
HandoffRequest @dataclass @@ -24,20 +23,31 @@ async def load(input: LoadActivityInput) -> None: @dataclass class LoadWorkflowInput: + # If set, this workflow will fail after the "first", "second", or "third" activity. iteration_to_fail_after: Optional[str] + # If True, this workflow will continue as new after the third activity. The next iteration will run three more + # activities, but will not continue as new. This lets us exercise the handoff logic. + should_continue_as_new: bool + + # Used to transfer resource ownership between iterations during continue_as_new + already_owned_resource: Optional[str] + class FailWorkflowException(Exception): pass MAX_RESOURCE_WAIT_TIME = timedelta(minutes=5) +def has_timeout(timeout: Optional[timedelta]) -> bool: + return timeout is not None and timeout > timedelta(0) + @workflow.defn( failure_exception_types=[FailWorkflowException] ) class LoadWorkflow: def __init__(self): - self.assigned_resource = None + self.assigned_resource: Optional[str] = None @workflow.signal(name="assign_resource") def handle_assign_resource(self, input: AssignedResource): @@ -45,26 +55,31 @@ def handle_assign_resource(self, input: AssignedResource): @workflow.run async def run(self, input: LoadWorkflowInput): - if workflow.info().run_timeout is not None: + workflow.info() + if has_timeout(workflow.info().run_timeout): # See "locking" comment below for rationale - raise FailWorkflowException(f"LoadWorkflow cannot have a run_timeout") - if workflow.info().execution_timeout is not None: - raise FailWorkflowException(f"LoadWorkflow cannot have an execution_timeout") + raise FailWorkflowException(f"LoadWorkflow cannot have a run_timeout (found {workflow.info().run_timeout})") + if has_timeout(workflow.info().execution_timeout): + raise FailWorkflowException(f"LoadWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout})") sem_handle = workflow.get_external_workflow_handle(SEMAPHORE_WORKFLOW_ID) - # Ask for a resource... 
info = workflow.info() - await sem_handle.signal("acquire_resource", AcquireRequest(info.workflow_id, info.run_id)) - - # ...and wait for the answer + if input.already_owned_resource is None: + await sem_handle.signal("acquire_resource", AcquireRequest(info.workflow_id, info.run_id)) + else: + # If we continued as new, we already have a resource. We need to transfer ownership from our predecessor to + # ourselves. + await sem_handle.signal("handoff_resource", HandoffRequest(input.already_owned_resource, info.workflow_id, info.continued_run_id, info.run_id)) + + # Both branches above should cause us to receive an "assign_resource" signal. await workflow.wait_condition(lambda: self.assigned_resource is not None, timeout=MAX_RESOURCE_WAIT_TIME) if self.assigned_resource is None: raise FailWorkflowException(f"No resource was assigned after {MAX_RESOURCE_WAIT_TIME}") - # From this point forward, we own the resource. Note that this is a lock, not a lease! Our finally block needs - # to run to free up the resource if an activity fails. This is why we asserted the lack of workflow-level - # timeouts above - they would prevent the finally block from running if there was a timeout. + # From this point forward, we own the resource. Note that this is a lock, not a lease! Our finally block will + # free up the resource if an activity fails. This is why we asserted the lack of workflow-level timeouts + # above - the finally block wouldn't run if there was a timeout. 
try: for iteration in ["first", "second", "third"]: await workflow.execute_activity( @@ -76,5 +91,17 @@ async def run(self, input: LoadWorkflowInput): if iteration == input.iteration_to_fail_after: workflow.logger.info(f"Failing after iteration {input.iteration_to_fail_after}") raise FailWorkflowException() + + if input.should_continue_as_new: + next_input = LoadWorkflowInput( + iteration_to_fail_after=input.iteration_to_fail_after, + should_continue_as_new=False, + already_owned_resource=self.assigned_resource, + ) + workflow.continue_as_new(next_input) finally: - await sem_handle.signal("release_resource", ReleaseRequest(self.assigned_resource, info.workflow_id, info.run_id)) \ No newline at end of file + # Only release the resource if we didn't continue-as-new. workflow.continue_as_new raises to halt workflow + # execution, but the code in this finally block will still run. It wouldn't successfully send the signal... + # the if statement just avoids some warnings in the log. + if not input.should_continue_as_new: + await sem_handle.signal("release_resource", ReleaseRequest(self.assigned_resource, info.workflow_id, info.run_id)) \ No newline at end of file diff --git a/resource_locking/sem_workflow.py b/resource_locking/sem_workflow.py index 56cf8c69..937b899d 100644 --- a/resource_locking/sem_workflow.py +++ b/resource_locking/sem_workflow.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import timedelta from temporalio import workflow @@ -31,7 +31,8 @@ class HandoffRequest: @dataclass class SemaphoreWorkflowInput: # Key is resource, value is users of the resource. The first item in each list is the current holder of the lease - # on that resource. + # on that resource. A similar data structure could allow for multiple holders (perhaps the first n items are the + # current holders). 
resource_queues: dict[str, list[AcquireRequest]] @workflow.defn( @@ -43,16 +44,21 @@ def __init__(self): @workflow.signal async def acquire_resource(self, request: AcquireRequest): + # A real-world version of this workflow probably wants to use more sophisticated load balancing strategies than + # "first free" and "wait for a random one". + for resource in self.resource_queues: # Naively give out the first free resource, if we have one if len(self.resource_queues[resource]) == 0: + workflow.logger.info(f"workflow_id={request.workflow_id} run_id={request.run_id} acquired resource {resource}") self.resource_queues[resource].append(request) requester = workflow.get_external_workflow_handle(request.workflow_id, run_id=request.run_id) await requester.signal("assign_resource", AssignedResource(resource)) return - # Otherwise put this resource in a random queue + # Otherwise put this resource in a random queue. resource = workflow.random().choice(list(self.resource_queues.keys())) + workflow.logger.info(f"workflow_id={request.workflow_id} run_id={request.run_id} is waiting for resource {resource}") self.resource_queues[resource].append(request) @workflow.signal @@ -74,12 +80,14 @@ async def release_resource(self, request: ReleaseRequest): return # Remove the current holder from the head of the queue + workflow.logger.info(f"workflow_id={request.workflow_id} run_id={request.run_id} released resource {request.resource}") queue = queue[1:] self.resource_queues[request.resource] = queue # If there are queued requests, assign the resource to the next one if len(queue) > 0: next_holder = queue[0] + workflow.logger.info(f"workflow_id={next_holder.workflow_id} run_id={next_holder.run_id} acquired resource {request.resource} after waiting") requester = workflow.get_external_workflow_handle(next_holder.workflow_id, run_id=next_holder.run_id) await requester.signal("assign_resource", AssignedResource(request.resource)) @@ -101,7 +109,10 @@ async def handoff_resource(self, request: 
HandoffRequest): workflow.logger.warning(f"request was from wf_id={request.workflow_id} run_id={request.old_run_id}") return + workflow.logger.info(f"workflow_id={request.workflow_id} handed off resource {request.resource} from run_id={request.old_run_id} to run_id={request.new_run_id}") queue[0] = AcquireRequest(request.workflow_id, request.new_run_id) + requester = workflow.get_external_workflow_handle(request.workflow_id, run_id=request.new_run_id) + await requester.signal("assign_resource", AssignedResource(request.resource)) @workflow.query def get_current_holders(self) -> dict[str, AcquireRequest]: diff --git a/resource_locking/starter.py b/resource_locking/starter.py index 2e7004f5..36856a02 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ -1,9 +1,9 @@ import asyncio -from typing import Optional, Any +from typing import Any from temporalio.client import Client, WorkflowHandle, WorkflowFailureError -from resource_locking.load_workflow import LoadWorkflow, LoadWorkflowInput, FailWorkflowException +from resource_locking.load_workflow import LoadWorkflow, LoadWorkflowInput from resource_locking.sem_workflow import SemaphoreWorkflow, SemaphoreWorkflowInput, SEMAPHORE_WORKFLOW_ID @@ -24,9 +24,11 @@ async def main(): load_handles: list[WorkflowHandle[Any, Any]] = [] for i in range(0, 4): - input = LoadWorkflowInput(iteration_to_fail_after=None) + input = LoadWorkflowInput(iteration_to_fail_after=None, should_continue_as_new=False, already_owned_resource=None) + if i == 0: + input.should_continue_as_new = True if i == 1: - input = LoadWorkflowInput(iteration_to_fail_after="first") + input.iteration_to_fail_after = "first" load_handle = await client.start_workflow( workflow=LoadWorkflow.run, From 7fa82b1f524da47284e784b7d16cb9d1716c1426 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 16:17:11 -0700 Subject: [PATCH 04/23] Rename the workflows for clarity, improve README --- resource_locking/README.md | 36 ++++++++++---- 
...m_workflow.py => lock_manager_workflow.py} | 15 +++--- ...rkflow.py => resource_locking_workflow.py} | 48 ++++++++++--------- resource_locking/starter.py | 35 +++++++------- resource_locking/worker.py | 8 ++-- 5 files changed, 79 insertions(+), 63 deletions(-) rename resource_locking/{sem_workflow.py => lock_manager_workflow.py} (94%) rename resource_locking/{load_workflow.py => resource_locking_workflow.py} (70%) diff --git a/resource_locking/README.md b/resource_locking/README.md index d8bed74e..6808ff00 100644 --- a/resource_locking/README.md +++ b/resource_locking/README.md @@ -1,7 +1,8 @@ -# Semaphore Sample +# Resource Locking Sample -This sample shows how to use a long-lived `semaphore_workflow` to ensure that each `resource` is used by at most one -`load_workflow` at a time. `load_workflow` runs several activities while it has ownership of a resource. +This sample shows how to use a long-lived `LockManagerWorkflow` to ensure that each `resource` is used by at most one +`ResourceLockingWorkflow` at a time. `ResourceLockingWorkflow` runs several activities while it has ownership of a +resource. Run the following from this directory to start the worker: @@ -11,21 +12,36 @@ This will start the worker. Then, in another terminal, run the following to exec poetry run python starter.py -You should see output indicating that the semaphore workflow serialized access to each resource. +You should see output indicating that the LockManagerWorkflow serialized access to each resource. You can query the set of current lock holders with: - tctl wf query -w semaphore --qt get_current_holders + tctl wf query -w lock_manager --qt get_current_holders + +# Other approaches + +There are simpler ways to manage concurrent access to resources. Consider using resource-specific workers/task queues, +and limiting the number of activity slots on the workers. 
The golang SDK also has [sessions](https://docs.temporal.io/develop/go/sessions) +that allow workflows to pin themselves to workers. + +The technique in this sample is capable of more complex resource locking than the options above, but it doesn't scale +as well. Specifically, it can: +- Manage access to a set of resources that is decoupled from the set of workers and task queues +- Run arbitrary code to place workloads on resources as they become available # Caveats This sample uses true locking (not leasing!) to avoid complexity and scaling concerns associated with heartbeating via -signals. Locking carries the risk of a "leak" (failure to unlock) permanently removing a resource from the pool. With -Temporal's durabile execution guarantees, this can only happen if: +signals. Locking carries a risk where failure to unlock permanently removes a resource from the pool. However, with +Temporal's durable execution guarantees, this can only happen if: - A LoadWorkflow times out (prohibited in the sample code) -- You shut down your workers, and never restart them (unhandled, but probably irrelevant) +- You shut down your workers and never restart them (unhandled, but irrelevant) + +If a leak were to happen, you could discover the identity of the leaker using the query above, then: -If a leak were to happen in the wild, you could discover the identity of the leaker using the query above, then: + tctl wf signal -w lock_manager --name release_resource --input '{ "resource": "the resource", "workflow_id": "holder workflow id", "run_id": "holder run id" }' - tctl wf signal -w semaphore --name release_resource --input '{ "resource": "the resource", "workflow_id": "holder workflow id", "run_id": "holder run id" }' \ No newline at end of file +Performance: A single LockManagerWorkflow scales to tens, but not hundreds, of lock/unlock events per second. It is +best suited for locking resources during long-running workflows. 
Actual performance will depend on your temporal +server's persistence layer. \ No newline at end of file diff --git a/resource_locking/sem_workflow.py b/resource_locking/lock_manager_workflow.py similarity index 94% rename from resource_locking/sem_workflow.py rename to resource_locking/lock_manager_workflow.py index 937b899d..119269ad 100644 --- a/resource_locking/sem_workflow.py +++ b/resource_locking/lock_manager_workflow.py @@ -25,20 +25,17 @@ class HandoffRequest: old_run_id: str new_run_id: str -SEMAPHORE_WORKFLOW_TYPE = "semaphore-wf" -SEMAPHORE_WORKFLOW_ID = "semaphore" +LOCK_MANAGER_WORKFLOW_ID = "lock_manager" @dataclass -class SemaphoreWorkflowInput: +class LockManagerWorkflowInput: # Key is resource, value is users of the resource. The first item in each list is the current holder of the lease # on that resource. A similar data structure could allow for multiple holders (perhaps the first n items are the # current holders). resource_queues: dict[str, list[AcquireRequest]] -@workflow.defn( - name=SEMAPHORE_WORKFLOW_TYPE, -) -class SemaphoreWorkflow: +@workflow.defn +class LockManagerWorkflow: def __init__(self): self.resource_queues: dict[str, list[AcquireRequest]] = {} @@ -119,7 +116,7 @@ def get_current_holders(self) -> dict[str, AcquireRequest]: return { k: v[0] if v else None for k, v in self.resource_queues.items() } @workflow.run - async def run(self, input: SemaphoreWorkflowInput) -> None: + async def run(self, input: LockManagerWorkflowInput) -> None: self.resource_queues = input.resource_queues # Continue as new either when temporal tells us to, or every 12 hours (so it occurs semi-frequently) @@ -128,4 +125,4 @@ async def run(self, input: SemaphoreWorkflowInput) -> None: timeout=timedelta(hours=12), ) - workflow.continue_as_new(SemaphoreWorkflowInput(self.resource_queues)) \ No newline at end of file + workflow.continue_as_new(LockManagerWorkflowInput(self.resource_queues)) \ No newline at end of file diff --git 
a/resource_locking/load_workflow.py b/resource_locking/resource_locking_workflow.py similarity index 70% rename from resource_locking/load_workflow.py rename to resource_locking/resource_locking_workflow.py index 00f3d707..fd577db9 100644 --- a/resource_locking/load_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -5,24 +5,23 @@ from temporalio import activity, workflow -from resource_locking.sem_workflow import AssignedResource, SEMAPHORE_WORKFLOW_ID, \ +from resource_locking.lock_manager_workflow import AssignedResource, LOCK_MANAGER_WORKFLOW_ID, \ ReleaseRequest, AcquireRequest, HandoffRequest - @dataclass -class LoadActivityInput: +class UseResourceActivityInput: resource: str iteration: str @activity.defn -async def load(input: LoadActivityInput) -> None: - workflow_id = activity.info().workflow_id - print(f"Workflow {workflow_id} starts using {input.resource} the {input.iteration} time") - await asyncio.sleep(5) - print(f"Workflow {workflow_id} finishes using {input.resource} the {input.iteration} time") +async def use_resource(input: UseResourceActivityInput) -> None: + info = activity.info() + activity.logger.info(f"{info.workflow_id} starts using {input.resource} the {input.iteration} time") + await asyncio.sleep(3) + activity.logger.info(f"{info.workflow_id} done using {input.resource} the {input.iteration} time") @dataclass -class LoadWorkflowInput: +class ResourceLockingWorkflowInput: # If set, this workflow will fail after the "first", "second", or "third" activity. 
iteration_to_fail_after: Optional[str] @@ -36,15 +35,13 @@ class LoadWorkflowInput: class FailWorkflowException(Exception): pass +# Wait this long for a resource before giving up MAX_RESOURCE_WAIT_TIME = timedelta(minutes=5) -def has_timeout(timeout: Optional[timedelta]) -> bool: - return timeout is not None and timeout > timedelta(0) - @workflow.defn( failure_exception_types=[FailWorkflowException] ) -class LoadWorkflow: +class ResourceLockingWorkflow: def __init__(self): self.assigned_resource: Optional[str] = None @@ -54,15 +51,15 @@ def handle_assign_resource(self, input: AssignedResource): self.assigned_resource = input.resource @workflow.run - async def run(self, input: LoadWorkflowInput): + async def run(self, input: ResourceLockingWorkflowInput): workflow.info() if has_timeout(workflow.info().run_timeout): # See "locking" comment below for rationale - raise FailWorkflowException(f"LoadWorkflow cannot have a run_timeout (found {workflow.info().run_timeout})") + raise FailWorkflowException(f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout})") if has_timeout(workflow.info().execution_timeout): - raise FailWorkflowException(f"LoadWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout})") + raise FailWorkflowException(f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout})") - sem_handle = workflow.get_external_workflow_handle(SEMAPHORE_WORKFLOW_ID) + sem_handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) info = workflow.info() if input.already_owned_resource is None: @@ -78,13 +75,13 @@ async def run(self, input: LoadWorkflowInput): raise FailWorkflowException(f"No resource was assigned after {MAX_RESOURCE_WAIT_TIME}") # From this point forward, we own the resource. Note that this is a lock, not a lease! Our finally block will - # free up the resource if an activity fails. 
This is why we asserted the lack of workflow-level timeouts + # release the resource if an activity fails. This is why we asserted the lack of workflow-level timeouts # above - the finally block wouldn't run if there was a timeout. try: for iteration in ["first", "second", "third"]: await workflow.execute_activity( - load, - LoadActivityInput(self.assigned_resource, iteration), + use_resource, + UseResourceActivityInput(self.assigned_resource, iteration), start_to_close_timeout=timedelta(seconds=10), ) @@ -93,7 +90,7 @@ async def run(self, input: LoadWorkflowInput): raise FailWorkflowException() if input.should_continue_as_new: - next_input = LoadWorkflowInput( + next_input = ResourceLockingWorkflowInput( iteration_to_fail_after=input.iteration_to_fail_after, should_continue_as_new=False, already_owned_resource=self.assigned_resource, @@ -101,7 +98,12 @@ async def run(self, input: LoadWorkflowInput): workflow.continue_as_new(next_input) finally: # Only release the resource if we didn't continue-as-new. workflow.continue_as_new raises to halt workflow - # execution, but the code in this finally block will still run. It wouldn't successfully send the signal... + # execution, but this code in this finally block will still run. It wouldn't successfully send the signal... # the if statement just avoids some warnings in the log. if not input.should_continue_as_new: - await sem_handle.signal("release_resource", ReleaseRequest(self.assigned_resource, info.workflow_id, info.run_id)) \ No newline at end of file + await sem_handle.signal("release_resource", ReleaseRequest(self.assigned_resource, info.workflow_id, info.run_id)) + +def has_timeout(timeout: Optional[timedelta]) -> bool: + # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the + # continue_as_new call). 
+ return timeout is not None and timeout > timedelta(0) \ No newline at end of file diff --git a/resource_locking/starter.py b/resource_locking/starter.py index 36856a02..19fdb962 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ -3,49 +3,50 @@ from temporalio.client import Client, WorkflowHandle, WorkflowFailureError -from resource_locking.load_workflow import LoadWorkflow, LoadWorkflowInput -from resource_locking.sem_workflow import SemaphoreWorkflow, SemaphoreWorkflowInput, SEMAPHORE_WORKFLOW_ID +from resource_locking.resource_locking_workflow import ResourceLockingWorkflow, ResourceLockingWorkflowInput +from resource_locking.lock_manager_workflow import LockManagerWorkflow, LockManagerWorkflowInput, LOCK_MANAGER_WORKFLOW_ID async def main(): # Connect client client = await Client.connect("localhost:7233") - # Run the semaphore workflow - sem_handle = await client.start_workflow( - workflow=SemaphoreWorkflow.run, - arg=SemaphoreWorkflowInput({ + # Start the LockManagerWorkflow + lock_manager_handle = await client.start_workflow( + workflow=LockManagerWorkflow.run, + arg=LockManagerWorkflowInput({ "resource_a": [], "resource_b": [], }), - id=SEMAPHORE_WORKFLOW_ID, + id=LOCK_MANAGER_WORKFLOW_ID, task_queue="default", ) - load_handles: list[WorkflowHandle[Any, Any]] = [] + # Start the ResourceLockingWorkflows + resource_locking_handles: list[WorkflowHandle[Any, Any]] = [] for i in range(0, 4): - input = LoadWorkflowInput(iteration_to_fail_after=None, should_continue_as_new=False, already_owned_resource=None) + input = ResourceLockingWorkflowInput(iteration_to_fail_after=None, should_continue_as_new=False, already_owned_resource=None) if i == 0: input.should_continue_as_new = True if i == 1: input.iteration_to_fail_after = "first" - load_handle = await client.start_workflow( - workflow=LoadWorkflow.run, + resource_locking_handle = await client.start_workflow( + workflow=ResourceLockingWorkflow.run, arg=input, - id=f"load-workflow-{i}", + 
id=f"resource-locking-workflow-{i}", task_queue="default", ) - load_handles.append(load_handle) + resource_locking_handles.append(resource_locking_handle) - for load_handle in load_handles: + for resource_locking_handle in resource_locking_handles: try: - await load_handle.result() + await resource_locking_handle.result() except WorkflowFailureError: pass - await sem_handle.terminate() - + # Clean up after ourselves. In the real world, the lock manager workflow would run forever. + await lock_manager_handle.terminate() if __name__ == "__main__": asyncio.run(main()) diff --git a/resource_locking/worker.py b/resource_locking/worker.py index 8e72b9a7..36c0fae1 100644 --- a/resource_locking/worker.py +++ b/resource_locking/worker.py @@ -4,8 +4,8 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_locking.load_workflow import LoadWorkflow, load -from resource_locking.sem_workflow import SemaphoreWorkflow +from resource_locking.resource_locking_workflow import ResourceLockingWorkflow, use_resource +from resource_locking.lock_manager_workflow import LockManagerWorkflow async def main(): # Uncomment the line below to see logging @@ -18,8 +18,8 @@ async def main(): worker = Worker( client, task_queue="default", - workflows=[SemaphoreWorkflow, LoadWorkflow], - activities=[load], + workflows=[LockManagerWorkflow, ResourceLockingWorkflow], + activities=[use_resource], ) await worker.run() From 583be402ba83f085d5d28e73c0e83727419a50f0 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 16:50:51 -0700 Subject: [PATCH 05/23] Remove spurious logging comment --- resource_locking/worker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/resource_locking/worker.py b/resource_locking/worker.py index 36c0fae1..9ad09f1d 100644 --- a/resource_locking/worker.py +++ b/resource_locking/worker.py @@ -8,7 +8,6 @@ from resource_locking.lock_manager_workflow import LockManagerWorkflow async def main(): - # Uncomment the line below to see 
logging logging.basicConfig(level=logging.INFO) # Start client From be1d760114470305c74b4353ba59a2d367fc8700 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 16:56:56 -0700 Subject: [PATCH 06/23] poe format --- resource_locking/lock_manager_workflow.py | 97 ++++++++++++++----- resource_locking/resource_locking_workflow.py | 72 ++++++++++---- resource_locking/starter.py | 30 ++++-- resource_locking/worker.py | 7 +- 4 files changed, 157 insertions(+), 49 deletions(-) diff --git a/resource_locking/lock_manager_workflow.py b/resource_locking/lock_manager_workflow.py index 119269ad..5abd04a0 100644 --- a/resource_locking/lock_manager_workflow.py +++ b/resource_locking/lock_manager_workflow.py @@ -3,21 +3,25 @@ from temporalio import workflow + @dataclass class AssignedResource: resource: str + @dataclass class AcquireRequest: workflow_id: str run_id: str + @dataclass class ReleaseRequest: resource: str workflow_id: str run_id: str + @dataclass class HandoffRequest: resource: str @@ -25,8 +29,10 @@ class HandoffRequest: old_run_id: str new_run_id: str + LOCK_MANAGER_WORKFLOW_ID = "lock_manager" + @dataclass class LockManagerWorkflowInput: # Key is resource, value is users of the resource. The first item in each list is the current holder of the lease @@ -34,6 +40,7 @@ class LockManagerWorkflowInput: # current holders). 
resource_queues: dict[str, list[AcquireRequest]] + @workflow.defn class LockManagerWorkflow: def __init__(self): @@ -47,73 +54,117 @@ async def acquire_resource(self, request: AcquireRequest): for resource in self.resource_queues: # Naively give out the first free resource, if we have one if len(self.resource_queues[resource]) == 0: - workflow.logger.info(f"workflow_id={request.workflow_id} run_id={request.run_id} acquired resource {resource}") + workflow.logger.info( + f"workflow_id={request.workflow_id} run_id={request.run_id} acquired resource {resource}" + ) self.resource_queues[resource].append(request) - requester = workflow.get_external_workflow_handle(request.workflow_id, run_id=request.run_id) + requester = workflow.get_external_workflow_handle( + request.workflow_id, run_id=request.run_id + ) await requester.signal("assign_resource", AssignedResource(resource)) return # Otherwise put this resource in a random queue. resource = workflow.random().choice(list(self.resource_queues.keys())) - workflow.logger.info(f"workflow_id={request.workflow_id} run_id={request.run_id} is waiting for resource {resource}") + workflow.logger.info( + f"workflow_id={request.workflow_id} run_id={request.run_id} is waiting for resource {resource}" + ) self.resource_queues[resource].append(request) @workflow.signal async def release_resource(self, request: ReleaseRequest): queue = self.resource_queues[request.resource] if queue is None: - workflow.logger.warning(f"Ignoring request from {request.workflow_id} to release non-existent resource: {request.resource}") + workflow.logger.warning( + f"Ignoring request from {request.workflow_id} to release non-existent resource: {request.resource}" + ) return if len(queue) == 0: - workflow.logger.warning(f"Ignoring request from {request.workflow_id} to release resource that is not held: {request.resource}") + workflow.logger.warning( + f"Ignoring request from {request.workflow_id} to release resource that is not held: {request.resource}" + ) 
return holder = queue[0] - if not (holder.workflow_id == request.workflow_id and holder.run_id == request.run_id): - workflow.logger.warning(f"Ignoring request from non-holder to release resource {request.resource}") - workflow.logger.warning(f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}") - workflow.logger.warning(f"request was from wf_id={request.workflow_id} run_id={request.run_id}") + if not ( + holder.workflow_id == request.workflow_id + and holder.run_id == request.run_id + ): + workflow.logger.warning( + f"Ignoring request from non-holder to release resource {request.resource}" + ) + workflow.logger.warning( + f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}" + ) + workflow.logger.warning( + f"request was from wf_id={request.workflow_id} run_id={request.run_id}" + ) return # Remove the current holder from the head of the queue - workflow.logger.info(f"workflow_id={request.workflow_id} run_id={request.run_id} released resource {request.resource}") + workflow.logger.info( + f"workflow_id={request.workflow_id} run_id={request.run_id} released resource {request.resource}" + ) queue = queue[1:] self.resource_queues[request.resource] = queue # If there are queued requests, assign the resource to the next one if len(queue) > 0: next_holder = queue[0] - workflow.logger.info(f"workflow_id={next_holder.workflow_id} run_id={next_holder.run_id} acquired resource {request.resource} after waiting") - requester = workflow.get_external_workflow_handle(next_holder.workflow_id, run_id=next_holder.run_id) - await requester.signal("assign_resource", AssignedResource(request.resource)) + workflow.logger.info( + f"workflow_id={next_holder.workflow_id} run_id={next_holder.run_id} acquired resource {request.resource} after waiting" + ) + requester = workflow.get_external_workflow_handle( + next_holder.workflow_id, run_id=next_holder.run_id + ) + await requester.signal( + "assign_resource", 
AssignedResource(request.resource) + ) @workflow.signal async def handoff_resource(self, request: HandoffRequest): queue = self.resource_queues[request.resource] if queue is None: - workflow.logger.warning(f"Ignoring request from {request.workflow_id} to hand off non-existent resource: {request.resource}") + workflow.logger.warning( + f"Ignoring request from {request.workflow_id} to hand off non-existent resource: {request.resource}" + ) return if len(queue) == 0: - workflow.logger.warning(f"Ignoring request from {request.workflow_id} to hand off resource that is not held: {request.resource}") + workflow.logger.warning( + f"Ignoring request from {request.workflow_id} to hand off resource that is not held: {request.resource}" + ) return holder = queue[0] - if not (holder.workflow_id == request.workflow_id and holder.run_id == request.old_run_id): - workflow.logger.warning(f"Ignoring request from non-holder to hand off resource {request.resource}") - workflow.logger.warning(f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}") - workflow.logger.warning(f"request was from wf_id={request.workflow_id} run_id={request.old_run_id}") + if not ( + holder.workflow_id == request.workflow_id + and holder.run_id == request.old_run_id + ): + workflow.logger.warning( + f"Ignoring request from non-holder to hand off resource {request.resource}" + ) + workflow.logger.warning( + f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}" + ) + workflow.logger.warning( + f"request was from wf_id={request.workflow_id} run_id={request.old_run_id}" + ) return - workflow.logger.info(f"workflow_id={request.workflow_id} handed off resource {request.resource} from run_id={request.old_run_id} to run_id={request.new_run_id}") + workflow.logger.info( + f"workflow_id={request.workflow_id} handed off resource {request.resource} from run_id={request.old_run_id} to run_id={request.new_run_id}" + ) queue[0] = AcquireRequest(request.workflow_id, 
request.new_run_id) - requester = workflow.get_external_workflow_handle(request.workflow_id, run_id=request.new_run_id) + requester = workflow.get_external_workflow_handle( + request.workflow_id, run_id=request.new_run_id + ) await requester.signal("assign_resource", AssignedResource(request.resource)) @workflow.query def get_current_holders(self) -> dict[str, AcquireRequest]: - return { k: v[0] if v else None for k, v in self.resource_queues.items() } + return {k: v[0] if v else None for k, v in self.resource_queues.items()} @workflow.run async def run(self, input: LockManagerWorkflowInput) -> None: @@ -125,4 +176,4 @@ async def run(self, input: LockManagerWorkflowInput) -> None: timeout=timedelta(hours=12), ) - workflow.continue_as_new(LockManagerWorkflowInput(self.resource_queues)) \ No newline at end of file + workflow.continue_as_new(LockManagerWorkflowInput(self.resource_queues)) diff --git a/resource_locking/resource_locking_workflow.py b/resource_locking/resource_locking_workflow.py index fd577db9..0ee4f12f 100644 --- a/resource_locking/resource_locking_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -5,20 +5,32 @@ from temporalio import activity, workflow -from resource_locking.lock_manager_workflow import AssignedResource, LOCK_MANAGER_WORKFLOW_ID, \ - ReleaseRequest, AcquireRequest, HandoffRequest +from resource_locking.lock_manager_workflow import ( + LOCK_MANAGER_WORKFLOW_ID, + AcquireRequest, + AssignedResource, + HandoffRequest, + ReleaseRequest, +) + @dataclass class UseResourceActivityInput: resource: str iteration: str + @activity.defn async def use_resource(input: UseResourceActivityInput) -> None: info = activity.info() - activity.logger.info(f"{info.workflow_id} starts using {input.resource} the {input.iteration} time") + activity.logger.info( + f"{info.workflow_id} starts using {input.resource} the {input.iteration} time" + ) await asyncio.sleep(3) - activity.logger.info(f"{info.workflow_id} done using {input.resource} the 
{input.iteration} time") + activity.logger.info( + f"{info.workflow_id} done using {input.resource} the {input.iteration} time" + ) + @dataclass class ResourceLockingWorkflowInput: @@ -32,17 +44,17 @@ class ResourceLockingWorkflowInput: # Used to transfer resource ownership between iterations during continue_as_new already_owned_resource: Optional[str] + class FailWorkflowException(Exception): pass + # Wait this long for a resource before giving up MAX_RESOURCE_WAIT_TIME = timedelta(minutes=5) -@workflow.defn( - failure_exception_types=[FailWorkflowException] -) -class ResourceLockingWorkflow: +@workflow.defn(failure_exception_types=[FailWorkflowException]) +class ResourceLockingWorkflow: def __init__(self): self.assigned_resource: Optional[str] = None @@ -55,24 +67,42 @@ async def run(self, input: ResourceLockingWorkflowInput): workflow.info() if has_timeout(workflow.info().run_timeout): # See "locking" comment below for rationale - raise FailWorkflowException(f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout})") + raise FailWorkflowException( + f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout})" + ) if has_timeout(workflow.info().execution_timeout): - raise FailWorkflowException(f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout})") + raise FailWorkflowException( + f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout})" + ) sem_handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) info = workflow.info() if input.already_owned_resource is None: - await sem_handle.signal("acquire_resource", AcquireRequest(info.workflow_id, info.run_id)) + await sem_handle.signal( + "acquire_resource", AcquireRequest(info.workflow_id, info.run_id) + ) else: # If we continued as new, we already have a resource. We need to transfer ownership from our predecessor to # ourselves. 
- await sem_handle.signal("handoff_resource", HandoffRequest(input.already_owned_resource, info.workflow_id, info.continued_run_id, info.run_id)) + await sem_handle.signal( + "handoff_resource", + HandoffRequest( + input.already_owned_resource, + info.workflow_id, + info.continued_run_id, + info.run_id, + ), + ) # Both branches above should cause us to receive an "assign_resource" signal. - await workflow.wait_condition(lambda: self.assigned_resource is not None, timeout=MAX_RESOURCE_WAIT_TIME) + await workflow.wait_condition( + lambda: self.assigned_resource is not None, timeout=MAX_RESOURCE_WAIT_TIME + ) if self.assigned_resource is None: - raise FailWorkflowException(f"No resource was assigned after {MAX_RESOURCE_WAIT_TIME}") + raise FailWorkflowException( + f"No resource was assigned after {MAX_RESOURCE_WAIT_TIME}" + ) # From this point forward, we own the resource. Note that this is a lock, not a lease! Our finally block will # release the resource if an activity fails. This is why we asserted the lack of workflow-level timeouts @@ -86,7 +116,9 @@ async def run(self, input: ResourceLockingWorkflowInput): ) if iteration == input.iteration_to_fail_after: - workflow.logger.info(f"Failing after iteration {input.iteration_to_fail_after}") + workflow.logger.info( + f"Failing after iteration {input.iteration_to_fail_after}" + ) raise FailWorkflowException() if input.should_continue_as_new: @@ -101,9 +133,15 @@ async def run(self, input: ResourceLockingWorkflowInput): # execution, but this code in this finally block will still run. It wouldn't successfully send the signal... # the if statement just avoids some warnings in the log. 
if not input.should_continue_as_new: - await sem_handle.signal("release_resource", ReleaseRequest(self.assigned_resource, info.workflow_id, info.run_id)) + await sem_handle.signal( + "release_resource", + ReleaseRequest( + self.assigned_resource, info.workflow_id, info.run_id + ), + ) + def has_timeout(timeout: Optional[timedelta]) -> bool: # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the # continue_as_new call). - return timeout is not None and timeout > timedelta(0) \ No newline at end of file + return timeout is not None and timeout > timedelta(0) diff --git a/resource_locking/starter.py b/resource_locking/starter.py index 19fdb962..f2989a4f 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ -1,10 +1,17 @@ import asyncio from typing import Any -from temporalio.client import Client, WorkflowHandle, WorkflowFailureError +from temporalio.client import Client, WorkflowFailureError, WorkflowHandle -from resource_locking.resource_locking_workflow import ResourceLockingWorkflow, ResourceLockingWorkflowInput -from resource_locking.lock_manager_workflow import LockManagerWorkflow, LockManagerWorkflowInput, LOCK_MANAGER_WORKFLOW_ID +from resource_locking.lock_manager_workflow import ( + LOCK_MANAGER_WORKFLOW_ID, + LockManagerWorkflow, + LockManagerWorkflowInput, +) +from resource_locking.resource_locking_workflow import ( + ResourceLockingWorkflow, + ResourceLockingWorkflowInput, +) async def main(): @@ -14,10 +21,12 @@ async def main(): # Start the LockManagerWorkflow lock_manager_handle = await client.start_workflow( workflow=LockManagerWorkflow.run, - arg=LockManagerWorkflowInput({ - "resource_a": [], - "resource_b": [], - }), + arg=LockManagerWorkflowInput( + { + "resource_a": [], + "resource_b": [], + } + ), id=LOCK_MANAGER_WORKFLOW_ID, task_queue="default", ) @@ -25,7 +34,11 @@ async def main(): # Start the ResourceLockingWorkflows resource_locking_handles: 
list[WorkflowHandle[Any, Any]] = [] for i in range(0, 4): - input = ResourceLockingWorkflowInput(iteration_to_fail_after=None, should_continue_as_new=False, already_owned_resource=None) + input = ResourceLockingWorkflowInput( + iteration_to_fail_after=None, + should_continue_as_new=False, + already_owned_resource=None, + ) if i == 0: input.should_continue_as_new = True if i == 1: @@ -48,5 +61,6 @@ async def main(): # Clean up after ourselves. In the real world, the lock manager workflow would run forever. await lock_manager_handle.terminate() + if __name__ == "__main__": asyncio.run(main()) diff --git a/resource_locking/worker.py b/resource_locking/worker.py index 9ad09f1d..febc5de0 100644 --- a/resource_locking/worker.py +++ b/resource_locking/worker.py @@ -4,8 +4,12 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_locking.resource_locking_workflow import ResourceLockingWorkflow, use_resource from resource_locking.lock_manager_workflow import LockManagerWorkflow +from resource_locking.resource_locking_workflow import ( + ResourceLockingWorkflow, + use_resource, +) + async def main(): logging.basicConfig(level=logging.INFO) @@ -23,5 +27,6 @@ async def main(): await worker.run() + if __name__ == "__main__": asyncio.run(main()) From ca184ed3a2a504085ea8d01dcaa1ae225eddf46b Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 1 Apr 2025 17:01:06 -0700 Subject: [PATCH 07/23] poe lint --- resource_locking/lock_manager_workflow.py | 3 ++- resource_locking/resource_locking_workflow.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/resource_locking/lock_manager_workflow.py b/resource_locking/lock_manager_workflow.py index 5abd04a0..10efc9a6 100644 --- a/resource_locking/lock_manager_workflow.py +++ b/resource_locking/lock_manager_workflow.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from datetime import timedelta +from typing import Optional from temporalio import workflow @@ -163,7 +164,7 @@ 
async def handoff_resource(self, request: HandoffRequest): await requester.signal("assign_resource", AssignedResource(request.resource)) @workflow.query - def get_current_holders(self) -> dict[str, AcquireRequest]: + def get_current_holders(self) -> dict[str, Optional[AcquireRequest]]: return {k: v[0] if v else None for k, v in self.resource_queues.items()} @workflow.run diff --git a/resource_locking/resource_locking_workflow.py b/resource_locking/resource_locking_workflow.py index 0ee4f12f..2da00da2 100644 --- a/resource_locking/resource_locking_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -82,7 +82,7 @@ async def run(self, input: ResourceLockingWorkflowInput): await sem_handle.signal( "acquire_resource", AcquireRequest(info.workflow_id, info.run_id) ) - else: + elif info.continued_run_id: # If we continued as new, we already have a resource. We need to transfer ownership from our predecessor to # ourselves. await sem_handle.signal( @@ -94,6 +94,10 @@ async def run(self, input: ResourceLockingWorkflowInput): info.run_id, ), ) + else: + raise FailWorkflowException( + f"Only set 'already_owned_resource' when using continue_as_new" + ) # Both branches above should cause us to receive an "assign_resource" signal. 
await workflow.wait_condition( From 0e0bcd218b95508aca46770630f1afecee048467 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Thu, 10 Apr 2025 14:54:25 -0700 Subject: [PATCH 08/23] Borrow the random signal name trick from other samples --- resource_locking/lock_manager_workflow.py | 167 ++++++------------ resource_locking/resource_locking_workflow.py | 50 ++---- resource_locking/shared.py | 12 ++ resource_locking/starter.py | 4 +- 4 files changed, 77 insertions(+), 156 deletions(-) create mode 100644 resource_locking/shared.py diff --git a/resource_locking/lock_manager_workflow.py b/resource_locking/lock_manager_workflow.py index 10efc9a6..0e800442 100644 --- a/resource_locking/lock_manager_workflow.py +++ b/resource_locking/lock_manager_workflow.py @@ -4,173 +4,106 @@ from temporalio import workflow +from resource_locking.shared import ( + AcquireRequest, + AcquireResponse, +) +# Internal to this workflow, we'll associate randomly generated release signal names with each acquire request. @dataclass -class AssignedResource: - resource: str - - -@dataclass -class AcquireRequest: - workflow_id: str - run_id: str - - -@dataclass -class ReleaseRequest: - resource: str - workflow_id: str - run_id: str - - -@dataclass -class HandoffRequest: - resource: str - workflow_id: str - old_run_id: str - new_run_id: str - - -LOCK_MANAGER_WORKFLOW_ID = "lock_manager" - +class InternalAcquireRequest(AcquireRequest): + release_signal: Optional[str] @dataclass class LockManagerWorkflowInput: - # Key is resource, value is users of the resource. The first item in each list is the current holder of the lease + # Key is resource, value is users of the resource. The first item in each list is the current holder of the lock # on that resource. A similar data structure could allow for multiple holders (perhaps the first n items are the # current holders). 
- resource_queues: dict[str, list[AcquireRequest]] - + resource_queues: dict[str, list[InternalAcquireRequest]] @workflow.defn class LockManagerWorkflow: - def __init__(self): - self.resource_queues: dict[str, list[AcquireRequest]] = {} + @workflow.init + def __init__(self, input: LockManagerWorkflowInput): + self.resource_queues = input.resource_queues + self.release_signal_to_resource: dict[str, str] = {} + for resource, queue in self.resource_queues.items(): + if len(queue) > 0: + self.release_signal_to_resource[queue[0].release_signal] = resource @workflow.signal async def acquire_resource(self, request: AcquireRequest): + internal_request = InternalAcquireRequest(workflow_id=request.workflow_id, release_signal=None) + # A real-world version of this workflow probably wants to use more sophisticated load balancing strategies than # "first free" and "wait for a random one". for resource in self.resource_queues: # Naively give out the first free resource, if we have one if len(self.resource_queues[resource]) == 0: - workflow.logger.info( - f"workflow_id={request.workflow_id} run_id={request.run_id} acquired resource {resource}" - ) - self.resource_queues[resource].append(request) - requester = workflow.get_external_workflow_handle( - request.workflow_id, run_id=request.run_id - ) - await requester.signal("assign_resource", AssignedResource(resource)) + self.resource_queues[resource].append(internal_request) + await self.notify_acquirer(resource) return - # Otherwise put this resource in a random queue. + # Otherwise put this request in a random queue. 
resource = workflow.random().choice(list(self.resource_queues.keys())) workflow.logger.info( - f"workflow_id={request.workflow_id} run_id={request.run_id} is waiting for resource {resource}" + f"workflow_id={request.workflow_id} is waiting for resource {resource}" ) - self.resource_queues[resource].append(request) + self.resource_queues[resource].append(internal_request) - @workflow.signal - async def release_resource(self, request: ReleaseRequest): - queue = self.resource_queues[request.resource] - if queue is None: - workflow.logger.warning( - f"Ignoring request from {request.workflow_id} to release non-existent resource: {request.resource}" - ) - return + async def notify_acquirer(self, resource: str): + acquirer = self.resource_queues[resource][0] + workflow.logger.info( + f"workflow_id={acquirer.workflow_id} acquired resource {resource}" + ) + acquirer.release_signal = str(workflow.uuid4()) + self.release_signal_to_resource[acquirer.release_signal] = resource - if len(queue) == 0: - workflow.logger.warning( - f"Ignoring request from {request.workflow_id} to release resource that is not held: {request.resource}" - ) - return + requester = workflow.get_external_workflow_handle(acquirer.workflow_id) + await requester.signal("assign_resource", AcquireResponse(release_signal_name=acquirer.release_signal, resource=resource)) - holder = queue[0] - if not ( - holder.workflow_id == request.workflow_id - and holder.run_id == request.run_id - ): - workflow.logger.warning( - f"Ignoring request from non-holder to release resource {request.resource}" - ) - workflow.logger.warning( - f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}" - ) - workflow.logger.warning( - f"request was from wf_id={request.workflow_id} run_id={request.run_id}" - ) + @workflow.signal(dynamic=True) + async def release_resource(self, signal_name, *args): + if not signal_name in self.release_signal_to_resource: + workflow.logger.warning(f"Ignoring unknown signal: 
{signal_name} was not a valid release signal.") return - # Remove the current holder from the head of the queue - workflow.logger.info( - f"workflow_id={request.workflow_id} run_id={request.run_id} released resource {request.resource}" - ) - queue = queue[1:] - self.resource_queues[request.resource] = queue + resource = self.release_signal_to_resource[signal_name] - # If there are queued requests, assign the resource to the next one - if len(queue) > 0: - next_holder = queue[0] - workflow.logger.info( - f"workflow_id={next_holder.workflow_id} run_id={next_holder.run_id} acquired resource {request.resource} after waiting" - ) - requester = workflow.get_external_workflow_handle( - next_holder.workflow_id, run_id=next_holder.run_id - ) - await requester.signal( - "assign_resource", AssignedResource(request.resource) - ) - - @workflow.signal - async def handoff_resource(self, request: HandoffRequest): - queue = self.resource_queues[request.resource] + queue = self.resource_queues[resource] if queue is None: workflow.logger.warning( - f"Ignoring request from {request.workflow_id} to hand off non-existent resource: {request.resource}" + f"Ignoring request to release non-existent resource: {resource}" ) return if len(queue) == 0: workflow.logger.warning( - f"Ignoring request from {request.workflow_id} to hand off resource that is not held: {request.resource}" + f"Ignoring request to release resource that is not locked: {resource}" ) return holder = queue[0] - if not ( - holder.workflow_id == request.workflow_id - and holder.run_id == request.old_run_id - ): - workflow.logger.warning( - f"Ignoring request from non-holder to hand off resource {request.resource}" - ) - workflow.logger.warning( - f"resource is currently held by wf_id={holder.workflow_id} run_id={holder.run_id}" - ) - workflow.logger.warning( - f"request was from wf_id={request.workflow_id} run_id={request.old_run_id}" - ) - return + # Remove the current holder from the head of the queue workflow.logger.info( 
- f"workflow_id={request.workflow_id} handed off resource {request.resource} from run_id={request.old_run_id} to run_id={request.new_run_id}" + f"workflow_id={holder.workflow_id} released resource {resource}" ) - queue[0] = AcquireRequest(request.workflow_id, request.new_run_id) - requester = workflow.get_external_workflow_handle( - request.workflow_id, run_id=request.new_run_id - ) - await requester.signal("assign_resource", AssignedResource(request.resource)) + queue = queue[1:] + self.resource_queues[resource] = queue + del self.release_signal_to_resource[signal_name] + + # If there are queued requests, assign the resource to the next one + if len(queue) > 0: + await self.notify_acquirer(resource) @workflow.query - def get_current_holders(self) -> dict[str, Optional[AcquireRequest]]: + def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: return {k: v[0] if v else None for k, v in self.resource_queues.items()} @workflow.run async def run(self, input: LockManagerWorkflowInput) -> None: - self.resource_queues = input.resource_queues - # Continue as new either when temporal tells us to, or every 12 hours (so it occurs semi-frequently) await workflow.wait_condition( lambda: workflow.info().is_continue_as_new_suggested(), diff --git a/resource_locking/resource_locking_workflow.py b/resource_locking/resource_locking_workflow.py index 2da00da2..619b87cd 100644 --- a/resource_locking/resource_locking_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -5,21 +5,17 @@ from temporalio import activity, workflow -from resource_locking.lock_manager_workflow import ( +from resource_locking.shared import ( LOCK_MANAGER_WORKFLOW_ID, AcquireRequest, - AssignedResource, - HandoffRequest, - ReleaseRequest, + AcquireResponse, ) - @dataclass class UseResourceActivityInput: resource: str iteration: str - @activity.defn async def use_resource(input: UseResourceActivityInput) -> None: info = activity.info() @@ -42,7 +38,7 @@ class 
ResourceLockingWorkflowInput: should_continue_as_new: bool # Used to transfer resource ownership between iterations during continue_as_new - already_owned_resource: Optional[str] + already_assigned_resource: Optional[AcquireResponse] class FailWorkflowException(Exception): @@ -56,15 +52,14 @@ class FailWorkflowException(Exception): @workflow.defn(failure_exception_types=[FailWorkflowException]) class ResourceLockingWorkflow: def __init__(self): - self.assigned_resource: Optional[str] = None + self.assigned_resource: Optional[AcquireResponse] = None @workflow.signal(name="assign_resource") - def handle_assign_resource(self, input: AssignedResource): - self.assigned_resource = input.resource + def handle_assign_resource(self, input: AcquireResponse): + self.assigned_resource = input @workflow.run async def run(self, input: ResourceLockingWorkflowInput): - workflow.info() if has_timeout(workflow.info().run_timeout): # See "locking" comment below for rationale raise FailWorkflowException( @@ -78,28 +73,15 @@ async def run(self, input: ResourceLockingWorkflowInput): sem_handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) info = workflow.info() - if input.already_owned_resource is None: - await sem_handle.signal( - "acquire_resource", AcquireRequest(info.workflow_id, info.run_id) - ) + if input.already_assigned_resource is None: + await sem_handle.signal("acquire_resource", AcquireRequest(info.workflow_id)) elif info.continued_run_id: - # If we continued as new, we already have a resource. We need to transfer ownership from our predecessor to - # ourselves. 
- await sem_handle.signal( - "handoff_resource", - HandoffRequest( - input.already_owned_resource, - info.workflow_id, - info.continued_run_id, - info.run_id, - ), - ) + self.assigned_resource = input.already_assigned_resource else: raise FailWorkflowException( - f"Only set 'already_owned_resource' when using continue_as_new" + f"Only set 'already_assigned_resource' when using continue_as_new" ) - # Both branches above should cause us to receive an "assign_resource" signal. await workflow.wait_condition( lambda: self.assigned_resource is not None, timeout=MAX_RESOURCE_WAIT_TIME ) @@ -115,7 +97,7 @@ async def run(self, input: ResourceLockingWorkflowInput): for iteration in ["first", "second", "third"]: await workflow.execute_activity( use_resource, - UseResourceActivityInput(self.assigned_resource, iteration), + UseResourceActivityInput(self.assigned_resource.resource, iteration), start_to_close_timeout=timedelta(seconds=10), ) @@ -129,7 +111,7 @@ async def run(self, input: ResourceLockingWorkflowInput): next_input = ResourceLockingWorkflowInput( iteration_to_fail_after=input.iteration_to_fail_after, should_continue_as_new=False, - already_owned_resource=self.assigned_resource, + already_assigned_resource=self.assigned_resource, ) workflow.continue_as_new(next_input) finally: @@ -137,13 +119,7 @@ async def run(self, input: ResourceLockingWorkflowInput): # execution, but this code in this finally block will still run. It wouldn't successfully send the signal... # the if statement just avoids some warnings in the log. 
if not input.should_continue_as_new: - await sem_handle.signal( - "release_resource", - ReleaseRequest( - self.assigned_resource, info.workflow_id, info.run_id - ), - ) - + await sem_handle.signal(self.assigned_resource.release_signal_name) def has_timeout(timeout: Optional[timedelta]) -> bool: # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the diff --git a/resource_locking/shared.py b/resource_locking/shared.py new file mode 100644 index 00000000..3b58dbca --- /dev/null +++ b/resource_locking/shared.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass + +LOCK_MANAGER_WORKFLOW_ID = "lock_manager" + +@dataclass +class AcquireRequest: + workflow_id: str + +@dataclass +class AcquireResponse: + release_signal_name: str + resource: str diff --git a/resource_locking/starter.py b/resource_locking/starter.py index f2989a4f..87bc5230 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ -3,8 +3,8 @@ from temporalio.client import Client, WorkflowFailureError, WorkflowHandle +from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID from resource_locking.lock_manager_workflow import ( - LOCK_MANAGER_WORKFLOW_ID, LockManagerWorkflow, LockManagerWorkflowInput, ) @@ -37,7 +37,7 @@ async def main(): input = ResourceLockingWorkflowInput( iteration_to_fail_after=None, should_continue_as_new=False, - already_owned_resource=None, + already_assigned_resource=None, ) if i == 0: input.should_continue_as_new = True From 90aaf9972d6ef566789e218b2364db74cad8c484 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Thu, 10 Apr 2025 15:24:58 -0700 Subject: [PATCH 09/23] Simplify allocation algorithm --- resource_locking/lock_manager_workflow.py | 93 ++++++++++++----------- resource_locking/starter.py | 6 +- 2 files changed, 50 insertions(+), 49 deletions(-) diff --git a/resource_locking/lock_manager_workflow.py b/resource_locking/lock_manager_workflow.py index 0e800442..8d9a2131 100644 --- 
a/resource_locking/lock_manager_workflow.py +++ b/resource_locking/lock_manager_workflow.py @@ -16,52 +16,61 @@ class InternalAcquireRequest(AcquireRequest): @dataclass class LockManagerWorkflowInput: - # Key is resource, value is users of the resource. The first item in each list is the current holder of the lock - # on that resource. A similar data structure could allow for multiple holders (perhaps the first n items are the - # current holders). - resource_queues: dict[str, list[InternalAcquireRequest]] + # Key is resource, value is current lock holder for the resource (None if not locked) + resources: dict[str, Optional[InternalAcquireRequest]] + waiters: list[InternalAcquireRequest] @workflow.defn class LockManagerWorkflow: @workflow.init def __init__(self, input: LockManagerWorkflowInput): - self.resource_queues = input.resource_queues + self.resources = input.resources + self.waiters = input.waiters self.release_signal_to_resource: dict[str, str] = {} - for resource, queue in self.resource_queues.items(): - if len(queue) > 0: - self.release_signal_to_resource[queue[0].release_signal] = resource + for resource, holder in self.resources.items(): + if holder is not None: + self.release_signal_to_resource[holder.release_signal] = resource + + @workflow.signal + async def add_resources(self, resources: list[str]): + for resource in resources: + if resource in self.resources: + workflow.logger.warning( + f"Ignoring attempt to add already-existing resource: {resource}" + ) + continue + + self.resources[resource] = None + if len(self.waiters) > 0: + next_holder = self.waiters.pop(0) + await self.allocate_resource(resource, next_holder) @workflow.signal async def acquire_resource(self, request: AcquireRequest): internal_request = InternalAcquireRequest(workflow_id=request.workflow_id, release_signal=None) - # A real-world version of this workflow probably wants to use more sophisticated load balancing strategies than - # "first free" and "wait for a random one". 
- - for resource in self.resource_queues: + for resource, holder in self.resources.items(): # Naively give out the first free resource, if we have one - if len(self.resource_queues[resource]) == 0: - self.resource_queues[resource].append(internal_request) - await self.notify_acquirer(resource) + if holder is None: + await self.allocate_resource(resource, internal_request) return - # Otherwise put this request in a random queue. - resource = workflow.random().choice(list(self.resource_queues.keys())) + # Otherwise queue the request + self.waiters.append(internal_request) workflow.logger.info( - f"workflow_id={request.workflow_id} is waiting for resource {resource}" + f"workflow_id={request.workflow_id} is waiting for a resource" ) - self.resource_queues[resource].append(internal_request) - async def notify_acquirer(self, resource: str): - acquirer = self.resource_queues[resource][0] + async def allocate_resource(self, resource: str, internal_request: InternalAcquireRequest): + self.resources[resource] = internal_request workflow.logger.info( - f"workflow_id={acquirer.workflow_id} acquired resource {resource}" + f"workflow_id={internal_request.workflow_id} acquired resource {resource}" ) - acquirer.release_signal = str(workflow.uuid4()) - self.release_signal_to_resource[acquirer.release_signal] = resource + internal_request.release_signal = str(workflow.uuid4()) + self.release_signal_to_resource[internal_request.release_signal] = resource - requester = workflow.get_external_workflow_handle(acquirer.workflow_id) - await requester.signal("assign_resource", AcquireResponse(release_signal_name=acquirer.release_signal, resource=resource)) + requester = workflow.get_external_workflow_handle(internal_request.workflow_id) + await requester.signal("assign_resource", AcquireResponse(release_signal_name=internal_request.release_signal, resource=resource)) @workflow.signal(dynamic=True) async def release_resource(self, signal_name, *args): @@ -71,43 +80,37 @@ async def 
release_resource(self, signal_name, *args): resource = self.release_signal_to_resource[signal_name] - queue = self.resource_queues[resource] - if queue is None: - workflow.logger.warning( - f"Ignoring request to release non-existent resource: {resource}" - ) - return - - if len(queue) == 0: + holder = self.resources[resource] + if holder is None: workflow.logger.warning( f"Ignoring request to release resource that is not locked: {resource}" ) - return - - holder = queue[0] - # Remove the current holder from the head of the queue + # Remove the current holder workflow.logger.info( f"workflow_id={holder.workflow_id} released resource {resource}" ) - queue = queue[1:] - self.resource_queues[resource] = queue + self.resources[resource] = None del self.release_signal_to_resource[signal_name] # If there are queued requests, assign the resource to the next one - if len(queue) > 0: - await self.notify_acquirer(resource) + if len(self.waiters) > 0: + next_holder = self.waiters.pop(0) + await self.allocate_resource(resource, next_holder) @workflow.query def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: - return {k: v[0] if v else None for k, v in self.resource_queues.items()} + return {k: v if v else None for k, v in self.resources.items()} @workflow.run - async def run(self, input: LockManagerWorkflowInput) -> None: + async def run(self, _: LockManagerWorkflowInput) -> None: # Continue as new either when temporal tells us to, or every 12 hours (so it occurs semi-frequently) await workflow.wait_condition( lambda: workflow.info().is_continue_as_new_suggested(), timeout=timedelta(hours=12), ) - workflow.continue_as_new(LockManagerWorkflowInput(self.resource_queues)) + workflow.continue_as_new(LockManagerWorkflowInput( + resources=self.resources, + waiters=self.waiters, + )) diff --git a/resource_locking/starter.py b/resource_locking/starter.py index 87bc5230..4bfb7a08 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ 
-22,10 +22,8 @@ async def main(): lock_manager_handle = await client.start_workflow( workflow=LockManagerWorkflow.run, arg=LockManagerWorkflowInput( - { - "resource_a": [], - "resource_b": [], - } + resources={ "resource_a": None, "resource_b": None }, + waiters=[], ), id=LOCK_MANAGER_WORKFLOW_ID, task_queue="default", From 72a24b094b17df412286a73324d0f21b438f91ed Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Thu, 10 Apr 2025 17:23:32 -0700 Subject: [PATCH 10/23] Add 'async with' syntactic sugar --- resource_locking/resource_allocator.py | 83 +++++++++++++++++++ resource_locking/resource_locking_workflow.py | 71 +++------------- resource_locking/shared.py | 7 +- resource_locking/starter.py | 34 ++++---- resource_locking/worker.py | 8 +- 5 files changed, 126 insertions(+), 77 deletions(-) create mode 100644 resource_locking/resource_allocator.py diff --git a/resource_locking/resource_allocator.py b/resource_locking/resource_allocator.py new file mode 100644 index 00000000..a3f2b169 --- /dev/null +++ b/resource_locking/resource_allocator.py @@ -0,0 +1,83 @@ +from contextlib import asynccontextmanager +from datetime import timedelta +from typing import Optional, AsyncGenerator + +from temporalio.client import Client +from temporalio import workflow, activity +from temporalio.common import WorkflowIDConflictPolicy + +from resource_locking.lock_manager_workflow import LockManagerWorkflowInput, LockManagerWorkflow +from resource_locking.shared import AcquireResponse, LOCK_MANAGER_WORKFLOW_ID, AcquireRequest, AcquiredResource + +# Use this class in workflow code that needs to run on locked resources. +class ResourceAllocator: + def __init__(self, client: Client): + self.client = client + + @activity.defn + async def send_acquire_signal(self): + info = activity.info() + + # This will start and signal the workflow if it isn't running, otherwise it will signal the current run.
+ await self.client.start_workflow( + workflow=LockManagerWorkflow.run, + arg=LockManagerWorkflowInput( + resources={}, + waiters=[], + ), + id=LOCK_MANAGER_WORKFLOW_ID, + task_queue="default", + id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, + start_signal="acquire_resource", + start_signal_args=[AcquireRequest(info.workflow_id)] + ) + + @classmethod + @asynccontextmanager + async def acquire_resource(cls, *, already_acquired_resource: Optional[AcquiredResource] = None, max_wait_time: timedelta = timedelta(minutes=5)): + warn_when_workflow_has_timeouts() + + resource = already_acquired_resource + if resource is None: + async def assign_resource(input: AcquireResponse): + workflow.set_signal_handler("assign_resource", None) + nonlocal resource + resource = AcquiredResource( + resource=input.resource, + release_signal_name=input.release_signal_name, + ) + + workflow.set_signal_handler("assign_resource", assign_resource) + + await workflow.execute_activity( + ResourceAllocator.send_acquire_signal, + start_to_close_timeout=timedelta(seconds=10), + ) + + await workflow.wait_condition(lambda: resource is not None, timeout=max_wait_time) + + # During the yield, the calling workflow owns the resource. Note that this is a lock, not a lease! Our + # finally block will release the resource if an activity fails. This is why we asserted the lack of + # workflow-level timeouts above - the finally block wouldn't run if there was a timeout. 
+ try: + resource.autorelease = True + yield resource + finally: + if resource.autorelease: + handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) + await handle.signal(resource.release_signal_name) + +def warn_when_workflow_has_timeouts(): + if has_timeout(workflow.info().run_timeout): + workflow.logger.warning( + f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout}) - this will leak locks" + ) + if has_timeout(workflow.info().execution_timeout): + workflow.logger.warning( + f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout}) - this will leak locks" + ) + +def has_timeout(timeout: Optional[timedelta]) -> bool: + # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the + # continue_as_new call). + return timeout is not None and timeout > timedelta(0) diff --git a/resource_locking/resource_locking_workflow.py b/resource_locking/resource_locking_workflow.py index 619b87cd..c4686222 100644 --- a/resource_locking/resource_locking_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -1,14 +1,15 @@ import asyncio -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import timedelta -from typing import Optional +from typing import Optional, Callable from temporalio import activity, workflow +from resource_locking.resource_allocator import ResourceAllocator from resource_locking.shared import ( LOCK_MANAGER_WORKFLOW_ID, AcquireRequest, - AcquireResponse, + AcquireResponse, AcquiredResource, ) @dataclass @@ -38,7 +39,7 @@ class ResourceLockingWorkflowInput: should_continue_as_new: bool # Used to transfer resource ownership between iterations during continue_as_new - already_assigned_resource: Optional[AcquireResponse] + already_acquired_resource: Optional[AcquiredResource] = field(default=None) class FailWorkflowException(Exception): @@ -51,53 +52,13 @@ class 
FailWorkflowException(Exception): @workflow.defn(failure_exception_types=[FailWorkflowException]) class ResourceLockingWorkflow: - def __init__(self): - self.assigned_resource: Optional[AcquireResponse] = None - - @workflow.signal(name="assign_resource") - def handle_assign_resource(self, input: AcquireResponse): - self.assigned_resource = input - @workflow.run async def run(self, input: ResourceLockingWorkflowInput): - if has_timeout(workflow.info().run_timeout): - # See "locking" comment below for rationale - raise FailWorkflowException( - f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout})" - ) - if has_timeout(workflow.info().execution_timeout): - raise FailWorkflowException( - f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout})" - ) - - sem_handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) - - info = workflow.info() - if input.already_assigned_resource is None: - await sem_handle.signal("acquire_resource", AcquireRequest(info.workflow_id)) - elif info.continued_run_id: - self.assigned_resource = input.already_assigned_resource - else: - raise FailWorkflowException( - f"Only set 'already_assigned_resource' when using continue_as_new" - ) - - await workflow.wait_condition( - lambda: self.assigned_resource is not None, timeout=MAX_RESOURCE_WAIT_TIME - ) - if self.assigned_resource is None: - raise FailWorkflowException( - f"No resource was assigned after {MAX_RESOURCE_WAIT_TIME}" - ) - - # From this point forward, we own the resource. Note that this is a lock, not a lease! Our finally block will - # release the resource if an activity fails. This is why we asserted the lack of workflow-level timeouts - # above - the finally block wouldn't run if there was a timeout. 
- try: + async with ResourceAllocator.acquire_resource(already_acquired_resource=input.already_acquired_resource) as resource: for iteration in ["first", "second", "third"]: await workflow.execute_activity( use_resource, - UseResourceActivityInput(self.assigned_resource.resource, iteration), + UseResourceActivityInput(resource.resource, iteration), start_to_close_timeout=timedelta(seconds=10), ) @@ -111,17 +72,11 @@ async def run(self, input: ResourceLockingWorkflowInput): next_input = ResourceLockingWorkflowInput( iteration_to_fail_after=input.iteration_to_fail_after, should_continue_as_new=False, - already_assigned_resource=self.assigned_resource, + already_acquired_resource=resource, ) + + # By default, ResourceAllocator will release the resource when we return. We want to hold the resource + # across continue-as-new for the sake of demonstration. + resource.autorelease = False + workflow.continue_as_new(next_input) - finally: - # Only release the resource if we didn't continue-as-new. workflow.continue_as_new raises to halt workflow - # execution, but this code in this finally block will still run. It wouldn't successfully send the signal... - # the if statement just avoids some warnings in the log. - if not input.should_continue_as_new: - await sem_handle.signal(self.assigned_resource.release_signal_name) - -def has_timeout(timeout: Optional[timedelta]) -> bool: - # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the - # continue_as_new call). 
- return timeout is not None and timeout > timedelta(0) diff --git a/resource_locking/shared.py b/resource_locking/shared.py index 3b58dbca..331bcd15 100644 --- a/resource_locking/shared.py +++ b/resource_locking/shared.py @@ -1,4 +1,5 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field +from typing import Optional LOCK_MANAGER_WORKFLOW_ID = "lock_manager" @@ -10,3 +11,7 @@ class AcquireRequest: class AcquireResponse: release_signal_name: str resource: str + +@dataclass +class AcquiredResource(AcquireResponse): + autorelease: bool = field(default=True) diff --git a/resource_locking/starter.py b/resource_locking/starter.py index 4bfb7a08..a2856719 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ -3,39 +3,25 @@ from temporalio.client import Client, WorkflowFailureError, WorkflowHandle -from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID -from resource_locking.lock_manager_workflow import ( - LockManagerWorkflow, - LockManagerWorkflowInput, -) +from resource_locking.lock_manager_workflow import LockManagerWorkflow, LockManagerWorkflowInput from resource_locking.resource_locking_workflow import ( ResourceLockingWorkflow, ResourceLockingWorkflowInput, ) +from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID +from temporalio.common import WorkflowIDConflictPolicy async def main(): # Connect client client = await Client.connect("localhost:7233") - # Start the LockManagerWorkflow - lock_manager_handle = await client.start_workflow( - workflow=LockManagerWorkflow.run, - arg=LockManagerWorkflowInput( - resources={ "resource_a": None, "resource_b": None }, - waiters=[], - ), - id=LOCK_MANAGER_WORKFLOW_ID, - task_queue="default", - ) - # Start the ResourceLockingWorkflows resource_locking_handles: list[WorkflowHandle[Any, Any]] = [] for i in range(0, 4): input = ResourceLockingWorkflowInput( iteration_to_fail_after=None, should_continue_as_new=False, - already_assigned_resource=None, ) if i == 0: 
input.should_continue_as_new = True @@ -50,6 +36,20 @@ async def main(): ) resource_locking_handles.append(resource_locking_handle) + # Add some resources + lock_manager_handle = await client.start_workflow( + workflow=LockManagerWorkflow.run, + arg=LockManagerWorkflowInput( + resources={}, + waiters=[], + ), + id=LOCK_MANAGER_WORKFLOW_ID, + task_queue="default", + id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, + start_signal="add_resources", + start_signal_args=[["resource_a", "resource_b"]], + ) + for resource_locking_handle in resource_locking_handles: try: await resource_locking_handle.result() diff --git a/resource_locking/worker.py b/resource_locking/worker.py index febc5de0..e6161a29 100644 --- a/resource_locking/worker.py +++ b/resource_locking/worker.py @@ -4,6 +4,7 @@ from temporalio.client import Client from temporalio.worker import Worker +from resource_locking.resource_allocator import ResourceAllocator from resource_locking.lock_manager_workflow import LockManagerWorkflow from resource_locking.resource_locking_workflow import ( ResourceLockingWorkflow, @@ -17,12 +18,17 @@ async def main(): # Start client client = await Client.connect("localhost:7233") + resource_allocator = ResourceAllocator(client) + # Run a worker for the workflow worker = Worker( client, task_queue="default", workflows=[LockManagerWorkflow, ResourceLockingWorkflow], - activities=[use_resource], + activities=[ + use_resource, + resource_allocator.send_acquire_signal, + ], ) await worker.run() From eb92f4881fee2df34dab97ee15d3341ffdd859c8 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Thu, 10 Apr 2025 17:24:27 -0700 Subject: [PATCH 11/23] Update readme --- resource_locking/README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/resource_locking/README.md b/resource_locking/README.md index 6808ff00..581a87b0 100644 --- a/resource_locking/README.md +++ b/resource_locking/README.md @@ -1,16 +1,16 @@ # Resource Locking Sample -This sample 
shows how to use a long-lived `LockManagerWorkflow` to ensure that each `resource` is used by at most one -`ResourceLockingWorkflow` at a time. `ResourceLockingWorkflow` runs several activities while it has ownership of a -resource. +This sample shows how to use a long-lived `LockManagerWorkflow` to allocate `resources` to `ResourceLockingWorkflows`. +Each `ResourceLockingWorkflow` runs several activities while it has ownership of a resource. Note that +`LockManagerWorkflow` is making resource allocation decisions based on in-memory state. Run the following from this directory to start the worker: - poetry run python worker.py + uv run worker.py -This will start the worker. Then, in another terminal, run the following to execute several load workflows: +This will start the worker. Then, in another terminal, run the following to execute several `ResourceLockingWorkflows`: - poetry run python starter.py + uv run starter.py You should see output indicating that the LockManagerWorkflow serialized access to each resource. @@ -36,6 +36,7 @@ signals. Locking carries a risk where failure to unlock permanently removing a r
- You shut down your workers and never restart them (unhandled, but irrelevant) If a leak were to happen, you could discover the identity of the leaker using the query above, then: From 69751bb8f4c7f78f0cdb0237a20ca59f7e47bd14 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Thu, 10 Apr 2025 17:37:47 -0700 Subject: [PATCH 12/23] poe lint/fmt --- resource_locking/lock_manager_workflow.py | 40 +++++++++++------ resource_locking/resource_allocator.py | 43 ++++++++++++++----- resource_locking/resource_locking_workflow.py | 13 ++++-- resource_locking/shared.py | 3 ++ resource_locking/starter.py | 7 ++- resource_locking/worker.py | 2 +- 6 files changed, 78 insertions(+), 30 deletions(-) diff --git a/resource_locking/lock_manager_workflow.py b/resource_locking/lock_manager_workflow.py index 8d9a2131..4cfaaa1d 100644 --- a/resource_locking/lock_manager_workflow.py +++ b/resource_locking/lock_manager_workflow.py @@ -4,22 +4,22 @@ from temporalio import workflow -from resource_locking.shared import ( - AcquireRequest, - AcquireResponse, -) +from resource_locking.shared import AcquireRequest, AcquireResponse + # Internal to this workflow, we'll associate randomly generated release signal names with each acquire request. 
@dataclass class InternalAcquireRequest(AcquireRequest): release_signal: Optional[str] + @dataclass class LockManagerWorkflowInput: # Key is resource, value is current lock holder for the resource (None if not locked) resources: dict[str, Optional[InternalAcquireRequest]] waiters: list[InternalAcquireRequest] + @workflow.defn class LockManagerWorkflow: @workflow.init @@ -28,7 +28,7 @@ def __init__(self, input: LockManagerWorkflowInput): self.waiters = input.waiters self.release_signal_to_resource: dict[str, str] = {} for resource, holder in self.resources.items(): - if holder is not None: + if holder is not None and holder.release_signal is not None: self.release_signal_to_resource[holder.release_signal] = resource @workflow.signal @@ -47,7 +47,9 @@ async def add_resources(self, resources: list[str]): @workflow.signal async def acquire_resource(self, request: AcquireRequest): - internal_request = InternalAcquireRequest(workflow_id=request.workflow_id, release_signal=None) + internal_request = InternalAcquireRequest( + workflow_id=request.workflow_id, release_signal=None + ) for resource, holder in self.resources.items(): # Naively give out the first free resource, if we have one @@ -61,7 +63,9 @@ async def acquire_resource(self, request: AcquireRequest): f"workflow_id={request.workflow_id} is waiting for a resource" ) - async def allocate_resource(self, resource: str, internal_request: InternalAcquireRequest): + async def allocate_resource( + self, resource: str, internal_request: InternalAcquireRequest + ): self.resources[resource] = internal_request workflow.logger.info( f"workflow_id={internal_request.workflow_id} acquired resource {resource}" @@ -70,12 +74,19 @@ async def allocate_resource(self, resource: str, internal_request: InternalAcqui self.release_signal_to_resource[internal_request.release_signal] = resource requester = workflow.get_external_workflow_handle(internal_request.workflow_id) - await requester.signal("assign_resource", 
AcquireResponse(release_signal_name=internal_request.release_signal, resource=resource)) + await requester.signal( + "assign_resource", + AcquireResponse( + release_signal_name=internal_request.release_signal, resource=resource + ), + ) @workflow.signal(dynamic=True) async def release_resource(self, signal_name, *args): if not signal_name in self.release_signal_to_resource: - workflow.logger.warning(f"Ignoring unknown signal: {signal_name} was not a valid release signal.") + workflow.logger.warning( + f"Ignoring unknown signal: {signal_name} was not a valid release signal." + ) return resource = self.release_signal_to_resource[signal_name] @@ -85,6 +96,7 @@ async def release_resource(self, signal_name, *args): workflow.logger.warning( f"Ignoring request to release resource that is not locked: {resource}" ) + return # Remove the current holder workflow.logger.info( @@ -110,7 +122,9 @@ async def run(self, _: LockManagerWorkflowInput) -> None: timeout=timedelta(hours=12), ) - workflow.continue_as_new(LockManagerWorkflowInput( - resources=self.resources, - waiters=self.waiters, - )) + workflow.continue_as_new( + LockManagerWorkflowInput( + resources=self.resources, + waiters=self.waiters, + ) + ) diff --git a/resource_locking/resource_allocator.py b/resource_locking/resource_allocator.py index a3f2b169..c479c684 100644 --- a/resource_locking/resource_allocator.py +++ b/resource_locking/resource_allocator.py @@ -1,13 +1,22 @@ from contextlib import asynccontextmanager from datetime import timedelta -from typing import Optional, AsyncGenerator +from typing import Optional +from temporalio import activity, workflow from temporalio.client import Client -from temporalio import workflow, activity from temporalio.common import WorkflowIDConflictPolicy -from resource_locking.lock_manager_workflow import LockManagerWorkflowInput, LockManagerWorkflow -from resource_locking.shared import AcquireResponse, LOCK_MANAGER_WORKFLOW_ID, AcquireRequest, AcquiredResource +from 
resource_locking.lock_manager_workflow import ( + LockManagerWorkflow, + LockManagerWorkflowInput, +) +from resource_locking.shared import ( + LOCK_MANAGER_WORKFLOW_ID, + AcquiredResource, + AcquireRequest, + AcquireResponse, +) + # Use this class in workflow code that that needs to run on locked resources. class ResourceAllocator: @@ -15,7 +24,7 @@ def __init__(self, client: Client): self.client = client @activity.defn - async def send_acquire_signal(self): + async def send_acquire_signal(self) -> None: info = activity.info() # This will start and signal the workflow if it isn't running, otherwise it will signal the current run. @@ -29,16 +38,22 @@ async def send_acquire_signal(self): task_queue="default", id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, start_signal="acquire_resource", - start_signal_args=[AcquireRequest(info.workflow_id)] + start_signal_args=[AcquireRequest(info.workflow_id)], ) @classmethod @asynccontextmanager - async def acquire_resource(cls, *, already_acquired_resource: Optional[AcquiredResource] = None, max_wait_time: timedelta = timedelta(minutes=5)): + async def acquire_resource( + cls, + *, + already_acquired_resource: Optional[AcquiredResource] = None, + max_wait_time: timedelta = timedelta(minutes=5), + ): warn_when_workflow_has_timeouts() resource = already_acquired_resource if resource is None: + async def assign_resource(input: AcquireResponse): workflow.set_signal_handler("assign_resource", None) nonlocal resource @@ -49,12 +64,18 @@ async def assign_resource(input: AcquireResponse): workflow.set_signal_handler("assign_resource", assign_resource) - await workflow.execute_activity( - ResourceAllocator.send_acquire_signal, + await workflow.start_activity( + ResourceAllocator.send_acquire_signal, # type: ignore[arg-type] start_to_close_timeout=timedelta(seconds=10), ) - await workflow.wait_condition(lambda: resource is not None, timeout=max_wait_time) + await workflow.wait_condition( + lambda: resource is not None, 
timeout=max_wait_time + ) + + # Can't happen, but the typechecker doesn't know about workflow.wait_condition + if resource is None: + raise RuntimeError("resource was None when it can't be") # During the yield, the calling workflow owns the resource. Note that this is a lock, not a lease! Our # finally block will release the resource if an activity fails. This is why we asserted the lack of @@ -67,6 +88,7 @@ async def assign_resource(input: AcquireResponse): handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) await handle.signal(resource.release_signal_name) + def warn_when_workflow_has_timeouts(): if has_timeout(workflow.info().run_timeout): workflow.logger.warning( @@ -77,6 +99,7 @@ def warn_when_workflow_has_timeouts(): f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout}) - this will leak locks" ) + def has_timeout(timeout: Optional[timedelta]) -> bool: # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the # continue_as_new call). 
diff --git a/resource_locking/resource_locking_workflow.py b/resource_locking/resource_locking_workflow.py index c4686222..bb8f97c3 100644 --- a/resource_locking/resource_locking_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -1,22 +1,25 @@ import asyncio from dataclasses import dataclass, field from datetime import timedelta -from typing import Optional, Callable +from typing import Callable, Optional from temporalio import activity, workflow from resource_locking.resource_allocator import ResourceAllocator from resource_locking.shared import ( LOCK_MANAGER_WORKFLOW_ID, + AcquiredResource, AcquireRequest, - AcquireResponse, AcquiredResource, + AcquireResponse, ) + @dataclass class UseResourceActivityInput: resource: str iteration: str + @activity.defn async def use_resource(input: UseResourceActivityInput) -> None: info = activity.info() @@ -54,7 +57,9 @@ class FailWorkflowException(Exception): class ResourceLockingWorkflow: @workflow.run async def run(self, input: ResourceLockingWorkflowInput): - async with ResourceAllocator.acquire_resource(already_acquired_resource=input.already_acquired_resource) as resource: + async with ResourceAllocator.acquire_resource( + already_acquired_resource=input.already_acquired_resource + ) as resource: for iteration in ["first", "second", "third"]: await workflow.execute_activity( use_resource, @@ -76,7 +81,7 @@ async def run(self, input: ResourceLockingWorkflowInput): ) # By default, ResourceAllocator will release the resource when we return. We want to hold the resource - # across continue-as-new for the sake of demonstration. 
+ # across continue-as-new for the sake of demonstration.b resource.autorelease = False workflow.continue_as_new(next_input) diff --git a/resource_locking/shared.py b/resource_locking/shared.py index 331bcd15..098c1e58 100644 --- a/resource_locking/shared.py +++ b/resource_locking/shared.py @@ -3,15 +3,18 @@ LOCK_MANAGER_WORKFLOW_ID = "lock_manager" + @dataclass class AcquireRequest: workflow_id: str + @dataclass class AcquireResponse: release_signal_name: str resource: str + @dataclass class AcquiredResource(AcquireResponse): autorelease: bool = field(default=True) diff --git a/resource_locking/starter.py b/resource_locking/starter.py index a2856719..3a752753 100644 --- a/resource_locking/starter.py +++ b/resource_locking/starter.py @@ -2,14 +2,17 @@ from typing import Any from temporalio.client import Client, WorkflowFailureError, WorkflowHandle +from temporalio.common import WorkflowIDConflictPolicy -from resource_locking.lock_manager_workflow import LockManagerWorkflow, LockManagerWorkflowInput +from resource_locking.lock_manager_workflow import ( + LockManagerWorkflow, + LockManagerWorkflowInput, +) from resource_locking.resource_locking_workflow import ( ResourceLockingWorkflow, ResourceLockingWorkflowInput, ) from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID -from temporalio.common import WorkflowIDConflictPolicy async def main(): diff --git a/resource_locking/worker.py b/resource_locking/worker.py index e6161a29..d28b18da 100644 --- a/resource_locking/worker.py +++ b/resource_locking/worker.py @@ -4,8 +4,8 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_locking.resource_allocator import ResourceAllocator from resource_locking.lock_manager_workflow import LockManagerWorkflow +from resource_locking.resource_allocator import ResourceAllocator from resource_locking.resource_locking_workflow import ( ResourceLockingWorkflow, use_resource, From 2ab9dfa241d72826be801da55bb67e7dc359b742 Mon Sep 17 00:00:00 2001 
From: Nathan Glass Date: Thu, 10 Apr 2025 18:05:10 -0700 Subject: [PATCH 13/23] Add a test --- resource_locking/resource_locking_workflow.py | 2 +- resource_locking/shared.py | 1 - tests/resource_locking/__init__.py | 0 tests/resource_locking/workflow_test.py | 118 ++++++++++++++++++ 4 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 tests/resource_locking/__init__.py create mode 100644 tests/resource_locking/workflow_test.py diff --git a/resource_locking/resource_locking_workflow.py b/resource_locking/resource_locking_workflow.py index bb8f97c3..d9e25278 100644 --- a/resource_locking/resource_locking_workflow.py +++ b/resource_locking/resource_locking_workflow.py @@ -60,7 +60,7 @@ async def run(self, input: ResourceLockingWorkflowInput): async with ResourceAllocator.acquire_resource( already_acquired_resource=input.already_acquired_resource ) as resource: - for iteration in ["first", "second", "third"]: + for iteration in ["first", "second"]: await workflow.execute_activity( use_resource, UseResourceActivityInput(resource.resource, iteration), diff --git a/resource_locking/shared.py b/resource_locking/shared.py index 098c1e58..2afdc556 100644 --- a/resource_locking/shared.py +++ b/resource_locking/shared.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from typing import Optional LOCK_MANAGER_WORKFLOW_ID = "lock_manager" diff --git a/tests/resource_locking/__init__.py b/tests/resource_locking/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/resource_locking/workflow_test.py b/tests/resource_locking/workflow_test.py new file mode 100644 index 00000000..96d5d5ee --- /dev/null +++ b/tests/resource_locking/workflow_test.py @@ -0,0 +1,118 @@ +import asyncio +import uuid +from collections import defaultdict +from datetime import timedelta +from typing import Any, Optional, Sequence + +from temporalio import activity +from temporalio.client import Client, WorkflowFailureError, WorkflowHandle +from 
temporalio.common import WorkflowIDConflictPolicy +from temporalio.worker import Worker + +from resource_locking.lock_manager_workflow import ( + LockManagerWorkflow, + LockManagerWorkflowInput, +) +from resource_locking.resource_allocator import ResourceAllocator +from resource_locking.resource_locking_workflow import ( + ResourceLockingWorkflow, + ResourceLockingWorkflowInput, + UseResourceActivityInput, +) +from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID + +TASK_QUEUE = "default" + + +async def test_resource_locking_workflow(client: Client): + # key is resource, value is a description of resource usage + resource_usage: defaultdict[str, list[Sequence[str]]] = defaultdict(list) + + # Mock out the activity to count executions + @activity.defn(name="use_resource") + async def use_resource_mock(input: UseResourceActivityInput) -> None: + workflow_id = activity.info().workflow_id + resource_usage[input.resource].append((workflow_id, "start")) + # We need a small sleep here to bait out races + await asyncio.sleep(0.05) + resource_usage[input.resource].append((workflow_id, "end")) + + resource_allocator = ResourceAllocator(client) + + async with Worker( + client, + task_queue=TASK_QUEUE, + workflows=[LockManagerWorkflow, ResourceLockingWorkflow], + activities=[use_resource_mock, resource_allocator.send_acquire_signal], + ): + await run_all_workflows(client) + + # Did any workflow run in more than one place? + workflow_id_to_resource: dict[str, str] = {} + for resource, events in resource_usage.items(): + for workflow_id, event in events: + if workflow_id in workflow_id_to_resource: + existing_resource = workflow_id_to_resource[workflow_id] + assert ( + existing_resource == resource + ), f"{workflow_id} ran on both {resource} and {existing_resource}" + else: + workflow_id_to_resource[workflow_id] = resource + + # Did any resource have more than one workflow on it at a time? 
+ for resource, events in resource_usage.items(): + holder: Optional[str] = None + for workflow_id, event in events: + if event == "start": + assert ( + holder is None + ), f"{workflow_id} started on {resource} held by {holder}" + holder = workflow_id + else: + assert ( + holder == workflow_id + ), f"{workflow_id} ended on {resource} held by {holder}" + holder = None + + +async def run_all_workflows(client: Client): + resource_locking_handles: list[WorkflowHandle[Any, Any]] = [] + for i in range(0, 8): + input = ResourceLockingWorkflowInput( + iteration_to_fail_after=None, + should_continue_as_new=False, + ) + if i == 0: + input.should_continue_as_new = True + if i == 1: + input.iteration_to_fail_after = "first" + + resource_locking_handle = await client.start_workflow( + workflow=ResourceLockingWorkflow.run, + arg=input, + id=f"resource-locking-workflow-{i}", + task_queue=TASK_QUEUE, + ) + resource_locking_handles.append(resource_locking_handle) + + # Add some resources + lock_manager_handle = await client.start_workflow( + workflow=LockManagerWorkflow.run, + arg=LockManagerWorkflowInput( + resources={}, + waiters=[], + ), + id=LOCK_MANAGER_WORKFLOW_ID, + task_queue="default", + id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, + start_signal="add_resources", + start_signal_args=[["r_a", "r_b", "r_c"]], + ) + + for resource_locking_handle in resource_locking_handles: + try: + await resource_locking_handle.result() + except WorkflowFailureError: + pass + + await lock_manager_handle.terminate() From bb9cb64a510d80b1394375f5be7d802d4029658f Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Thu, 10 Apr 2025 18:21:11 -0700 Subject: [PATCH 14/23] method rename --- resource_locking/lock_manager_workflow.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resource_locking/lock_manager_workflow.py b/resource_locking/lock_manager_workflow.py index 4cfaaa1d..d1a013ac 100644 --- a/resource_locking/lock_manager_workflow.py +++ 
b/resource_locking/lock_manager_workflow.py @@ -43,7 +43,7 @@ async def add_resources(self, resources: list[str]): self.resources[resource] = None if len(self.waiters) > 0: next_holder = self.waiters.pop(0) - await self.allocate_resource(resource, next_holder) + await self.assign_resource(resource, next_holder) @workflow.signal async def acquire_resource(self, request: AcquireRequest): @@ -54,7 +54,7 @@ async def acquire_resource(self, request: AcquireRequest): for resource, holder in self.resources.items(): # Naively give out the first free resource, if we have one if holder is None: - await self.allocate_resource(resource, internal_request) + await self.assign_resource(resource, internal_request) return # Otherwise queue the request @@ -63,7 +63,7 @@ async def acquire_resource(self, request: AcquireRequest): f"workflow_id={request.workflow_id} is waiting for a resource" ) - async def allocate_resource( + async def assign_resource( self, resource: str, internal_request: InternalAcquireRequest ): self.resources[resource] = internal_request @@ -108,7 +108,7 @@ async def release_resource(self, signal_name, *args): # If there are queued requests, assign the resource to the next one if len(self.waiters) > 0: next_holder = self.waiters.pop(0) - await self.allocate_resource(resource, next_holder) + await self.assign_resource(resource, next_holder) @workflow.query def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: From d9a5f507cf529d5048fe5253df92e7e969bb4963 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Fri, 11 Apr 2025 09:15:49 -0700 Subject: [PATCH 15/23] renames --- resource_locking/starter.py | 67 ------------------- {resource_locking => resource_pool}/README.md | 30 ++++----- .../__init__.py | 0 .../resource_allocator.py | 20 +++--- .../resource_pool_workflow.py | 16 ++--- .../resource_user_workflow.py | 25 +++---- {resource_locking => resource_pool}/shared.py | 2 +- resource_pool/starter.py | 67 +++++++++++++++++++ {resource_locking 
=> resource_pool}/worker.py | 11 ++- tests/resource_locking/workflow_test.py | 46 ++++++------- 10 files changed, 138 insertions(+), 146 deletions(-) delete mode 100644 resource_locking/starter.py rename {resource_locking => resource_pool}/README.md (50%) rename {resource_locking => resource_pool}/__init__.py (100%) rename {resource_locking => resource_pool}/resource_allocator.py (89%) rename resource_locking/lock_manager_workflow.py => resource_pool/resource_pool_workflow.py (91%) rename resource_locking/resource_locking_workflow.py => resource_pool/resource_user_workflow.py (76%) rename {resource_locking => resource_pool}/shared.py (86%) create mode 100644 resource_pool/starter.py rename {resource_locking => resource_pool}/worker.py (65%) diff --git a/resource_locking/starter.py b/resource_locking/starter.py deleted file mode 100644 index 3a752753..00000000 --- a/resource_locking/starter.py +++ /dev/null @@ -1,67 +0,0 @@ -import asyncio -from typing import Any - -from temporalio.client import Client, WorkflowFailureError, WorkflowHandle -from temporalio.common import WorkflowIDConflictPolicy - -from resource_locking.lock_manager_workflow import ( - LockManagerWorkflow, - LockManagerWorkflowInput, -) -from resource_locking.resource_locking_workflow import ( - ResourceLockingWorkflow, - ResourceLockingWorkflowInput, -) -from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID - - -async def main(): - # Connect client - client = await Client.connect("localhost:7233") - - # Start the ResourceLockingWorkflows - resource_locking_handles: list[WorkflowHandle[Any, Any]] = [] - for i in range(0, 4): - input = ResourceLockingWorkflowInput( - iteration_to_fail_after=None, - should_continue_as_new=False, - ) - if i == 0: - input.should_continue_as_new = True - if i == 1: - input.iteration_to_fail_after = "first" - - resource_locking_handle = await client.start_workflow( - workflow=ResourceLockingWorkflow.run, - arg=input, - id=f"resource-locking-workflow-{i}", - 
task_queue="default", - ) - resource_locking_handles.append(resource_locking_handle) - - # Add some resources - lock_manager_handle = await client.start_workflow( - workflow=LockManagerWorkflow.run, - arg=LockManagerWorkflowInput( - resources={}, - waiters=[], - ), - id=LOCK_MANAGER_WORKFLOW_ID, - task_queue="default", - id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, - start_signal="add_resources", - start_signal_args=[["resource_a", "resource_b"]], - ) - - for resource_locking_handle in resource_locking_handles: - try: - await resource_locking_handle.result() - except WorkflowFailureError: - pass - - # Clean up after ourselves. In the real world, the lock manager workflow would run forever. - await lock_manager_handle.terminate() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/resource_locking/README.md b/resource_pool/README.md similarity index 50% rename from resource_locking/README.md rename to resource_pool/README.md index 581a87b0..143de0f3 100644 --- a/resource_locking/README.md +++ b/resource_pool/README.md @@ -1,22 +1,22 @@ -# Resource Locking Sample +# Resource Pool Sample -This sample shows how to use a long-lived `LockManagerWorkflow` to allocate `resources` to `ResourceLockingWorkflows`. -Each`ResourceLockingWorkflow` runs several activities while it has ownership of a resource. Note that -`LockManagerWorkflow` is making resource allocation decisions based on in-memory state. +This sample shows how to use a long-lived `ResourcePoolWorkflow` to allocate `resources` to `ResourceUserWorkflows`. +Each `ResourceUserWorkflow` runs several activities while it has ownership of a resource. Note that +`ResourcePoolWorkflow` is making resource allocation decisions based on in-memory state. Run the following from this directory to start the worker: uv run worker.py -This will start the worker. Then, in another terminal, run the following to execute several `ResourceLockingWorkflows`. +This will start the worker. 
Then, in another terminal, run the following to execute several `ResourceUserWorkflows`. uv run starter.py -You should see output indicating that the LockManagerWorkflow serialized access to each resource. +You should see output indicating that the `ResourcePoolWorkflow` serialized access to each resource. -You can query the set of current lock holders with: +You can query the set of current resource holders with: - tctl wf query -w lock_manager --qt get_current_holders + tctl wf query -w resource_pool --qt get_current_holders # Other approaches @@ -24,7 +24,7 @@ There are simpler ways to manage concurrent access to resources. Consider using and limiting the number of activity slots on the workers. The golang SDK also [sessions](https://docs.temporal.io/develop/go/sessions) that allow workflows to pin themselves to workers. -The technique in this sample is capable of more complex resource locking than the options above, but it doesn't scale +The technique in this sample is capable of more complex resource allocation than the options above, but it doesn't scale as well. Specifically, it can: - Manage access to a set of resources that is decoupled from the set of workers and task queues - Run arbitrary code to place workloads on resources as they become available @@ -35,14 +35,14 @@ This sample uses true locking (not leasing!) to avoid complexity and scaling con signals. Locking carries a risk where failure to unlock permanently removing a resource from the pool. However, with Temporal's durable execution guarantees, this can only happen if: -- A LoadWorkflow times out (prohibited in the sample code) -- An operator terminates a LoadWorkflow. (Temporal recommends canceling workflows instead of terminating them whenever possible.) +- A ResourceUserWorkflow times out (prohibited in the sample code) +- An operator terminates a ResourceUserWorkflow. (Temporal recommends canceling workflows instead of terminating them whenever possible.)
- You shut down your workers and never restart them (unhandled, but irrelevant) If a leak were to happen, you could discover the identity of the leaker using the query above, then: - tctl wf signal -w lock_manager --name release_resource --input '{ "resource": "the resource", "workflow_id": "holder workflow id", "run_id": "holder run id" }' + tctl wf signal -w resource_pool --name release_resource --input '{ "release_key": "" }' -Performance: A single LockManagerWorkflow scales to tens, but not hundreds, of lock/unlock events per second. It is -best suited for locking resources during long-running workflows. Actual performance will depend on your temporal -server's persistence layer. \ No newline at end of file +Performance: A single ResourcePoolWorkflow scales to tens, but not hundreds, of request/release events per second. It is +best suited for allocating resources to long-running workflows. Actual performance will depend on your temporal server's +persistence layer. \ No newline at end of file diff --git a/resource_locking/__init__.py b/resource_pool/__init__.py similarity index 100% rename from resource_locking/__init__.py rename to resource_pool/__init__.py diff --git a/resource_locking/resource_allocator.py b/resource_pool/resource_allocator.py similarity index 89% rename from resource_locking/resource_allocator.py rename to resource_pool/resource_allocator.py index c479c684..c8a33133 100644 --- a/resource_locking/resource_allocator.py +++ b/resource_pool/resource_allocator.py @@ -6,12 +6,12 @@ from temporalio.client import Client from temporalio.common import WorkflowIDConflictPolicy -from resource_locking.lock_manager_workflow import ( - LockManagerWorkflow, - LockManagerWorkflowInput, +from resource_pool.resource_pool_workflow import ( + ResourcePoolWorkflow, + ResourcePoolWorkflowInput, ) -from resource_locking.shared import ( - LOCK_MANAGER_WORKFLOW_ID, +from resource_pool.shared import ( + RESOURCE_POOL_WORKFLOW_ID, AcquiredResource, AcquireRequest,
AcquireResponse, @@ -29,12 +29,12 @@ async def send_acquire_signal(self) -> None: # This will start and signal the workflow if it isn't running, otherwise it will signal the current run. await self.client.start_workflow( - workflow=LockManagerWorkflow.run, - arg=LockManagerWorkflowInput( + workflow=ResourcePoolWorkflow.run, + arg=ResourcePoolWorkflowInput( resources={}, waiters=[], ), - id=LOCK_MANAGER_WORKFLOW_ID, + id=RESOURCE_POOL_WORKFLOW_ID, task_queue="default", id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, start_signal="acquire_resource", @@ -85,7 +85,9 @@ async def assign_resource(input: AcquireResponse): yield resource finally: if resource.autorelease: - handle = workflow.get_external_workflow_handle(LOCK_MANAGER_WORKFLOW_ID) + handle = workflow.get_external_workflow_handle( + RESOURCE_POOL_WORKFLOW_ID + ) await handle.signal(resource.release_signal_name) diff --git a/resource_locking/lock_manager_workflow.py b/resource_pool/resource_pool_workflow.py similarity index 91% rename from resource_locking/lock_manager_workflow.py rename to resource_pool/resource_pool_workflow.py index d1a013ac..074db6dc 100644 --- a/resource_locking/lock_manager_workflow.py +++ b/resource_pool/resource_pool_workflow.py @@ -4,7 +4,7 @@ from temporalio import workflow -from resource_locking.shared import AcquireRequest, AcquireResponse +from resource_pool.shared import AcquireRequest, AcquireResponse # Internal to this workflow, we'll associate randomly generated release signal names with each acquire request. 
@@ -14,16 +14,16 @@ class InternalAcquireRequest(AcquireRequest): @dataclass -class LockManagerWorkflowInput: - # Key is resource, value is current lock holder for the resource (None if not locked) +class ResourcePoolWorkflowInput: + # Key is resource, value is current holder of the resource (None if not held) resources: dict[str, Optional[InternalAcquireRequest]] waiters: list[InternalAcquireRequest] @workflow.defn -class LockManagerWorkflow: +class ResourcePoolWorkflow: @workflow.init - def __init__(self, input: LockManagerWorkflowInput): + def __init__(self, input: ResourcePoolWorkflowInput): self.resources = input.resources self.waiters = input.waiters self.release_signal_to_resource: dict[str, str] = {} @@ -94,7 +94,7 @@ async def release_resource(self, signal_name, *args): holder = self.resources[resource] if holder is None: workflow.logger.warning( - f"Ignoring request to release resource that is not locked: {resource}" + f"Ignoring request to release resource that is not held: {resource}" ) return @@ -115,7 +115,7 @@ def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: return {k: v if v else None for k, v in self.resources.items()} @workflow.run - async def run(self, _: LockManagerWorkflowInput) -> None: + async def run(self, _: ResourcePoolWorkflowInput) -> None: # Continue as new either when temporal tells us to, or every 12 hours (so it occurs semi-frequently) await workflow.wait_condition( lambda: workflow.info().is_continue_as_new_suggested(), @@ -123,7 +123,7 @@ async def run(self, _: LockManagerWorkflowInput) -> None: ) workflow.continue_as_new( - LockManagerWorkflowInput( + ResourcePoolWorkflowInput( resources=self.resources, waiters=self.waiters, ) diff --git a/resource_locking/resource_locking_workflow.py b/resource_pool/resource_user_workflow.py similarity index 76% rename from resource_locking/resource_locking_workflow.py rename to resource_pool/resource_user_workflow.py index d9e25278..71ff8454 100644 --- 
a/resource_locking/resource_locking_workflow.py +++ b/resource_pool/resource_user_workflow.py @@ -1,17 +1,12 @@ import asyncio from dataclasses import dataclass, field from datetime import timedelta -from typing import Callable, Optional +from typing import Optional from temporalio import activity, workflow -from resource_locking.resource_allocator import ResourceAllocator -from resource_locking.shared import ( - LOCK_MANAGER_WORKFLOW_ID, - AcquiredResource, - AcquireRequest, - AcquireResponse, -) +from resource_pool.resource_allocator import ResourceAllocator +from resource_pool.shared import AcquiredResource @dataclass @@ -33,12 +28,12 @@ async def use_resource(input: UseResourceActivityInput) -> None: @dataclass -class ResourceLockingWorkflowInput: - # If set, this workflow will fail after the "first", "second", or "third" activity. +class ResourceUserWorkflowInput: + # If set, this workflow will fail after the "first" or "second" activity. iteration_to_fail_after: Optional[str] - # If True, this workflow will continue as new after the third activity. The next iteration will run three more - # activities, but will not continue as new. This lets us exercise the handoff logic. + # If True, this workflow will continue as new after the last activity. The next iteration will run more activities, + # but will not continue as new. 
should_continue_as_new: bool # Used to transfer resource ownership between iterations during continue_as_new @@ -54,9 +49,9 @@ class FailWorkflowException(Exception): @workflow.defn(failure_exception_types=[FailWorkflowException]) -class ResourceLockingWorkflow: +class ResourceUserWorkflow: @workflow.run - async def run(self, input: ResourceLockingWorkflowInput): + async def run(self, input: ResourceUserWorkflowInput): async with ResourceAllocator.acquire_resource( already_acquired_resource=input.already_acquired_resource ) as resource: @@ -74,7 +69,7 @@ async def run(self, input: ResourceLockingWorkflowInput): raise FailWorkflowException() if input.should_continue_as_new: - next_input = ResourceLockingWorkflowInput( + next_input = ResourceUserWorkflowInput( iteration_to_fail_after=input.iteration_to_fail_after, should_continue_as_new=False, already_acquired_resource=resource, diff --git a/resource_locking/shared.py b/resource_pool/shared.py similarity index 86% rename from resource_locking/shared.py rename to resource_pool/shared.py index 2afdc556..3e645d8d 100644 --- a/resource_locking/shared.py +++ b/resource_pool/shared.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field -LOCK_MANAGER_WORKFLOW_ID = "lock_manager" +RESOURCE_POOL_WORKFLOW_ID = "resource_pool" @dataclass diff --git a/resource_pool/starter.py b/resource_pool/starter.py new file mode 100644 index 00000000..7954cf00 --- /dev/null +++ b/resource_pool/starter.py @@ -0,0 +1,67 @@ +import asyncio +from typing import Any + +from temporalio.client import Client, WorkflowFailureError, WorkflowHandle +from temporalio.common import WorkflowIDConflictPolicy + +from resource_pool.resource_pool_workflow import ( + ResourcePoolWorkflow, + ResourcePoolWorkflowInput, +) +from resource_pool.resource_user_workflow import ( + ResourceUserWorkflow, + ResourceUserWorkflowInput, +) +from resource_pool.shared import RESOURCE_POOL_WORKFLOW_ID + + +async def main(): + # Connect client + client = await 
Client.connect("localhost:7233") + + # Start the ResourceUserWorkflows + resource_user_handles: list[WorkflowHandle[Any, Any]] = [] + for i in range(0, 4): + input = ResourceUserWorkflowInput( + iteration_to_fail_after=None, + should_continue_as_new=False, + ) + if i == 0: + input.should_continue_as_new = True + if i == 1: + input.iteration_to_fail_after = "first" + + handle = await client.start_workflow( + workflow=ResourceUserWorkflow.run, + arg=input, + id=f"resource-user-workflow-{i}", + task_queue="default", + ) + resource_user_handles.append(handle) + + # Add some resources + resource_pool_handle = await client.start_workflow( + workflow=ResourcePoolWorkflow.run, + arg=ResourcePoolWorkflowInput( + resources={}, + waiters=[], + ), + id=RESOURCE_POOL_WORKFLOW_ID, + task_queue="default", + id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, + start_signal="add_resources", + start_signal_args=[["resource_a", "resource_b"]], + ) + + for handle in resource_user_handles: + try: + await handle.result() + except WorkflowFailureError: + pass + + # Clean up after ourselves. In the real world, the resource pool workflow would run forever. 
+ await resource_pool_handle.terminate() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/resource_locking/worker.py b/resource_pool/worker.py similarity index 65% rename from resource_locking/worker.py rename to resource_pool/worker.py index d28b18da..67f592c9 100644 --- a/resource_locking/worker.py +++ b/resource_pool/worker.py @@ -4,12 +4,9 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_locking.lock_manager_workflow import LockManagerWorkflow -from resource_locking.resource_allocator import ResourceAllocator -from resource_locking.resource_locking_workflow import ( - ResourceLockingWorkflow, - use_resource, -) +from resource_pool.resource_allocator import ResourceAllocator +from resource_pool.resource_pool_workflow import ResourcePoolWorkflow +from resource_pool.resource_user_workflow import ResourceUserWorkflow, use_resource async def main(): @@ -24,7 +21,7 @@ async def main(): worker = Worker( client, task_queue="default", - workflows=[LockManagerWorkflow, ResourceLockingWorkflow], + workflows=[ResourcePoolWorkflow, ResourceUserWorkflow], activities=[ use_resource, resource_allocator.send_acquire_signal, diff --git a/tests/resource_locking/workflow_test.py b/tests/resource_locking/workflow_test.py index 96d5d5ee..b93e5237 100644 --- a/tests/resource_locking/workflow_test.py +++ b/tests/resource_locking/workflow_test.py @@ -1,7 +1,5 @@ import asyncio -import uuid from collections import defaultdict -from datetime import timedelta from typing import Any, Optional, Sequence from temporalio import activity @@ -9,17 +7,17 @@ from temporalio.common import WorkflowIDConflictPolicy from temporalio.worker import Worker -from resource_locking.lock_manager_workflow import ( - LockManagerWorkflow, - LockManagerWorkflowInput, +from resource_pool.resource_allocator import ResourceAllocator +from resource_pool.resource_pool_workflow import ( + ResourcePoolWorkflow, + ResourcePoolWorkflowInput, ) -from 
resource_locking.resource_allocator import ResourceAllocator -from resource_locking.resource_locking_workflow import ( - ResourceLockingWorkflow, - ResourceLockingWorkflowInput, +from resource_pool.resource_user_workflow import ( + ResourceUserWorkflow, + ResourceUserWorkflowInput, UseResourceActivityInput, ) -from resource_locking.shared import LOCK_MANAGER_WORKFLOW_ID +from resource_pool.shared import RESOURCE_POOL_WORKFLOW_ID TASK_QUEUE = "default" @@ -42,7 +40,7 @@ async def use_resource_mock(input: UseResourceActivityInput) -> None: async with Worker( client, task_queue=TASK_QUEUE, - workflows=[LockManagerWorkflow, ResourceLockingWorkflow], + workflows=[ResourcePoolWorkflow, ResourceUserWorkflow], activities=[use_resource_mock, resource_allocator.send_acquire_signal], ): await run_all_workflows(client) @@ -76,9 +74,9 @@ async def use_resource_mock(input: UseResourceActivityInput) -> None: async def run_all_workflows(client: Client): - resource_locking_handles: list[WorkflowHandle[Any, Any]] = [] + resource_user_handles: list[WorkflowHandle[Any, Any]] = [] for i in range(0, 8): - input = ResourceLockingWorkflowInput( + input = ResourceUserWorkflowInput( iteration_to_fail_after=None, should_continue_as_new=False, ) @@ -87,32 +85,32 @@ async def run_all_workflows(client: Client): if i == 1: input.iteration_to_fail_after = "first" - resource_locking_handle = await client.start_workflow( - workflow=ResourceLockingWorkflow.run, + handle = await client.start_workflow( + workflow=ResourceUserWorkflow.run, arg=input, - id=f"resource-locking-workflow-{i}", + id=f"resource-user-workflow-{i}", task_queue=TASK_QUEUE, ) - resource_locking_handles.append(resource_locking_handle) + resource_user_handles.append(handle) # Add some resources - lock_manager_handle = await client.start_workflow( - workflow=LockManagerWorkflow.run, - arg=LockManagerWorkflowInput( + resource_pool_handle = await client.start_workflow( + workflow=ResourcePoolWorkflow.run, + 
arg=ResourcePoolWorkflowInput( resources={}, waiters=[], ), - id=LOCK_MANAGER_WORKFLOW_ID, + id=RESOURCE_POOL_WORKFLOW_ID, task_queue="default", id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, start_signal="add_resources", start_signal_args=[["r_a", "r_b", "r_c"]], ) - for resource_locking_handle in resource_locking_handles: + for handle in resource_user_handles: try: - await resource_locking_handle.result() + await handle.result() except WorkflowFailureError: pass - await lock_manager_handle.terminate() + await resource_pool_handle.terminate() From d75d2d39141689ab359ec92cdc339b28547b6a5f Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Fri, 11 Apr 2025 09:26:21 -0700 Subject: [PATCH 16/23] type hints for all returns --- resource_pool/resource_allocator.py | 10 +++++----- resource_pool/resource_pool_workflow.py | 10 +++++----- resource_pool/resource_user_workflow.py | 2 +- resource_pool/starter.py | 2 +- resource_pool/worker.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/resource_pool/resource_allocator.py b/resource_pool/resource_allocator.py index c8a33133..b0b2b5f1 100644 --- a/resource_pool/resource_allocator.py +++ b/resource_pool/resource_allocator.py @@ -1,6 +1,6 @@ from contextlib import asynccontextmanager from datetime import timedelta -from typing import Optional +from typing import AsyncGenerator, Optional from temporalio import activity, workflow from temporalio.client import Client @@ -20,7 +20,7 @@ # Use this class in workflow code that that needs to run on locked resources. 
class ResourceAllocator: - def __init__(self, client: Client): + def __init__(self, client: Client) -> None: self.client = client @activity.defn @@ -48,13 +48,13 @@ async def acquire_resource( *, already_acquired_resource: Optional[AcquiredResource] = None, max_wait_time: timedelta = timedelta(minutes=5), - ): + ) -> AsyncGenerator[AcquiredResource, None]: warn_when_workflow_has_timeouts() resource = already_acquired_resource if resource is None: - async def assign_resource(input: AcquireResponse): + async def assign_resource(input: AcquireResponse) -> None: workflow.set_signal_handler("assign_resource", None) nonlocal resource resource = AcquiredResource( @@ -91,7 +91,7 @@ async def assign_resource(input: AcquireResponse): await handle.signal(resource.release_signal_name) -def warn_when_workflow_has_timeouts(): +def warn_when_workflow_has_timeouts() -> None: if has_timeout(workflow.info().run_timeout): workflow.logger.warning( f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout}) - this will leak locks" diff --git a/resource_pool/resource_pool_workflow.py b/resource_pool/resource_pool_workflow.py index 074db6dc..413aeea8 100644 --- a/resource_pool/resource_pool_workflow.py +++ b/resource_pool/resource_pool_workflow.py @@ -23,7 +23,7 @@ class ResourcePoolWorkflowInput: @workflow.defn class ResourcePoolWorkflow: @workflow.init - def __init__(self, input: ResourcePoolWorkflowInput): + def __init__(self, input: ResourcePoolWorkflowInput) -> None: self.resources = input.resources self.waiters = input.waiters self.release_signal_to_resource: dict[str, str] = {} @@ -32,7 +32,7 @@ def __init__(self, input: ResourcePoolWorkflowInput): self.release_signal_to_resource[holder.release_signal] = resource @workflow.signal - async def add_resources(self, resources: list[str]): + async def add_resources(self, resources: list[str]) -> None: for resource in resources: if resource in self.resources: workflow.logger.warning( @@ -46,7 +46,7 @@ async 
def add_resources(self, resources: list[str]): await self.assign_resource(resource, next_holder) @workflow.signal - async def acquire_resource(self, request: AcquireRequest): + async def acquire_resource(self, request: AcquireRequest) -> None: internal_request = InternalAcquireRequest( workflow_id=request.workflow_id, release_signal=None ) @@ -65,7 +65,7 @@ async def acquire_resource(self, request: AcquireRequest): async def assign_resource( self, resource: str, internal_request: InternalAcquireRequest - ): + ) -> None: self.resources[resource] = internal_request workflow.logger.info( f"workflow_id={internal_request.workflow_id} acquired resource {resource}" @@ -82,7 +82,7 @@ async def assign_resource( ) @workflow.signal(dynamic=True) - async def release_resource(self, signal_name, *args): + async def release_resource(self, signal_name, *args) -> None: if not signal_name in self.release_signal_to_resource: workflow.logger.warning( f"Ignoring unknown signal: {signal_name} was not a valid release signal." 
diff --git a/resource_pool/resource_user_workflow.py b/resource_pool/resource_user_workflow.py index 71ff8454..fe81d83f 100644 --- a/resource_pool/resource_user_workflow.py +++ b/resource_pool/resource_user_workflow.py @@ -51,7 +51,7 @@ class FailWorkflowException(Exception): @workflow.defn(failure_exception_types=[FailWorkflowException]) class ResourceUserWorkflow: @workflow.run - async def run(self, input: ResourceUserWorkflowInput): + async def run(self, input: ResourceUserWorkflowInput) -> None: async with ResourceAllocator.acquire_resource( already_acquired_resource=input.already_acquired_resource ) as resource: diff --git a/resource_pool/starter.py b/resource_pool/starter.py index 7954cf00..7517584b 100644 --- a/resource_pool/starter.py +++ b/resource_pool/starter.py @@ -15,7 +15,7 @@ from resource_pool.shared import RESOURCE_POOL_WORKFLOW_ID -async def main(): +async def main() -> None: # Connect client client = await Client.connect("localhost:7233") diff --git a/resource_pool/worker.py b/resource_pool/worker.py index 67f592c9..8f2d638d 100644 --- a/resource_pool/worker.py +++ b/resource_pool/worker.py @@ -9,7 +9,7 @@ from resource_pool.resource_user_workflow import ResourceUserWorkflow, use_resource -async def main(): +async def main() -> None: logging.basicConfig(level=logging.INFO) # Start client From c38f803a297303d3a941b8f9fe1c2f16a839f39e Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Fri, 11 Apr 2025 11:53:08 -0700 Subject: [PATCH 17/23] handle substantive PR feedback --- resource_pool/resource_allocator.py | 108 --------------------- resource_pool/resource_pool_client.py | 97 ++++++++++++++++++ resource_pool/resource_pool_workflow.py | 124 +++++++++++++----------- resource_pool/resource_user_workflow.py | 30 +++--- resource_pool/shared.py | 18 +++- resource_pool/starter.py | 27 +++--- resource_pool/worker.py | 6 +- tests/resource_locking/workflow_test.py | 32 +++--- 8 files changed, 227 insertions(+), 215 deletions(-) delete mode 100644 
resource_pool/resource_allocator.py create mode 100644 resource_pool/resource_pool_client.py diff --git a/resource_pool/resource_allocator.py b/resource_pool/resource_allocator.py deleted file mode 100644 index b0b2b5f1..00000000 --- a/resource_pool/resource_allocator.py +++ /dev/null @@ -1,108 +0,0 @@ -from contextlib import asynccontextmanager -from datetime import timedelta -from typing import AsyncGenerator, Optional - -from temporalio import activity, workflow -from temporalio.client import Client -from temporalio.common import WorkflowIDConflictPolicy - -from resource_pool.resource_pool_workflow import ( - ResourcePoolWorkflow, - ResourcePoolWorkflowInput, -) -from resource_pool.shared import ( - RESOURCE_POOL_WORKFLOW_ID, - AcquiredResource, - AcquireRequest, - AcquireResponse, -) - - -# Use this class in workflow code that that needs to run on locked resources. -class ResourceAllocator: - def __init__(self, client: Client) -> None: - self.client = client - - @activity.defn - async def send_acquire_signal(self) -> None: - info = activity.info() - - # This will start and signal the workflow if it isn't running, otherwise it will signal the current run. 
- await self.client.start_workflow( - workflow=ResourcePoolWorkflow.run, - arg=ResourcePoolWorkflowInput( - resources={}, - waiters=[], - ), - id=RESOURCE_POOL_WORKFLOW_ID, - task_queue="default", - id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, - start_signal="acquire_resource", - start_signal_args=[AcquireRequest(info.workflow_id)], - ) - - @classmethod - @asynccontextmanager - async def acquire_resource( - cls, - *, - already_acquired_resource: Optional[AcquiredResource] = None, - max_wait_time: timedelta = timedelta(minutes=5), - ) -> AsyncGenerator[AcquiredResource, None]: - warn_when_workflow_has_timeouts() - - resource = already_acquired_resource - if resource is None: - - async def assign_resource(input: AcquireResponse) -> None: - workflow.set_signal_handler("assign_resource", None) - nonlocal resource - resource = AcquiredResource( - resource=input.resource, - release_signal_name=input.release_signal_name, - ) - - workflow.set_signal_handler("assign_resource", assign_resource) - - await workflow.start_activity( - ResourceAllocator.send_acquire_signal, # type: ignore[arg-type] - start_to_close_timeout=timedelta(seconds=10), - ) - - await workflow.wait_condition( - lambda: resource is not None, timeout=max_wait_time - ) - - # Can't happen, but the typechecker doesn't know about workflow.wait_condition - if resource is None: - raise RuntimeError("resource was None when it can't be") - - # During the yield, the calling workflow owns the resource. Note that this is a lock, not a lease! Our - # finally block will release the resource if an activity fails. This is why we asserted the lack of - # workflow-level timeouts above - the finally block wouldn't run if there was a timeout. 
- try: - resource.autorelease = True - yield resource - finally: - if resource.autorelease: - handle = workflow.get_external_workflow_handle( - RESOURCE_POOL_WORKFLOW_ID - ) - await handle.signal(resource.release_signal_name) - - -def warn_when_workflow_has_timeouts() -> None: - if has_timeout(workflow.info().run_timeout): - workflow.logger.warning( - f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout}) - this will leak locks" - ) - if has_timeout(workflow.info().execution_timeout): - workflow.logger.warning( - f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout}) - this will leak locks" - ) - - -def has_timeout(timeout: Optional[timedelta]) -> bool: - # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the - # continue_as_new call). - return timeout is not None and timeout > timedelta(0) diff --git a/resource_pool/resource_pool_client.py b/resource_pool/resource_pool_client.py new file mode 100644 index 00000000..8eba6cba --- /dev/null +++ b/resource_pool/resource_pool_client.py @@ -0,0 +1,97 @@ +from contextlib import asynccontextmanager +from datetime import timedelta +from typing import AsyncGenerator, Optional + +from temporalio import workflow + +from resource_pool.shared import ( + AcquiredResource, + AcquireRequest, + AcquireResponse, + DetachedResource, +) + + +# Use this class in workflow code that that needs to run on locked resources. 
+class ResourcePoolClient: + def __init__(self, pool_workflow_id: str) -> None: + self.pool_workflow_id = pool_workflow_id + self.acquired_resources: list[AcquiredResource] = [] + + async def send_acquire_signal(self) -> None: + handle = workflow.get_external_workflow_handle(self.pool_workflow_id) + await handle.signal( + "acquire_resource", AcquireRequest(workflow.info().workflow_id) + ) + + async def send_release_signal(self, acquired_resource: AcquiredResource) -> None: + handle = workflow.get_external_workflow_handle(self.pool_workflow_id) + await handle.signal( + "release_resource", + AcquireResponse( + resource=acquired_resource.resource, + release_key=acquired_resource.release_key, + ), + ) + + def lazy_register_signal_handler(self) -> None: + if workflow.get_signal_handler("assign_resource") is None: + workflow.set_signal_handler("assign_resource", self.assign_resource) + + def assign_resource(self, response: AcquireResponse) -> None: + self.acquired_resources.append( + AcquiredResource( + resource=response.resource, release_key=response.release_key + ) + ) + + @asynccontextmanager + async def acquire_resource( + self, + *, + reattach: Optional[DetachedResource] = None, + max_wait_time: timedelta = timedelta(minutes=5), + ) -> AsyncGenerator[AcquiredResource, None]: + warn_when_workflow_has_timeouts() + self.lazy_register_signal_handler() + + if reattach is None: + await self.send_acquire_signal() + await workflow.wait_condition( + lambda: len(self.acquired_resources) > 0, timeout=max_wait_time + ) + resource = self.acquired_resources.pop(0) + else: + resource = AcquiredResource( + resource=reattach.resource, release_key=reattach.release_key + ) + + # Can't happen, but the typechecker doesn't know about workflow.wait_condition + if resource is None: + raise RuntimeError("resource was None when it can't be") + + # During the yield, the calling workflow owns the resource. Note that this is a lock, not a lease! 
Our + # finally block will release the resource if an activity fails. This is why we asserted the lack of + # workflow-level timeouts above - the finally block wouldn't run if there was a timeout. + try: + yield resource + finally: + if not resource.detached: + await self.send_release_signal(resource) + + +def warn_when_workflow_has_timeouts() -> None: + if has_timeout(workflow.info().run_timeout): + workflow.logger.warning( + f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout}) - this will leak locks" + ) + if has_timeout(workflow.info().execution_timeout): + workflow.logger.warning( + f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout}) - this will leak locks" + ) + + +def has_timeout(timeout: Optional[timedelta]) -> bool: + # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the + # continue_as_new call). + return timeout is not None and timeout > timedelta(0) diff --git a/resource_pool/resource_pool_workflow.py b/resource_pool/resource_pool_workflow.py index 413aeea8..fb8042dc 100644 --- a/resource_pool/resource_pool_workflow.py +++ b/resource_pool/resource_pool_workflow.py @@ -26,10 +26,11 @@ class ResourcePoolWorkflow: def __init__(self, input: ResourcePoolWorkflowInput) -> None: self.resources = input.resources self.waiters = input.waiters - self.release_signal_to_resource: dict[str, str] = {} + self.release_key_to_resource: dict[str, str] = {} + for resource, holder in self.resources.items(): if holder is not None and holder.release_signal is not None: - self.release_signal_to_resource[holder.release_signal] = resource + self.release_key_to_resource[holder.release_signal] = resource @workflow.signal async def add_resources(self, resources: list[str]) -> None: @@ -39,29 +40,43 @@ async def add_resources(self, resources: list[str]) -> None: f"Ignoring attempt to add already-existing resource: {resource}" ) continue - - 
self.resources[resource] = None - if len(self.waiters) > 0: - next_holder = self.waiters.pop(0) - await self.assign_resource(resource, next_holder) + else: + self.resources[resource] = None @workflow.signal async def acquire_resource(self, request: AcquireRequest) -> None: - internal_request = InternalAcquireRequest( - workflow_id=request.workflow_id, release_signal=None + self.waiters.append( + InternalAcquireRequest(workflow_id=request.workflow_id, release_signal=None) + ) + workflow.logger.info( + f"workflow_id={request.workflow_id} is waiting for a resource" ) - for resource, holder in self.resources.items(): - # Naively give out the first free resource, if we have one - if holder is None: - await self.assign_resource(resource, internal_request) - return + @workflow.signal() + async def release_resource(self, acquire_response: AcquireResponse) -> None: + release_key = acquire_response.release_key + resource = self.release_key_to_resource.get(release_key) + if resource is None: + workflow.logger.warning(f"Ignoring unknown release_key: {release_key}") + return + + holder = self.resources[resource] + if holder is None: + workflow.logger.warning( + f"Ignoring request to release resource that is not held: {resource}" + ) + return - # Otherwise queue the request - self.waiters.append(internal_request) + # Remove the current holder workflow.logger.info( - f"workflow_id={request.workflow_id} is waiting for a resource" + f"workflow_id={holder.workflow_id} released resource {resource}" ) + self.resources[resource] = None + del self.release_key_to_resource[release_key] + + @workflow.query + def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: + return self.resources async def assign_resource( self, resource: str, internal_request: InternalAcquireRequest @@ -71,60 +86,57 @@ async def assign_resource( f"workflow_id={internal_request.workflow_id} acquired resource {resource}" ) internal_request.release_signal = str(workflow.uuid4()) - 
self.release_signal_to_resource[internal_request.release_signal] = resource + self.release_key_to_resource[internal_request.release_signal] = resource requester = workflow.get_external_workflow_handle(internal_request.workflow_id) await requester.signal( "assign_resource", AcquireResponse( - release_signal_name=internal_request.release_signal, resource=resource + release_key=internal_request.release_signal, resource=resource ), ) - @workflow.signal(dynamic=True) - async def release_resource(self, signal_name, *args) -> None: - if not signal_name in self.release_signal_to_resource: - workflow.logger.warning( - f"Ignoring unknown signal: {signal_name} was not a valid release signal." - ) - return + async def assign_next_resource(self) -> bool: + if len(self.waiters) == 0: + return False - resource = self.release_signal_to_resource[signal_name] + next_free_resource = self.get_free_resource() + if next_free_resource is None: + return False - holder = self.resources[resource] - if holder is None: - workflow.logger.warning( - f"Ignoring request to release resource that is not held: {resource}" - ) - return + next_waiter = self.waiters.pop(0) + await self.assign_resource(next_free_resource, next_waiter) + return True - # Remove the current holder - workflow.logger.info( - f"workflow_id={holder.workflow_id} released resource {resource}" + def get_free_resource(self) -> Optional[str]: + return next( + (resource for resource, holder in self.resources.items() if holder is None), + None, ) - self.resources[resource] = None - del self.release_signal_to_resource[signal_name] - # If there are queued requests, assign the resource to the next one - if len(self.waiters) > 0: - next_holder = self.waiters.pop(0) - await self.assign_resource(resource, next_holder) + def can_assign_resource(self) -> bool: + return len(self.waiters) > 0 and self.get_free_resource() is not None - @workflow.query - def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: - return {k: v 
if v else None for k, v in self.resources.items()} + def should_continue_as_new(self) -> bool: + return ( + workflow.info().is_continue_as_new_suggested() + and workflow.all_handlers_finished() + ) @workflow.run async def run(self, _: ResourcePoolWorkflowInput) -> None: - # Continue as new either when temporal tells us to, or every 12 hours (so it occurs semi-frequently) - await workflow.wait_condition( - lambda: workflow.info().is_continue_as_new_suggested(), - timeout=timedelta(hours=12), - ) - - workflow.continue_as_new( - ResourcePoolWorkflowInput( - resources=self.resources, - waiters=self.waiters, + while True: + await workflow.wait_condition( + lambda: self.can_assign_resource() or self.should_continue_as_new() ) - ) + + if await self.assign_next_resource(): + continue + + if self.should_continue_as_new(): + workflow.continue_as_new( + ResourcePoolWorkflowInput( + resources=self.resources, + waiters=self.waiters, + ) + ) diff --git a/resource_pool/resource_user_workflow.py b/resource_pool/resource_user_workflow.py index fe81d83f..8af2b2f3 100644 --- a/resource_pool/resource_user_workflow.py +++ b/resource_pool/resource_user_workflow.py @@ -5,8 +5,8 @@ from temporalio import activity, workflow -from resource_pool.resource_allocator import ResourceAllocator -from resource_pool.shared import AcquiredResource +from resource_pool.resource_pool_client import ResourcePoolClient +from resource_pool.shared import AcquiredResource, DetachedResource @dataclass @@ -29,6 +29,9 @@ async def use_resource(input: UseResourceActivityInput) -> None: @dataclass class ResourceUserWorkflowInput: + # The id of the resource pool workflow to request a resource from + resource_pool_workflow_id: str + # If set, this workflow will fail after the "first" or "second" activity. 
iteration_to_fail_after: Optional[str] @@ -37,7 +40,7 @@ class ResourceUserWorkflowInput: should_continue_as_new: bool # Used to transfer resource ownership between iterations during continue_as_new - already_acquired_resource: Optional[AcquiredResource] = field(default=None) + already_acquired_resource: Optional[DetachedResource] = field(default=None) class FailWorkflowException(Exception): @@ -52,13 +55,15 @@ class FailWorkflowException(Exception): class ResourceUserWorkflow: @workflow.run async def run(self, input: ResourceUserWorkflowInput) -> None: - async with ResourceAllocator.acquire_resource( - already_acquired_resource=input.already_acquired_resource - ) as resource: + pool_client = ResourcePoolClient(input.resource_pool_workflow_id) + + async with pool_client.acquire_resource( + reattach=input.already_acquired_resource + ) as acquired_resource: for iteration in ["first", "second"]: await workflow.execute_activity( use_resource, - UseResourceActivityInput(resource.resource, iteration), + UseResourceActivityInput(acquired_resource.resource, iteration), start_to_close_timeout=timedelta(seconds=10), ) @@ -68,15 +73,16 @@ async def run(self, input: ResourceUserWorkflowInput) -> None: ) raise FailWorkflowException() + # This workflow only continues as new so it can demonstrate how to pass acquired resources across + # iterations. Ordinarily, such a short workflow would not use continue as new. if input.should_continue_as_new: + detached_resource = acquired_resource.detach() + next_input = ResourceUserWorkflowInput( + resource_pool_workflow_id=input.resource_pool_workflow_id, iteration_to_fail_after=input.iteration_to_fail_after, should_continue_as_new=False, - already_acquired_resource=resource, + already_acquired_resource=detached_resource, ) - # By default, ResourceAllocator will release the resource when we return. 
We want to hold the resource - # across continue-as-new for the sake of demonstration.b - resource.autorelease = False - workflow.continue_as_new(next_input) diff --git a/resource_pool/shared.py b/resource_pool/shared.py index 3e645d8d..3930bb72 100644 --- a/resource_pool/shared.py +++ b/resource_pool/shared.py @@ -10,10 +10,22 @@ class AcquireRequest: @dataclass class AcquireResponse: - release_signal_name: str + release_key: str resource: str @dataclass -class AcquiredResource(AcquireResponse): - autorelease: bool = field(default=True) +class DetachedResource: + resource: str + release_key: str + + +@dataclass +class AcquiredResource: + resource: str + release_key: str + detached: bool = field(default=False) + + def detach(self) -> DetachedResource: + self.detached = True + return DetachedResource(resource=self.resource, release_key=self.release_key) diff --git a/resource_pool/starter.py b/resource_pool/starter.py index 7517584b..b215bf4c 100644 --- a/resource_pool/starter.py +++ b/resource_pool/starter.py @@ -19,10 +19,23 @@ async def main() -> None: # Connect client client = await Client.connect("localhost:7233") + # Initialize the resource pool + resource_pool_handle = await client.start_workflow( + workflow=ResourcePoolWorkflow.run, + arg=ResourcePoolWorkflowInput( + resources={"resource_a": None, "resource_b": None}, + waiters=[], + ), + id=RESOURCE_POOL_WORKFLOW_ID, + task_queue="default", + id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, + ) + # Start the ResourceUserWorkflows resource_user_handles: list[WorkflowHandle[Any, Any]] = [] for i in range(0, 4): input = ResourceUserWorkflowInput( + resource_pool_workflow_id=RESOURCE_POOL_WORKFLOW_ID, iteration_to_fail_after=None, should_continue_as_new=False, ) @@ -39,20 +52,6 @@ async def main() -> None: ) resource_user_handles.append(handle) - # Add some resources - resource_pool_handle = await client.start_workflow( - workflow=ResourcePoolWorkflow.run, - arg=ResourcePoolWorkflowInput( - 
resources={}, - waiters=[], - ), - id=RESOURCE_POOL_WORKFLOW_ID, - task_queue="default", - id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, - start_signal="add_resources", - start_signal_args=[["resource_a", "resource_b"]], - ) - for handle in resource_user_handles: try: await handle.result() diff --git a/resource_pool/worker.py b/resource_pool/worker.py index 8f2d638d..c03e6449 100644 --- a/resource_pool/worker.py +++ b/resource_pool/worker.py @@ -4,9 +4,10 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_pool.resource_allocator import ResourceAllocator +from resource_pool.resource_pool_client import ResourcePoolClient from resource_pool.resource_pool_workflow import ResourcePoolWorkflow from resource_pool.resource_user_workflow import ResourceUserWorkflow, use_resource +from resource_pool.shared import RESOURCE_POOL_WORKFLOW_ID async def main() -> None: @@ -15,8 +16,6 @@ async def main() -> None: # Start client client = await Client.connect("localhost:7233") - resource_allocator = ResourceAllocator(client) - # Run a worker for the workflow worker = Worker( client, @@ -24,7 +23,6 @@ async def main() -> None: workflows=[ResourcePoolWorkflow, ResourceUserWorkflow], activities=[ use_resource, - resource_allocator.send_acquire_signal, ], ) diff --git a/tests/resource_locking/workflow_test.py b/tests/resource_locking/workflow_test.py index b93e5237..785142d8 100644 --- a/tests/resource_locking/workflow_test.py +++ b/tests/resource_locking/workflow_test.py @@ -7,7 +7,7 @@ from temporalio.common import WorkflowIDConflictPolicy from temporalio.worker import Worker -from resource_pool.resource_allocator import ResourceAllocator +from resource_pool.resource_pool_client import ResourcePoolClient from resource_pool.resource_pool_workflow import ( ResourcePoolWorkflow, ResourcePoolWorkflowInput, @@ -35,13 +35,11 @@ async def use_resource_mock(input: UseResourceActivityInput) -> None: await asyncio.sleep(0.05) 
resource_usage[input.resource].append((workflow_id, "end")) - resource_allocator = ResourceAllocator(client) - async with Worker( client, task_queue=TASK_QUEUE, workflows=[ResourcePoolWorkflow, ResourceUserWorkflow], - activities=[use_resource_mock, resource_allocator.send_acquire_signal], + activities=[use_resource_mock], ): await run_all_workflows(client) @@ -74,9 +72,21 @@ async def use_resource_mock(input: UseResourceActivityInput) -> None: async def run_all_workflows(client: Client): + resource_pool_handle = await client.start_workflow( + workflow=ResourcePoolWorkflow.run, + arg=ResourcePoolWorkflowInput( + resources={"r_a": None, "r_b": None, "r_c": None}, + waiters=[], + ), + id=RESOURCE_POOL_WORKFLOW_ID, + task_queue="default", + id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, + ) + resource_user_handles: list[WorkflowHandle[Any, Any]] = [] for i in range(0, 8): input = ResourceUserWorkflowInput( + resource_pool_workflow_id=RESOURCE_POOL_WORKFLOW_ID, iteration_to_fail_after=None, should_continue_as_new=False, ) @@ -93,20 +103,6 @@ async def run_all_workflows(client: Client): ) resource_user_handles.append(handle) - # Add some resources - resource_pool_handle = await client.start_workflow( - workflow=ResourcePoolWorkflow.run, - arg=ResourcePoolWorkflowInput( - resources={}, - waiters=[], - ), - id=RESOURCE_POOL_WORKFLOW_ID, - task_queue="default", - id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, - start_signal="add_resources", - start_signal_args=[["r_a", "r_b", "r_c"]], - ) - for handle in resource_user_handles: try: await handle.result() From f31cd1bce8454cac55c24e1a22acf875c2665a92 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Fri, 11 Apr 2025 12:03:55 -0700 Subject: [PATCH 18/23] clean up imports --- resource_pool/resource_pool_workflow.py | 1 - resource_pool/resource_user_workflow.py | 2 +- resource_pool/worker.py | 2 -- tests/resource_locking/workflow_test.py | 1 - 4 files changed, 1 insertion(+), 5 deletions(-) diff --git 
a/resource_pool/resource_pool_workflow.py b/resource_pool/resource_pool_workflow.py index fb8042dc..6b70ade7 100644 --- a/resource_pool/resource_pool_workflow.py +++ b/resource_pool/resource_pool_workflow.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from datetime import timedelta from typing import Optional from temporalio import workflow diff --git a/resource_pool/resource_user_workflow.py b/resource_pool/resource_user_workflow.py index 8af2b2f3..0246db13 100644 --- a/resource_pool/resource_user_workflow.py +++ b/resource_pool/resource_user_workflow.py @@ -6,7 +6,7 @@ from temporalio import activity, workflow from resource_pool.resource_pool_client import ResourcePoolClient -from resource_pool.shared import AcquiredResource, DetachedResource +from resource_pool.shared import DetachedResource @dataclass diff --git a/resource_pool/worker.py b/resource_pool/worker.py index c03e6449..c3d2a673 100644 --- a/resource_pool/worker.py +++ b/resource_pool/worker.py @@ -4,10 +4,8 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_pool.resource_pool_client import ResourcePoolClient from resource_pool.resource_pool_workflow import ResourcePoolWorkflow from resource_pool.resource_user_workflow import ResourceUserWorkflow, use_resource -from resource_pool.shared import RESOURCE_POOL_WORKFLOW_ID async def main() -> None: diff --git a/tests/resource_locking/workflow_test.py b/tests/resource_locking/workflow_test.py index 785142d8..9ec798ab 100644 --- a/tests/resource_locking/workflow_test.py +++ b/tests/resource_locking/workflow_test.py @@ -7,7 +7,6 @@ from temporalio.common import WorkflowIDConflictPolicy from temporalio.worker import Worker -from resource_pool.resource_pool_client import ResourcePoolClient from resource_pool.resource_pool_workflow import ( ResourcePoolWorkflow, ResourcePoolWorkflowInput, From d57b65a48f0d9240295c5ffe0ea73f65d485233d Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 15 Apr 2025 16:03:58 -0700 
Subject: [PATCH 19/23] PR feedback (no move yet) --- resource_pool/resource_pool_client.py | 40 ++++++++++++------- resource_pool/resource_pool_workflow.py | 5 +-- resource_pool/starter.py | 4 +- resource_pool/worker.py | 2 +- .../__init__.py | 0 .../workflow_test.py | 6 +-- 6 files changed, 33 insertions(+), 24 deletions(-) rename tests/{resource_locking => resource_pool}/__init__.py (100%) rename tests/{resource_locking => resource_pool}/workflow_test.py (96%) diff --git a/resource_pool/resource_pool_client.py b/resource_pool/resource_pool_client.py index 8eba6cba..52318280 100644 --- a/resource_pool/resource_pool_client.py +++ b/resource_pool/resource_pool_client.py @@ -4,6 +4,7 @@ from temporalio import workflow +from resource_pool.resource_pool_workflow import ResourcePoolWorkflow from resource_pool.shared import ( AcquiredResource, AcquireRequest, @@ -17,15 +18,30 @@ class ResourcePoolClient: def __init__(self, pool_workflow_id: str) -> None: self.pool_workflow_id = pool_workflow_id self.acquired_resources: list[AcquiredResource] = [] + self.register_signal_handler() + + def register_signal_handler(self) -> None: + signal_name = f"assign_resource_{self.pool_workflow_id}" + if workflow.get_signal_handler(signal_name) is None: + workflow.set_signal_handler(signal_name, self.assign_resource) + else: + raise RuntimeError( + f"{signal_name} already registered - if you use multiple ResourcePoolClients within the " + f"same workflow, they must use different pool_workflow_ids" + ) async def send_acquire_signal(self) -> None: - handle = workflow.get_external_workflow_handle(self.pool_workflow_id) + handle = workflow.get_external_workflow_handle_for( + ResourcePoolWorkflow.run, self.pool_workflow_id + ) await handle.signal( "acquire_resource", AcquireRequest(workflow.info().workflow_id) ) async def send_release_signal(self, acquired_resource: AcquiredResource) -> None: - handle = workflow.get_external_workflow_handle(self.pool_workflow_id) + handle = 
workflow.get_external_workflow_handle_for( + ResourcePoolWorkflow.run, self.pool_workflow_id + ) await handle.signal( "release_resource", AcquireResponse( @@ -34,10 +50,6 @@ async def send_release_signal(self, acquired_resource: AcquiredResource) -> None ), ) - def lazy_register_signal_handler(self) -> None: - if workflow.get_signal_handler("assign_resource") is None: - workflow.set_signal_handler("assign_resource", self.assign_resource) - def assign_resource(self, response: AcquireResponse) -> None: self.acquired_resources.append( AcquiredResource( @@ -52,8 +64,7 @@ async def acquire_resource( reattach: Optional[DetachedResource] = None, max_wait_time: timedelta = timedelta(minutes=5), ) -> AsyncGenerator[AcquiredResource, None]: - warn_when_workflow_has_timeouts() - self.lazy_register_signal_handler() + _warn_when_workflow_has_timeouts() if reattach is None: await self.send_acquire_signal() @@ -80,7 +91,12 @@ async def acquire_resource( await self.send_release_signal(resource) -def warn_when_workflow_has_timeouts() -> None: +def _warn_when_workflow_has_timeouts() -> None: + def has_timeout(timeout: Optional[timedelta]) -> bool: + # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the + # continue_as_new call). + return timeout is not None and timeout > timedelta(0) + if has_timeout(workflow.info().run_timeout): workflow.logger.warning( f"ResourceLockingWorkflow cannot have a run_timeout (found {workflow.info().run_timeout}) - this will leak locks" @@ -89,9 +105,3 @@ def warn_when_workflow_has_timeouts() -> None: workflow.logger.warning( f"ResourceLockingWorkflow cannot have an execution_timeout (found {workflow.info().execution_timeout}) - this will leak locks" ) - - -def has_timeout(timeout: Optional[timedelta]) -> bool: - # After continue_as_new, timeouts are 0, even if they were None before continue_as_new (and were not set in the - # continue_as_new call). 
- return timeout is not None and timeout > timedelta(0) diff --git a/resource_pool/resource_pool_workflow.py b/resource_pool/resource_pool_workflow.py index 6b70ade7..d01622f4 100644 --- a/resource_pool/resource_pool_workflow.py +++ b/resource_pool/resource_pool_workflow.py @@ -38,7 +38,6 @@ async def add_resources(self, resources: list[str]) -> None: workflow.logger.warning( f"Ignoring attempt to add already-existing resource: {resource}" ) - continue else: self.resources[resource] = None @@ -51,7 +50,7 @@ async def acquire_resource(self, request: AcquireRequest) -> None: f"workflow_id={request.workflow_id} is waiting for a resource" ) - @workflow.signal() + @workflow.signal async def release_resource(self, acquire_response: AcquireResponse) -> None: release_key = acquire_response.release_key resource = self.release_key_to_resource.get(release_key) @@ -89,7 +88,7 @@ async def assign_resource( requester = workflow.get_external_workflow_handle(internal_request.workflow_id) await requester.signal( - "assign_resource", + f"assign_resource_{workflow.info().workflow_id}", AcquireResponse( release_key=internal_request.release_signal, resource=resource ), diff --git a/resource_pool/starter.py b/resource_pool/starter.py index b215bf4c..b6d024d5 100644 --- a/resource_pool/starter.py +++ b/resource_pool/starter.py @@ -27,7 +27,7 @@ async def main() -> None: waiters=[], ), id=RESOURCE_POOL_WORKFLOW_ID, - task_queue="default", + task_queue="resource_pool-task-queue", id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, ) @@ -48,7 +48,7 @@ async def main() -> None: workflow=ResourceUserWorkflow.run, arg=input, id=f"resource-user-workflow-{i}", - task_queue="default", + task_queue="resource_pool-task-queue", ) resource_user_handles.append(handle) diff --git a/resource_pool/worker.py b/resource_pool/worker.py index c3d2a673..d9ada58c 100644 --- a/resource_pool/worker.py +++ b/resource_pool/worker.py @@ -17,7 +17,7 @@ async def main() -> None: # Run a worker for the workflow 
worker = Worker( client, - task_queue="default", + task_queue="resource_pool-task-queue", workflows=[ResourcePoolWorkflow, ResourceUserWorkflow], activities=[ use_resource, diff --git a/tests/resource_locking/__init__.py b/tests/resource_pool/__init__.py similarity index 100% rename from tests/resource_locking/__init__.py rename to tests/resource_pool/__init__.py diff --git a/tests/resource_locking/workflow_test.py b/tests/resource_pool/workflow_test.py similarity index 96% rename from tests/resource_locking/workflow_test.py rename to tests/resource_pool/workflow_test.py index 9ec798ab..6155d531 100644 --- a/tests/resource_locking/workflow_test.py +++ b/tests/resource_pool/workflow_test.py @@ -18,10 +18,10 @@ ) from resource_pool.shared import RESOURCE_POOL_WORKFLOW_ID -TASK_QUEUE = "default" +TASK_QUEUE = "resource_pool-task-queue" -async def test_resource_locking_workflow(client: Client): +async def test_resource_pool_workflow(client: Client): # key is resource, value is a description of resource usage resource_usage: defaultdict[str, list[Sequence[str]]] = defaultdict(list) @@ -78,7 +78,7 @@ async def run_all_workflows(client: Client): waiters=[], ), id=RESOURCE_POOL_WORKFLOW_ID, - task_queue="default", + task_queue=TASK_QUEUE, id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING, ) From bbe33837a8441420917b8a026b1a530e69469c15 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 15 Apr 2025 16:08:50 -0700 Subject: [PATCH 20/23] Move the resource pool client --- resource_pool/pool_client/__init__.py | 1 + resource_pool/{ => pool_client}/resource_pool_client.py | 0 resource_pool/resource_user_workflow.py | 2 +- 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 resource_pool/pool_client/__init__.py rename resource_pool/{ => pool_client}/resource_pool_client.py (100%) diff --git a/resource_pool/pool_client/__init__.py b/resource_pool/pool_client/__init__.py new file mode 100644 index 00000000..4d285e4f --- /dev/null +++ 
b/resource_pool/pool_client/__init__.py @@ -0,0 +1 @@ +from .resource_pool_client import ResourcePoolClient diff --git a/resource_pool/resource_pool_client.py b/resource_pool/pool_client/resource_pool_client.py similarity index 100% rename from resource_pool/resource_pool_client.py rename to resource_pool/pool_client/resource_pool_client.py diff --git a/resource_pool/resource_user_workflow.py b/resource_pool/resource_user_workflow.py index 0246db13..80ee7fba 100644 --- a/resource_pool/resource_user_workflow.py +++ b/resource_pool/resource_user_workflow.py @@ -5,7 +5,7 @@ from temporalio import activity, workflow -from resource_pool.resource_pool_client import ResourcePoolClient +from resource_pool.pool_client import ResourcePoolClient from resource_pool.shared import DetachedResource From 104b43f4198f6a613aaeb43a42ec878176ded4ae Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 15 Apr 2025 16:44:41 -0700 Subject: [PATCH 21/23] Handle assignment signal failures --- resource_pool/resource_pool_workflow.py | 27 ++++++++++++++++--------- tests/resource_pool/workflow_test.py | 7 +++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/resource_pool/resource_pool_workflow.py b/resource_pool/resource_pool_workflow.py index d01622f4..2321f82d 100644 --- a/resource_pool/resource_pool_workflow.py +++ b/resource_pool/resource_pool_workflow.py @@ -2,6 +2,7 @@ from typing import Optional from temporalio import workflow +from temporalio.exceptions import ApplicationError from resource_pool.shared import AcquireRequest, AcquireResponse @@ -79,20 +80,28 @@ def get_current_holders(self) -> dict[str, Optional[InternalAcquireRequest]]: async def assign_resource( self, resource: str, internal_request: InternalAcquireRequest ) -> None: - self.resources[resource] = internal_request workflow.logger.info( f"workflow_id={internal_request.workflow_id} acquired resource {resource}" ) - internal_request.release_signal = str(workflow.uuid4()) - 
self.release_key_to_resource[internal_request.release_signal] = resource requester = workflow.get_external_workflow_handle(internal_request.workflow_id) - await requester.signal( - f"assign_resource_{workflow.info().workflow_id}", - AcquireResponse( - release_key=internal_request.release_signal, resource=resource - ), - ) + try: + release_signal = str(workflow.uuid4()) + await requester.signal( + f"assign_resource_{workflow.info().workflow_id}", + AcquireResponse(release_key=release_signal, resource=resource), + ) + + internal_request.release_signal = release_signal + self.resources[resource] = internal_request + self.release_key_to_resource[release_signal] = resource + except ApplicationError as e: + if e.type == "ExternalWorkflowExecutionNotFound": + workflow.logger.info( + f"Could not assign resource {resource} to {internal_request.workflow_id}: {e.message}" + ) + else: + raise e async def assign_next_resource(self) -> bool: if len(self.waiters) == 0: diff --git a/tests/resource_pool/workflow_test.py b/tests/resource_pool/workflow_test.py index 6155d531..3e3c1c0b 100644 --- a/tests/resource_pool/workflow_test.py +++ b/tests/resource_pool/workflow_test.py @@ -69,6 +69,13 @@ async def use_resource_mock(input: UseResourceActivityInput) -> None: ), f"{workflow_id} ended on {resource} held by {holder}" holder = None + # Are all the resources free, per the query? 
+ pool_handle = client.get_workflow_handle_for( + ResourcePoolWorkflow.run, RESOURCE_POOL_WORKFLOW_ID + ) + query_result = await pool_handle.query(ResourcePoolWorkflow.get_current_holders) + assert query_result == {"r_a": None, "r_b": None, "r_c": None} + async def run_all_workflows(client: Client): resource_pool_handle = await client.start_workflow( From 7ac42cc54e143ddaaa51d5c6ebcdfc821e5567e1 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Tue, 15 Apr 2025 16:51:35 -0700 Subject: [PATCH 22/23] format/lint --- resource_pool/pool_client/resource_pool_client.py | 12 ++++-------- tests/resource_pool/workflow_test.py | 6 ++++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/resource_pool/pool_client/resource_pool_client.py b/resource_pool/pool_client/resource_pool_client.py index 52318280..ac39ee74 100644 --- a/resource_pool/pool_client/resource_pool_client.py +++ b/resource_pool/pool_client/resource_pool_client.py @@ -31,18 +31,14 @@ def register_signal_handler(self) -> None: ) async def send_acquire_signal(self) -> None: - handle = workflow.get_external_workflow_handle_for( + await workflow.get_external_workflow_handle_for( ResourcePoolWorkflow.run, self.pool_workflow_id - ) - await handle.signal( - "acquire_resource", AcquireRequest(workflow.info().workflow_id) - ) + ).signal("acquire_resource", AcquireRequest(workflow.info().workflow_id)) async def send_release_signal(self, acquired_resource: AcquiredResource) -> None: - handle = workflow.get_external_workflow_handle_for( + await workflow.get_external_workflow_handle_for( ResourcePoolWorkflow.run, self.pool_workflow_id - ) - await handle.signal( + ).signal( "release_resource", AcquireResponse( resource=acquired_resource.resource, diff --git a/tests/resource_pool/workflow_test.py b/tests/resource_pool/workflow_test.py index 3e3c1c0b..99b0fe76 100644 --- a/tests/resource_pool/workflow_test.py +++ b/tests/resource_pool/workflow_test.py @@ -70,10 +70,12 @@ async def use_resource_mock(input: 
UseResourceActivityInput) -> None: holder = None # Are all the resources free, per the query? - pool_handle = client.get_workflow_handle_for( + handle: WorkflowHandle[ + ResourcePoolWorkflow, None + ] = client.get_workflow_handle_for( ResourcePoolWorkflow.run, RESOURCE_POOL_WORKFLOW_ID ) - query_result = await pool_handle.query(ResourcePoolWorkflow.get_current_holders) + query_result = await handle.query(ResourcePoolWorkflow.get_current_holders) assert query_result == {"r_a": None, "r_b": None, "r_c": None} From 883f159a21431ebbcb16de4781970f55a53f9686 Mon Sep 17 00:00:00 2001 From: Nathan Glass Date: Wed, 16 Apr 2025 07:51:26 -0700 Subject: [PATCH 23/23] PR feedback (move resource_pool_workflow, misc factoring) --- resource_pool/pool_client/__init__.py | 1 + .../pool_client/resource_pool_client.py | 28 +++++++++---------- .../resource_pool_workflow.py | 0 resource_pool/starter.py | 2 +- resource_pool/worker.py | 2 +- tests/resource_pool/workflow_test.py | 2 +- 6 files changed, 17 insertions(+), 18 deletions(-) rename resource_pool/{ => pool_client}/resource_pool_workflow.py (100%) diff --git a/resource_pool/pool_client/__init__.py b/resource_pool/pool_client/__init__.py index 4d285e4f..b8471d8a 100644 --- a/resource_pool/pool_client/__init__.py +++ b/resource_pool/pool_client/__init__.py @@ -1 +1,2 @@ from .resource_pool_client import ResourcePoolClient +from .resource_pool_workflow import ResourcePoolWorkflow diff --git a/resource_pool/pool_client/resource_pool_client.py b/resource_pool/pool_client/resource_pool_client.py index ac39ee74..b7183afa 100644 --- a/resource_pool/pool_client/resource_pool_client.py +++ b/resource_pool/pool_client/resource_pool_client.py @@ -4,7 +4,7 @@ from temporalio import workflow -from resource_pool.resource_pool_workflow import ResourcePoolWorkflow +from resource_pool.pool_client.resource_pool_workflow import ResourcePoolWorkflow from resource_pool.shared import ( AcquiredResource, AcquireRequest, @@ -18,24 +18,29 @@ class 
ResourcePoolClient: def __init__(self, pool_workflow_id: str) -> None: self.pool_workflow_id = pool_workflow_id self.acquired_resources: list[AcquiredResource] = [] - self.register_signal_handler() - def register_signal_handler(self) -> None: signal_name = f"assign_resource_{self.pool_workflow_id}" if workflow.get_signal_handler(signal_name) is None: - workflow.set_signal_handler(signal_name, self.assign_resource) + workflow.set_signal_handler(signal_name, self._handle_acquire_response) else: raise RuntimeError( f"{signal_name} already registered - if you use multiple ResourcePoolClients within the " f"same workflow, they must use different pool_workflow_ids" ) - async def send_acquire_signal(self) -> None: + def _handle_acquire_response(self, response: AcquireResponse) -> None: + self.acquired_resources.append( + AcquiredResource( + resource=response.resource, release_key=response.release_key + ) + ) + + async def _send_acquire_signal(self) -> None: await workflow.get_external_workflow_handle_for( ResourcePoolWorkflow.run, self.pool_workflow_id ).signal("acquire_resource", AcquireRequest(workflow.info().workflow_id)) - async def send_release_signal(self, acquired_resource: AcquiredResource) -> None: + async def _send_release_signal(self, acquired_resource: AcquiredResource) -> None: await workflow.get_external_workflow_handle_for( ResourcePoolWorkflow.run, self.pool_workflow_id ).signal( @@ -46,13 +51,6 @@ async def send_release_signal(self, acquired_resource: AcquiredResource) -> None ), ) - def assign_resource(self, response: AcquireResponse) -> None: - self.acquired_resources.append( - AcquiredResource( - resource=response.resource, release_key=response.release_key - ) - ) - @asynccontextmanager async def acquire_resource( self, @@ -63,7 +61,7 @@ async def acquire_resource( _warn_when_workflow_has_timeouts() if reattach is None: - await self.send_acquire_signal() + await self._send_acquire_signal() await workflow.wait_condition( lambda: 
len(self.acquired_resources) > 0, timeout=max_wait_time ) @@ -84,7 +82,7 @@ async def acquire_resource( yield resource finally: if not resource.detached: - await self.send_release_signal(resource) + await self._send_release_signal(resource) def _warn_when_workflow_has_timeouts() -> None: diff --git a/resource_pool/resource_pool_workflow.py b/resource_pool/pool_client/resource_pool_workflow.py similarity index 100% rename from resource_pool/resource_pool_workflow.py rename to resource_pool/pool_client/resource_pool_workflow.py diff --git a/resource_pool/starter.py b/resource_pool/starter.py index b6d024d5..2ae1ab44 100644 --- a/resource_pool/starter.py +++ b/resource_pool/starter.py @@ -4,7 +4,7 @@ from temporalio.client import Client, WorkflowFailureError, WorkflowHandle from temporalio.common import WorkflowIDConflictPolicy -from resource_pool.resource_pool_workflow import ( +from resource_pool.pool_client.resource_pool_workflow import ( ResourcePoolWorkflow, ResourcePoolWorkflowInput, ) diff --git a/resource_pool/worker.py b/resource_pool/worker.py index d9ada58c..cb3a06dd 100644 --- a/resource_pool/worker.py +++ b/resource_pool/worker.py @@ -4,7 +4,7 @@ from temporalio.client import Client from temporalio.worker import Worker -from resource_pool.resource_pool_workflow import ResourcePoolWorkflow +from resource_pool.pool_client.resource_pool_workflow import ResourcePoolWorkflow from resource_pool.resource_user_workflow import ResourceUserWorkflow, use_resource diff --git a/tests/resource_pool/workflow_test.py b/tests/resource_pool/workflow_test.py index 99b0fe76..42a6fbde 100644 --- a/tests/resource_pool/workflow_test.py +++ b/tests/resource_pool/workflow_test.py @@ -7,7 +7,7 @@ from temporalio.common import WorkflowIDConflictPolicy from temporalio.worker import Worker -from resource_pool.resource_pool_workflow import ( +from resource_pool.pool_client.resource_pool_workflow import ( ResourcePoolWorkflow, ResourcePoolWorkflowInput, )