From 47f1da9fd7400f2b8d12b057fed7c402db5b23e4 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Tue, 16 Sep 2025 15:36:20 +0200 Subject: [PATCH 1/4] WIP ALEPH-615 Expose resources for executions --- src/aleph/vm/orchestrator/supervisor.py | 2 ++ src/aleph/vm/orchestrator/views/__init__.py | 31 +++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/aleph/vm/orchestrator/supervisor.py b/src/aleph/vm/orchestrator/supervisor.py index b8f3061f3..d09c1e864 100644 --- a/src/aleph/vm/orchestrator/supervisor.py +++ b/src/aleph/vm/orchestrator/supervisor.py @@ -47,6 +47,7 @@ status_check_version, status_public_config, update_allocations, + list_executions_resources, ) from .views.operator import ( operate_confidential_initialize, @@ -128,6 +129,7 @@ def setup_webapp(pool: VmPool | None): web.get("/about/login", about_login), web.get("/about/executions/list", list_executions), web.get("/v2/about/executions/list", list_executions_v2), + web.get("/about/executions/resources", list_executions_resources), web.get("/about/executions/details", about_executions), web.get("/about/executions/records", about_execution_records), web.get("/about/usage/system", about_system_usage), diff --git a/src/aleph/vm/orchestrator/views/__init__.py b/src/aleph/vm/orchestrator/views/__init__.py index feafab4d1..e95fc79f8 100644 --- a/src/aleph/vm/orchestrator/views/__init__.py +++ b/src/aleph/vm/orchestrator/views/__init__.py @@ -229,6 +229,37 @@ async def list_executions_v2(request: web.Request) -> web.Response: ) + +@cors_allow_all +async def list_executions_resources(request: web.Request) -> web.Response: + """List all executions with detail on their resource usage""" + pool: VmPool = request.app["vm_pool"] + + return web.json_response( + { + item_hash: { + "networking": ( + { + "ipv4_network": execution.vm.tap_interface.ip_network, + "host_ipv4": pool.network.host_ipv4, + "ipv6_network": execution.vm.tap_interface.ipv6_network, + "ipv6_ip": 
execution.vm.tap_interface.guest_ipv6.ip, + "ipv4_ip": execution.vm.tap_interface.guest_ip.ip, + "mapped_ports": execution.mapped_ports, + } + if execution.vm and execution.vm.tap_interface + else {} + ), + "status": execution.times, + "message": execution.message, + "resources": execution.resources, + } + for item_hash, execution in pool.executions.items() + }, + dumps=dumps_for_json, + ) + + @cors_allow_all async def about_config(request: web.Request) -> web.Response: authenticate_request(request) From 4ff09475b2162c3bbaed5d19b30c83dcc813bcd7 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Thu, 18 Sep 2025 15:33:31 +0200 Subject: [PATCH 2/4] fix formatting --- src/aleph/vm/orchestrator/supervisor.py | 2 +- src/aleph/vm/orchestrator/views/__init__.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/aleph/vm/orchestrator/supervisor.py b/src/aleph/vm/orchestrator/supervisor.py index d09c1e864..646f38197 100644 --- a/src/aleph/vm/orchestrator/supervisor.py +++ b/src/aleph/vm/orchestrator/supervisor.py @@ -34,6 +34,7 @@ about_login, debug_haproxy, list_executions, + list_executions_resources, list_executions_v2, notify_allocation, operate_reserve_resources, @@ -47,7 +48,6 @@ status_check_version, status_public_config, update_allocations, - list_executions_resources, ) from .views.operator import ( operate_confidential_initialize, diff --git a/src/aleph/vm/orchestrator/views/__init__.py b/src/aleph/vm/orchestrator/views/__init__.py index e95fc79f8..cef4a37d8 100644 --- a/src/aleph/vm/orchestrator/views/__init__.py +++ b/src/aleph/vm/orchestrator/views/__init__.py @@ -229,7 +229,6 @@ async def list_executions_v2(request: web.Request) -> web.Response: ) - @cors_allow_all async def list_executions_resources(request: web.Request) -> web.Response: """List all executions with detail on their resource usage""" From a9e0f2ce404b67dec8e3faf86f335c05636f9930 Mon Sep 17 00:00:00 2001 From: "Andres D. 
Molins" Date: Tue, 21 Oct 2025 12:52:20 +0200 Subject: [PATCH 3/4] Simplify by adding a single `resources` field instead of creating another endpoint. --- src/aleph/vm/orchestrator/supervisor.py | 2 -- src/aleph/vm/orchestrator/utils.py | 18 +++++++++++ src/aleph/vm/orchestrator/views/__init__.py | 36 ++++----------------- 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/src/aleph/vm/orchestrator/supervisor.py b/src/aleph/vm/orchestrator/supervisor.py index 646f38197..b8f3061f3 100644 --- a/src/aleph/vm/orchestrator/supervisor.py +++ b/src/aleph/vm/orchestrator/supervisor.py @@ -34,7 +34,6 @@ about_login, debug_haproxy, list_executions, - list_executions_resources, list_executions_v2, notify_allocation, operate_reserve_resources, @@ -129,7 +128,6 @@ def setup_webapp(pool: VmPool | None): web.get("/about/login", about_login), web.get("/about/executions/list", list_executions), web.get("/v2/about/executions/list", list_executions_v2), - web.get("/about/executions/resources", list_executions_resources), web.get("/about/executions/details", about_executions), web.get("/about/executions/records", about_execution_records), web.get("/about/usage/system", about_system_usage), diff --git a/src/aleph/vm/orchestrator/utils.py b/src/aleph/vm/orchestrator/utils.py index 3a9f7bb0d..638778620 100644 --- a/src/aleph/vm/orchestrator/utils.py +++ b/src/aleph/vm/orchestrator/utils.py @@ -4,6 +4,7 @@ from typing import Any, TypedDict import aiohttp +from aleph_message.models import InstanceContent, ProgramContent from aleph.vm.conf import settings @@ -106,3 +107,20 @@ def get_compatible_gpus() -> list[Any]: if not LAST_AGGREGATE_SETTINGS: return [] return LAST_AGGREGATE_SETTINGS["compatible_gpus"] + + +def get_execution_disk_size(message: InstanceContent | ProgramContent) -> Decimal: + disk_size_mib = 0 + + # For Programs the disk size depends on the runtime + # TODO: Find the real size of the runtime and for the code volumes + if isinstance(message, InstanceContent): + 
disk_size_mib = message.rootfs.size_mib + + # For volumes, only the persistent and ephemeral volumes have a size field + # TODO: Find the real size of immutable volumes + for volume in message.volumes: + if getattr(volume, "size_mib", None): + disk_size_mib += volume.size_mib + + return Decimal(disk_size_mib) diff --git a/src/aleph/vm/orchestrator/views/__init__.py b/src/aleph/vm/orchestrator/views/__init__.py index cef4a37d8..219ebce45 100644 --- a/src/aleph/vm/orchestrator/views/__init__.py +++ b/src/aleph/vm/orchestrator/views/__init__.py @@ -46,6 +46,7 @@ from aleph.vm.orchestrator.utils import ( format_cost, get_community_wallet_address, + get_execution_disk_size, is_after_community_wallet_start, update_aggregate_settings, ) @@ -220,6 +221,11 @@ async def list_executions_v2(request: web.Request) -> web.Response: if execution.vm and execution.vm.tap_interface else {} ), + "resources": { + "vcpus": execution.message.resources.vcpus, + "memory": execution.message.resources.memory, + "disk_mib": get_execution_disk_size(execution.message), + }, "status": execution.times, "running": execution.is_running, } @@ -229,36 +235,6 @@ async def list_executions_v2(request: web.Request) -> web.Response: ) -@cors_allow_all -async def list_executions_resources(request: web.Request) -> web.Response: - """List all executions with detail on their resource usage""" - pool: VmPool = request.app["vm_pool"] - - return web.json_response( - { - item_hash: { - "networking": ( - { - "ipv4_network": execution.vm.tap_interface.ip_network, - "host_ipv4": pool.network.host_ipv4, - "ipv6_network": execution.vm.tap_interface.ipv6_network, - "ipv6_ip": execution.vm.tap_interface.guest_ipv6.ip, - "ipv4_ip": execution.vm.tap_interface.guest_ip.ip, - "mapped_ports": execution.mapped_ports, - } - if execution.vm and execution.vm.tap_interface - else {} - ), - "status": execution.times, - "message": execution.message, - "resources": execution.resources, - } - for item_hash, execution in 
pool.executions.items() - }, - dumps=dumps_for_json, - ) - - @cors_allow_all async def about_config(request: web.Request) -> web.Response: authenticate_request(request) From 23087dff4714dad3132c337ee9f0084ef5f127fa Mon Sep 17 00:00:00 2001 From: "Andres D. Molins" Date: Tue, 21 Oct 2025 13:32:58 +0200 Subject: [PATCH 4/4] Fix: Resolve failing tests and return the value as an integer instead of a Decimal. --- src/aleph/vm/orchestrator/utils.py | 4 ++-- tests/supervisor/test_views.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/aleph/vm/orchestrator/utils.py b/src/aleph/vm/orchestrator/utils.py index 638778620..16419ff33 100644 --- a/src/aleph/vm/orchestrator/utils.py +++ b/src/aleph/vm/orchestrator/utils.py @@ -109,7 +109,7 @@ def get_compatible_gpus() -> list[Any]: return LAST_AGGREGATE_SETTINGS["compatible_gpus"] -def get_execution_disk_size(message: InstanceContent | ProgramContent) -> Decimal: +def get_execution_disk_size(message: InstanceContent | ProgramContent) -> int: disk_size_mib = 0 # For Programs the disk size depends on the runtime @@ -123,4 +123,4 @@ def get_execution_disk_size(message: InstanceContent | ProgramContent) -> Decima if getattr(volume, "size_mib", None): disk_size_mib += volume.size_mib - return Decimal(disk_size_mib) + return disk_size_mib diff --git a/tests/supervisor/test_views.py b/tests/supervisor/test_views.py index b2b63c5bb..39747815c 100644 --- a/tests/supervisor/test_views.py +++ b/tests/supervisor/test_views.py @@ -389,6 +389,11 @@ async def test_v2_executions_list_one_vm(aiohttp_client, mock_app_with_pool, moc assert await response.json() == { "decadecadecadecadecadecadecadecadecadecadecadecadecadecadecadeca": { "networking": {}, + "resources": { + "vcpus": 1, + "memory": 256, + "disk_mib": 1000, + }, "status": { "defined_at": str(execution.times.defined_at), "preparing_at": None, @@ -464,6 +469,11 @@ async def test_v2_executions_list_vm_network(aiohttp_client, mocker, mock_app_wi "ipv6_ip": 
"fc00:1:2:3:3:deca:deca:dec1", "mapped_ports": {}, }, + "resources": { + "vcpus": 1, + "memory": 256, + "disk_mib": 1000, + }, "status": { "defined_at": str(execution.times.defined_at), "preparing_at": None,