From e1b2aba7bb2c82c93ded492bfdc2b1f532bdbf14 Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 6 Jan 2026 00:19:43 +0530 Subject: [PATCH 1/9] CustomBuild: add build timeout and TIMED_OUT state Fixes #201. Signed-off-by: Sahil --- build_manager/manager.py | 56 +++++++- build_manager/progress_updater.py | 83 ++++++++++- builder/builder.py | 227 +++++++++++++++++++++++------- common/__init__.py | 1 + common/config.py | 7 + web/app.py | 59 +++++++- 6 files changed, 371 insertions(+), 62 deletions(-) create mode 100644 common/__init__.py create mode 100644 common/config.py diff --git a/build_manager/manager.py b/build_manager/manager.py index 4143f64..e5583af 100644 --- a/build_manager/manager.py +++ b/build_manager/manager.py @@ -15,6 +15,7 @@ class BuildState(Enum): SUCCESS = 2 FAILURE = 3 ERROR = 4 + TIMED_OUT = 5 class BuildProgress: @@ -71,6 +72,8 @@ def __init__(self, percent=0 ) self.time_created = time.time() + self.time_started_running = None # when build state becomes RUNNING + self.error_message = None def to_dict(self) -> dict: return { @@ -81,6 +84,8 @@ def to_dict(self) -> dict: 'selected_features': list(self.selected_features), 'progress': self.progress.to_dict(), 'time_created': self.time_created, + 'time_started_running': getattr(self, 'time_started_running', None), + 'error_message': getattr(self, 'error_message', None), } @@ -378,7 +383,7 @@ def update_build_progress_state(self, build_id: str, new_state: BuildState) -> None: """ - Update the build's state (e.g., PENDING, RUNNING, SUCCESS, FAILURE). + Update the build's state (e.g., PENDING, RUNNING, SUCCESS, FAILURE, TIMED_OUT). Parameters: build_id (str): The ID of the build to update. 
@@ -389,6 +394,15 @@ def update_build_progress_state(self, if build_info is None: raise ValueError(f"Build with id {build_id} not found.") + old_state = build_info.progress.state + + if old_state != BuildState.RUNNING and new_state == BuildState.RUNNING: + build_info.time_started_running = time.time() + self.logger.info( + f"Build {build_id} transitioned to RUNNING state at " + f"{build_info.time_started_running}" + ) + build_info.progress.state = new_state self.__update_build_info( build_id=build_id, @@ -459,6 +473,46 @@ def get_build_archive_path(self, build_id: str) -> str: self.get_build_artifacts_dir_path(build_id), f"{build_id}.tar.gz" ) + + def mark_build_timed_out(self, build_id: str, error_message: str = None) -> None: + """ + Mark a build as timed out and update its state. + + Parameters: + build_id (str): The ID of the build that timed out. + error_message (str): Optional error message to include. + """ + self.logger.warning(f"Marking build {build_id} as TIMED_OUT") + + try: + build_info = self.get_build_info(build_id=build_id) + if build_info is None: + self.logger.error(f"Cannot mark build {build_id} as timed out - build not found") + return + + # Don't override terminal states (SUCCESS/FAILURE already set) + if build_info.progress.state in [BuildState.SUCCESS, BuildState.FAILURE]: + self.logger.warning( + f"Build {build_id} already in terminal state " + f"{build_info.progress.state.name}, not marking as TIMED_OUT" + ) + return + # Update the build state to TIMED_OUT + # Keeping last known progress instead of resetting to 0 + build_info.progress.state = BuildState.TIMED_OUT + + # Store error message if provided + if error_message: + build_info.error_message = error_message + self.logger.warning(f"Build {build_id} timeout error: {error_message}") + + # Update the build info in Redis + self.__update_build_info(build_id=build_id, build_info=build_info) + + self.logger.info(f"Successfully marked build {build_id} as TIMED_OUT") + + except Exception as e: + 
self.logger.error(f"Failed to mark build {build_id} as timed out: {e}") @staticmethod def get_singleton() -> "BuildManager": diff --git a/build_manager/progress_updater.py b/build_manager/progress_updater.py index c6ddd35..6408889 100644 --- a/build_manager/progress_updater.py +++ b/build_manager/progress_updater.py @@ -6,7 +6,9 @@ BuildManager as bm, BuildState ) - +import time +# Timeout constant - 15 minutes +from common.config import BUILD_TIMEOUT_SECONDS class BuildProgressUpdater: """ @@ -213,6 +215,9 @@ def __update_build_percent(self, build_id: str) -> None: elif current_state == BuildState.ERROR: # Keep existing percentage pass + elif current_state == BuildState.TIMED_OUT: + # Keep existing percentage for timed out builds + pass else: raise Exception("Unhandled BuildState.") @@ -226,6 +231,73 @@ def __update_build_percent(self, build_id: str) -> None: percent=new_percent ) + def __check_build_timeout(self, build_id: str) -> bool: + """ + Check if a running build has exceeded the timeout threshold. + + This provides a backup timeout mechanism in addition to the per-subprocess + timeouts in builder.py. It measures total build duration from when the + build entered RUNNING state, not from submission/addition time. + + Parameters: + build_id (str): The unique ID of the build to check. + + Returns: + bool: True if the build has timed out, False otherwise. 
+ """ + build_info = bm.get_singleton().get_build_info(build_id=build_id) + if build_info is None: + self.logger.error(f"No build found with ID {build_id}") + return False + + # Only check timeout for RUNNING builds + if build_info.progress.state != BuildState.RUNNING: + return False + + # Check if we have a start time for RUNNING state + time_started_running: float | None = getattr(build_info, 'time_started_running', None) + if time_started_running is None: + # Build is RUNNING but we don't have a start time yet + # This can happen briefly during state transition + self.logger.debug( + f"Build {build_id} is RUNNING but time_started_running is None" + ) + return False + + # Calculate elapsed time since build started running + current_time = time.time() + elapsed_time = current_time - time_started_running + + self.logger.debug( + f"Build {build_id}: elapsed time = {elapsed_time:.0f}s, " + f"timeout threshold = {BUILD_TIMEOUT_SECONDS}s" + ) + + # Check if build has exceeded timeout + if elapsed_time > BUILD_TIMEOUT_SECONDS: + self.logger.warning( + f"Build {build_id} has timed out after {elapsed_time:.0f} seconds " + f"(threshold: {BUILD_TIMEOUT_SECONDS}s)" + ) + + # Mark the build as timed out + # Note: The builder worker may still be running a subprocess. + # The worker will detect the TIMED_OUT state when it checks + # between build steps and will terminate gracefully. + error_message = ( + f"Build exceeded {BUILD_TIMEOUT_SECONDS // 60} minute timeout. " + f"Build ran for: {elapsed_time / 60:.1f} minutes." + ) + + bm.get_singleton().mark_build_timed_out( + build_id=build_id, + error_message=error_message + ) + + return True + + return False + def __update_build_state(self, build_id: str) -> None: """ Update the state of a given build. 
@@ -235,6 +307,12 @@ def __update_build_state(self, build_id: str) -> None: if build_info is None: raise ValueError(f"No build found with ID {build_id}") + # Check for timeout first (for RUNNING builds) + if self.__check_build_timeout(build_id): + # Build has timed out, no need to check other state transitions + self.logger.info(f"Build {build_id} marked as TIMED_OUT") + return + current_state = build_info.progress.state new_state = current_state self.logger.debug( @@ -259,6 +337,9 @@ def __update_build_state(self, build_id: str) -> None: elif current_state == BuildState.ERROR: # ERROR is a conclusive state pass + elif current_state == BuildState.TIMED_OUT: + # TIMED_OUT is a conclusive state + pass else: raise Exception("Unhandled BuildState.") diff --git a/builder/builder.py b/builder/builder.py index 5aaa0aa..a89d949 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -1,6 +1,7 @@ import ap_git from build_manager import ( BuildManager as bm, + BuildState, ) import subprocess import os @@ -14,6 +15,8 @@ ) from pathlib import Path +# Build timeout constant - 15 minutes +from common.config import BUILD_TIMEOUT_SECONDS class Builder: """ @@ -60,6 +63,12 @@ def __log_build_info(self, build_id: str) -> None: build_id (str): Unique identifier for the build. """ build_info = bm.get_singleton().get_build_info(build_id) + + # Check if build info exists + if build_info is None: + self.logger.error(f"Build info not found for build_id: {build_id}") + raise RuntimeError(f"Build info not found for build_id: {build_id}") + logpath = bm.get_singleton().get_build_log_path(build_id) with open(logpath, "a") as build_log: build_log.write(f"Vehicle ID: {build_info.vehicle_id}\n" @@ -268,6 +277,12 @@ def __generate_archive(self, build_id: str) -> None: self.logger.info(f"Generated {archive_path}.") def __clean_up_build_workdir(self, build_id: str) -> None: + """ + Removes the temporary build working directory. + + Parameters: + build_id (str): Unique identifier for the build. 
+ """ shutil.rmtree(self.__get_path_to_build_dir(build_id)) def __process_build(self, build_id: str) -> None: @@ -278,14 +293,32 @@ def __process_build(self, build_id: str) -> None: Parameters: build_id (str): Unique identifier for the build. """ - self.__create_build_workdir(build_id) - self.__create_build_artifacts_dir(build_id) - self.__log_build_info(build_id) - self.__provision_build_source(build_id) - self.__generate_extrahwdef(build_id) - self.__build(build_id) - self.__generate_archive(build_id) - self.__clean_up_build_workdir(build_id) + try: + self.__create_build_workdir(build_id) + self.__create_build_artifacts_dir(build_id) + self.__log_build_info(build_id) + self.__provision_build_source(build_id) + self.__generate_extrahwdef(build_id) + self.__build(build_id) + + # Only generate archive if NOT timed out + build_info = bm.get_singleton().get_build_info(build_id) + if build_info and build_info.progress.state != BuildState.TIMED_OUT: + self.__generate_archive(build_id) + else: + self.logger.info(f"Skipping archive for timed out build {build_id}") + + self.__clean_up_build_workdir(build_id) + + except RuntimeError as e: + # Handle timeout or other build errors + self.logger.error(f"Build {build_id} failed: {e}") + # Clean up even if build failed + try: + self.__clean_up_build_workdir(build_id) + except Exception as cleanup_error: + self.logger.error(f"Failed to cleanup build {build_id}: {cleanup_error}") + # Don't re-raise, let the worker continue with next build def __get_path_to_build_dir(self, build_id: str) -> str: """ @@ -329,7 +362,34 @@ def __get_path_to_build_src(self, build_id: str) -> str: self.__get_path_to_build_dir(build_id), "build_src" ) + def __check_if_timed_out(self, build_id: str) -> bool: + """ + Check if this build has been marked as TIMED_OUT by the progress updater. + This is checked between build steps to allow early termination if the + progress updater has marked the build as timed out. 
+ + Parameters: + build_id (str): The build ID to check + + Returns: + bool: True if build is marked as TIMED_OUT + """ + build_info = bm.get_singleton().get_build_info(build_id) + if build_info is None: + self.logger.error(f"Build info not found for {build_id}") + return False + + # Check if state is TIMED_OUT + if build_info.progress.state == BuildState.TIMED_OUT: + self.logger.warning( + f"Build {build_id} has been marked as TIMED_OUT. " + "Stopping build process." + ) + return True + + return False + def __build(self, build_id: str) -> None: """ Executes the actual build process for a build. @@ -378,51 +438,112 @@ def __build(self, build_id: str) -> None: build_log.flush() # Run the build steps - self.logger.info("Running waf configure") - build_log.write("Running waf configure\n") - build_log.flush() - subprocess.run( - [ - "python3", - "./waf", - "configure", - "--board", - build_info.board, - "--out", - self.__get_path_to_build_dir(build_id), - "--extra-hwdef", - self.__get_path_to_extra_hwdef(build_id), - ], - cwd=self.__get_path_to_build_src(build_id), - stdout=build_log, - stderr=build_log, - shell=False, - ) - - self.logger.info("Running clean") - build_log.write("Running clean\n") - build_log.flush() - subprocess.run( - ["python3", "./waf", "clean"], - cwd=self.__get_path_to_build_src(build_id), - stdout=build_log, - stderr=build_log, - shell=False, - ) - - self.logger.info("Running build") - build_log.write("Running build\n") - build_log.flush() - build_command = vehicle.waf_build_command - subprocess.run( - ["python3", "./waf", build_command], - cwd=self.__get_path_to_build_src(build_id), - stdout=build_log, - stderr=build_log, - shell=False, - ) - build_log.write("done build\n") - build_log.flush() + try: + # Check timeout before configure + if self.__check_if_timed_out(build_id): + raise RuntimeError("Build marked as TIMED_OUT before configure") + + self.logger.info("Running waf configure") + build_log.write("Running waf configure\n") + 
build_log.flush() + subprocess.run( + [ + "python3", + "./waf", + "configure", + "--board", + build_info.board, + "--out", + self.__get_path_to_build_dir(build_id), + "--extra-hwdef", + self.__get_path_to_extra_hwdef(build_id), + ], + cwd=self.__get_path_to_build_src(build_id), + stdout=build_log, + stderr=build_log, + shell=False, + timeout=BUILD_TIMEOUT_SECONDS, + check=True, + ) + + # Check timeout after configure + if self.__check_if_timed_out(build_id): + raise RuntimeError("Build marked as TIMED_OUT after configure") + + self.logger.info("Running clean") + build_log.write("Running clean\n") + build_log.flush() + subprocess.run( + ["python3", "./waf", "clean"], + cwd=self.__get_path_to_build_src(build_id), + stdout=build_log, + stderr=build_log, + shell=False, + timeout=BUILD_TIMEOUT_SECONDS, + check=True, + ) + + # Check timeout after clean + if self.__check_if_timed_out(build_id): + raise RuntimeError("Build marked as TIMED_OUT after clean") + + self.logger.info("Running build") + build_log.write("Running build\n") + build_log.flush() + build_command = vehicle.waf_build_command + subprocess.run( + ["python3", "./waf", build_command], + cwd=self.__get_path_to_build_src(build_id), + stdout=build_log, + stderr=build_log, + shell=False, + timeout=BUILD_TIMEOUT_SECONDS, + check=True, + ) + # Check timeout after build (before marking success) + if self.__check_if_timed_out(build_id): + raise RuntimeError("Build marked as TIMED_OUT after build completed") + + build_log.write("done build\n") + build_log.flush() + + except subprocess.TimeoutExpired as e: + # Build timed out - handle gracefully + error_msg = f"Build timed out after {BUILD_TIMEOUT_SECONDS // 60} minutes" + self.logger.error(f"Build {build_id}: {error_msg}") + + build_log.write(f"\n{'='*50}\n") + build_log.write(f"ERROR: {error_msg}\n") + build_log.write(f"Timeout occurred during subprocess execution.\n") + build_log.write(f"{'='*50}\n") + build_log.flush() + + # Kill the process if it's still running + 
if hasattr(e, 'process') and e.process: + try: + e.process.kill() + self.logger.info(f"Killed timed-out process for build {build_id}") + except Exception as kill_error: + self.logger.error(f"Failed to kill process: {kill_error}") + + # Mark as timed out if not already + current_info = bm.get_singleton().get_build_info(build_id) + if current_info and current_info.progress.state != BuildState.TIMED_OUT: + bm.get_singleton().mark_build_timed_out(build_id, error_msg) + + raise RuntimeError(error_msg) from e + + except RuntimeError as e: + # Build was marked as timed out by progress updater + if "TIMED_OUT" in str(e): + self.logger.warning(f"Build {build_id} stopped due to timeout") + build_log.write(f"\n{'='*50}\n") + build_log.write(f"Build stopped: {str(e)}\n") + build_log.write(f"{'='*50}\n") + build_log.flush() + raise + else: + raise def shutdown(self) -> None: """ diff --git a/common/__init__.py b/common/__init__.py new file mode 100644 index 0000000..47a9a8b --- /dev/null +++ b/common/__init__.py @@ -0,0 +1 @@ +# common/__init__.py diff --git a/common/config.py b/common/config.py new file mode 100644 index 0000000..c3cba0a --- /dev/null +++ b/common/config.py @@ -0,0 +1,7 @@ +# common/config.py + +# Build timeout in seconds. 
+# Used independently by: +# - build_manager.progress_updater (state transition) +# - builder.builder (process termination) +BUILD_TIMEOUT_SECONDS = 15 * 60 # 15 minutes diff --git a/web/app.py b/web/app.py index 8a4aa43..f92d341 100755 --- a/web/app.py +++ b/web/app.py @@ -366,13 +366,20 @@ def get_deafults(vehicle_id, version_id, board_name): @app.route('/builds', methods=['GET']) def get_all_builds(): all_build_ids = manager.get_all_build_ids() - all_build_info = [ - { - **manager.get_build_info(build_id).to_dict(), - 'build_id': build_id - } - for build_id in all_build_ids - ] + all_build_info = [] + + for build_id in all_build_ids: + try: + build_info = manager.get_build_info(build_id) + if build_info is not None: + all_build_info.append({ + **build_info.to_dict(), + 'build_id': build_id + }) + except Exception as e: + app.logger.error(f"Error getting build info for {build_id}: {type(e).__name__}: {e}") + # Skip this build if there's an error + continue all_build_info_sorted = sorted( all_build_info, @@ -387,6 +394,10 @@ def get_all_builds(): @app.route('/builds/', methods=['GET']) def get_build_by_id(build_id): + """ + Get complete build information for a specific build. + Returns all build details including features, remote info, etc. + """ if not manager.build_exists(build_id): response = { 'error': f'build with id {build_id} does not exist.', @@ -400,5 +411,39 @@ def get_build_by_id(build_id): return jsonify(response), 200 +@app.route('/api/builds//status', methods=['GET']) +def get_build_status(build_id): + """ + Lightweight API endpoint for frontend progress polling. + Returns only essential status fields to reduce payload size. + This is called periodically by the JavaScript progress updater. 
+ """ + try: + if not manager.build_exists(build_id): + return jsonify({'error': f'Build {build_id} not found'}), 404 + + build_info = manager.get_build_info(build_id) + if build_info is None: + app.logger.warning(f"Build {build_id} exists but info is None") + return jsonify({'error': f'Build {build_id} not found'}), 404 + + # Return build information including current state + response = { + 'build_id': build_id, + 'state': build_info.progress.state.name, + 'percent': build_info.progress.percent, + 'time_created': build_info.time_created, + 'time_started_running': getattr(build_info, 'time_started_running', None), + 'vehicle_id': build_info.vehicle_id, + 'board': build_info.board, + 'error_message': getattr(build_info, 'error_message', None), + } + + return jsonify(response), 200 + + except Exception as e: + app.logger.error(f"Error fetching build status for {build_id}: {type(e).__name__}: {e}") + return jsonify({'error': str(e)}), 500 + if __name__ == '__main__': app.run() From f16b67892acf97a2d27e47fd5f33a72cba07f276 Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 16:03:34 +0530 Subject: [PATCH 2/9] web: add red color for TIMED_OUT state in UI --- web/static/js/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/static/js/index.js b/web/static/js/index.js index aaba266..6a0304b 100644 --- a/web/static/js/index.js +++ b/web/static/js/index.js @@ -67,7 +67,7 @@ function updateBuildsTable(builds) { status_color = 'success'; } else if (build_info['progress']['state'] == 'PENDING') { status_color = 'warning'; - } else if (build_info['progress']['state'] == 'FAILURE' || build_info['progress']['state'] == 'ERROR') { + } else if (build_info['progress']['state'] == 'FAILURE' || build_info['progress']['state'] == 'ERROR' || build_info['progress']['state'] == 'TIMED_OUT') { status_color = 'danger'; } @@ -216,7 +216,7 @@ async function tryAutoDownload(buildId) { } // Stop running if the build is in a terminal state - if (["FAILURE", 
"SUCCESS", "ERROR"].includes(currentState)) { + if (["FAILURE", "SUCCESS", "ERROR", "TIMED_OUT"].includes(currentState)) { clearInterval(autoDownloadIntervalId); return; } From 20512ae9a7829dbf74d1f0993322afb2130f24cc Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 16:08:35 +0530 Subject: [PATCH 3/9] builder: add timeout handling to subprocess build commands --- builder/builder.py | 149 +++++++-------------------------------------- 1 file changed, 21 insertions(+), 128 deletions(-) diff --git a/builder/builder.py b/builder/builder.py index a89d949..b97b92d 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -1,7 +1,6 @@ import ap_git from build_manager import ( BuildManager as bm, - BuildState, ) import subprocess import os @@ -15,8 +14,7 @@ ) from pathlib import Path -# Build timeout constant - 15 minutes -from common.config import BUILD_TIMEOUT_SECONDS +CBS_BUILD_TIMEOUT_SEC = int(os.getenv('CBS_BUILD_TIMEOUT_SEC', 900)) # 15 minutes default class Builder: """ @@ -63,12 +61,6 @@ def __log_build_info(self, build_id: str) -> None: build_id (str): Unique identifier for the build. """ build_info = bm.get_singleton().get_build_info(build_id) - - # Check if build info exists - if build_info is None: - self.logger.error(f"Build info not found for build_id: {build_id}") - raise RuntimeError(f"Build info not found for build_id: {build_id}") - logpath = bm.get_singleton().get_build_log_path(build_id) with open(logpath, "a") as build_log: build_log.write(f"Vehicle ID: {build_info.vehicle_id}\n" @@ -277,12 +269,6 @@ def __generate_archive(self, build_id: str) -> None: self.logger.info(f"Generated {archive_path}.") def __clean_up_build_workdir(self, build_id: str) -> None: - """ - Removes the temporary build working directory. - - Parameters: - build_id (str): Unique identifier for the build. 
- """ shutil.rmtree(self.__get_path_to_build_dir(build_id)) def __process_build(self, build_id: str) -> None: @@ -293,32 +279,14 @@ def __process_build(self, build_id: str) -> None: Parameters: build_id (str): Unique identifier for the build. """ - try: - self.__create_build_workdir(build_id) - self.__create_build_artifacts_dir(build_id) - self.__log_build_info(build_id) - self.__provision_build_source(build_id) - self.__generate_extrahwdef(build_id) - self.__build(build_id) - - # Only generate archive if NOT timed out - build_info = bm.get_singleton().get_build_info(build_id) - if build_info and build_info.progress.state != BuildState.TIMED_OUT: - self.__generate_archive(build_id) - else: - self.logger.info(f"Skipping archive for timed out build {build_id}") - - self.__clean_up_build_workdir(build_id) - - except RuntimeError as e: - # Handle timeout or other build errors - self.logger.error(f"Build {build_id} failed: {e}") - # Clean up even if build failed - try: - self.__clean_up_build_workdir(build_id) - except Exception as cleanup_error: - self.logger.error(f"Failed to cleanup build {build_id}: {cleanup_error}") - # Don't re-raise, let the worker continue with next build + self.__create_build_workdir(build_id) + self.__create_build_artifacts_dir(build_id) + self.__log_build_info(build_id) + self.__provision_build_source(build_id) + self.__generate_extrahwdef(build_id) + self.__build(build_id) + self.__generate_archive(build_id) + self.__clean_up_build_workdir(build_id) def __get_path_to_build_dir(self, build_id: str) -> str: """ @@ -362,34 +330,7 @@ def __get_path_to_build_src(self, build_id: str) -> str: self.__get_path_to_build_dir(build_id), "build_src" ) - def __check_if_timed_out(self, build_id: str) -> bool: - """ - Check if this build has been marked as TIMED_OUT by the progress updater. - This is checked between build steps to allow early termination if the - progress updater has marked the build as timed out. 
- - Parameters: - build_id (str): The build ID to check - - Returns: - bool: True if build is marked as TIMED_OUT - """ - build_info = bm.get_singleton().get_build_info(build_id) - if build_info is None: - self.logger.error(f"Build info not found for {build_id}") - return False - - # Check if state is TIMED_OUT - if build_info.progress.state == BuildState.TIMED_OUT: - self.logger.warning( - f"Build {build_id} has been marked as TIMED_OUT. " - "Stopping build process." - ) - return True - - return False - def __build(self, build_id: str) -> None: """ Executes the actual build process for a build. @@ -437,12 +378,8 @@ def __build(self, build_id: str) -> None: ) build_log.flush() - # Run the build steps try: - # Check timeout before configure - if self.__check_if_timed_out(build_id): - raise RuntimeError("Build marked as TIMED_OUT before configure") - + # Run the build steps self.logger.info("Running waf configure") build_log.write("Running waf configure\n") build_log.flush() @@ -462,14 +399,9 @@ def __build(self, build_id: str) -> None: stdout=build_log, stderr=build_log, shell=False, - timeout=BUILD_TIMEOUT_SECONDS, - check=True, + timeout=CBS_BUILD_TIMEOUT_SEC, ) - # Check timeout after configure - if self.__check_if_timed_out(build_id): - raise RuntimeError("Build marked as TIMED_OUT after configure") - self.logger.info("Running clean") build_log.write("Running clean\n") build_log.flush() @@ -479,14 +411,9 @@ def __build(self, build_id: str) -> None: stdout=build_log, stderr=build_log, shell=False, - timeout=BUILD_TIMEOUT_SECONDS, - check=True, + timeout=CBS_BUILD_TIMEOUT_SEC, ) - # Check timeout after clean - if self.__check_if_timed_out(build_id): - raise RuntimeError("Build marked as TIMED_OUT after clean") - self.logger.info("Running build") build_log.write("Running build\n") build_log.flush() @@ -497,53 +424,19 @@ def __build(self, build_id: str) -> None: stdout=build_log, stderr=build_log, shell=False, - timeout=BUILD_TIMEOUT_SECONDS, - check=True, + 
timeout=CBS_BUILD_TIMEOUT_SEC, ) - # Check timeout after build (before marking success) - if self.__check_if_timed_out(build_id): - raise RuntimeError("Build marked as TIMED_OUT after build completed") - build_log.write("done build\n") build_log.flush() - - except subprocess.TimeoutExpired as e: - # Build timed out - handle gracefully - error_msg = f"Build timed out after {BUILD_TIMEOUT_SECONDS // 60} minutes" - self.logger.error(f"Build {build_id}: {error_msg}") - - build_log.write(f"\n{'='*50}\n") - build_log.write(f"ERROR: {error_msg}\n") - build_log.write(f"Timeout occurred during subprocess execution.\n") - build_log.write(f"{'='*50}\n") + except subprocess.TimeoutExpired: + self.logger.error( + f"Build {build_id} timed out after " + f"{CBS_BUILD_TIMEOUT_SEC} seconds." + ) + build_log.write( + f"Build timed out after {CBS_BUILD_TIMEOUT_SEC} seconds.\n" + ) build_log.flush() - - # Kill the process if it's still running - if hasattr(e, 'process') and e.process: - try: - e.process.kill() - self.logger.info(f"Killed timed-out process for build {build_id}") - except Exception as kill_error: - self.logger.error(f"Failed to kill process: {kill_error}") - - # Mark as timed out if not already - current_info = bm.get_singleton().get_build_info(build_id) - if current_info and current_info.progress.state != BuildState.TIMED_OUT: - bm.get_singleton().mark_build_timed_out(build_id, error_msg) - - raise RuntimeError(error_msg) from e - - except RuntimeError as e: - # Build was marked as timed out by progress updater - if "TIMED_OUT" in str(e): - self.logger.warning(f"Build {build_id} stopped due to timeout") - build_log.write(f"\n{'='*50}\n") - build_log.write(f"Build stopped: {str(e)}\n") - build_log.write(f"{'='*50}\n") - build_log.flush() - raise - else: - raise def shutdown(self) -> None: """ From e314973c1d466dfe3a72c5566263f4171442aad2 Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 16:13:25 +0530 Subject: [PATCH 4/9] build_manager: add time_started 
tracking and timeout state detection --- build_manager/manager.py | 76 +++++++--------------- build_manager/progress_updater.py | 101 ++++++++---------------------- 2 files changed, 48 insertions(+), 129 deletions(-) diff --git a/build_manager/manager.py b/build_manager/manager.py index e5583af..47146c3 100644 --- a/build_manager/manager.py +++ b/build_manager/manager.py @@ -72,8 +72,7 @@ def __init__(self, percent=0 ) self.time_created = time.time() - self.time_started_running = None # when build state becomes RUNNING - self.error_message = None + self.time_started = None # when build state becomes RUNNING def to_dict(self) -> dict: return { @@ -84,8 +83,7 @@ def to_dict(self) -> dict: 'selected_features': list(self.selected_features), 'progress': self.progress.to_dict(), 'time_created': self.time_created, - 'time_started_running': getattr(self, 'time_started_running', None), - 'error_message': getattr(self, 'error_message', None), + 'time_started': getattr(self, 'time_started', None), } @@ -358,6 +356,27 @@ def __update_build_info(self, keepttl=True ) + def update_build_time_started(self, + build_id: str, + time_started: float) -> None: + """ + Update the build's time_started timestamp. + + Parameters: + build_id (str): The ID of the build to update. + time_started (float): The timestamp when the build started running. 
+ """ + build_info = self.get_build_info(build_id=build_id) + + if build_info is None: + raise ValueError(f"Build with id {build_id} not found.") + + build_info.time_started = time_started + self.__update_build_info( + build_id=build_id, + build_info=build_info + ) + def update_build_progress_percent(self, build_id: str, percent: int) -> None: @@ -394,15 +413,6 @@ def update_build_progress_state(self, if build_info is None: raise ValueError(f"Build with id {build_id} not found.") - old_state = build_info.progress.state - - if old_state != BuildState.RUNNING and new_state == BuildState.RUNNING: - build_info.time_started_running = time.time() - self.logger.info( - f"Build {build_id} transitioned to RUNNING state at " - f"{build_info.time_started_running}" - ) - build_info.progress.state = new_state self.__update_build_info( build_id=build_id, @@ -473,46 +483,6 @@ def get_build_archive_path(self, build_id: str) -> str: self.get_build_artifacts_dir_path(build_id), f"{build_id}.tar.gz" ) - - def mark_build_timed_out(self, build_id: str, error_message: str = None) -> None: - """ - Mark a build as timed out and update its state. - - Parameters: - build_id (str): The ID of the build that timed out. - error_message (str): Optional error message to include. 
- """ - self.logger.warning(f"Marking build {build_id} as TIMED_OUT") - - try: - build_info = self.get_build_info(build_id=build_id) - if build_info is None: - self.logger.error(f"Cannot mark build {build_id} as timed out - build not found") - return - - # Don't override terminal states (SUCCESS/FAILURE already set) - if build_info.progress.state in [BuildState.SUCCESS, BuildState.FAILURE]: - self.logger.warning( - f"Build {build_id} already in terminal state " - f"{build_info.progress.state.name}, not marking as TIMED_OUT" - ) - return - # Update the build state to TIMED_OUT - # Keeping last known progress instead of resetting to 0 - build_info.progress.state = BuildState.TIMED_OUT - - # Store error message if provided - if error_message: - build_info.error_message = error_message - self.logger.warning(f"Build {build_id} timeout error: {error_message}") - - # Update the build info in Redis - self.__update_build_info(build_id=build_id, build_info=build_info) - - self.logger.info(f"Successfully marked build {build_id} as TIMED_OUT") - - except Exception as e: - self.logger.error(f"Failed to mark build {build_id} as timed out: {e}") @staticmethod def get_singleton() -> "BuildManager": diff --git a/build_manager/progress_updater.py b/build_manager/progress_updater.py index 6408889..f3b4809 100644 --- a/build_manager/progress_updater.py +++ b/build_manager/progress_updater.py @@ -7,8 +7,8 @@ BuildState ) import time -# Timeout constant - 15 minutes -from common.config import BUILD_TIMEOUT_SECONDS + +CBS_BUILD_TIMEOUT_SEC = int(os.getenv('CBS_BUILD_TIMEOUT_SEC', 900)) # 15 minutes default class BuildProgressUpdater: """ @@ -159,6 +159,28 @@ def __refresh_running_build_state(self, build_id: str) -> BuildState: raise RuntimeError( "This method should only be called for running builds." 
) + # Set time_started if not already set + if build_info.time_started is None: + start_time = time.time() + bm.get_singleton().update_build_time_started( + build_id=build_id, + time_started=start_time + ) + self.logger.info( + f"Build {build_id} started running at {start_time}" + ) + build_info.time_started = start_time + + # Check for timeout + elapsed = time.time() - build_info.time_started + if elapsed > CBS_BUILD_TIMEOUT_SEC: + self.logger.warning( + f"Build {build_id} timed out after {elapsed:.0f} seconds" + ) + build_info.error_message = ( + f"Build exceeded {CBS_BUILD_TIMEOUT_SEC // 60} minute timeout" + ) + return BuildState.TIMED_OUT # Builder ships the archive post completion # This is irrespective of SUCCESS or FAILURE @@ -216,7 +238,7 @@ def __update_build_percent(self, build_id: str) -> None: # Keep existing percentage pass elif current_state == BuildState.TIMED_OUT: - # Keep existing percentage for timed out builds + # Keep existing percentage pass else: raise Exception("Unhandled BuildState.") @@ -231,73 +253,6 @@ def __update_build_percent(self, build_id: str) -> None: percent=new_percent ) - def __check_build_timeout(self, build_id: str) -> bool: - """ - Check if a running build has exceeded the timeout threshold. - - This provides a backup timeout mechanism in addition to the per-subprocess - timeouts in builder.py. It measures total build duration from when the - build entered RUNNING state, not from submission/addition time. - - Parameters: - build_id (str): The unique ID of the build to check. - - Returns: - bool: True if the build has timed out, False otherwise. 
- """ - build_info = bm.get_singleton().get_build_info(build_id=build_id) - if build_info is None: - self.logger.error(f"No build found with ID {build_id}") - return False - - # Only check timeout for RUNNING builds - if build_info.progress.state != BuildState.RUNNING: - return False - - # Check if we have a start time for RUNNING state - time_started_running: float | None = getattr(build_info, 'time_started_running', None) - if time_started_running is None: - # Build is RUNNING but we don't have a start time yet - # This can happen briefly during state transition - self.logger.debug( - f"Build {build_id} is RUNNING but time_started_running is None" - ) - return False - - # Calculate elapsed time since build started running - current_time = time.time() - elapsed_time = current_time - time_started_running - - self.logger.debug( - f"Build {build_id}: elapsed time = {elapsed_time:.0f}s, " - f"timeout threshold = {BUILD_TIMEOUT_SECONDS}s" - ) - - # Check if build has exceeded timeout - if elapsed_time > BUILD_TIMEOUT_SECONDS: - self.logger.warning( - f"Build {build_id} has timed out after {elapsed_time:.0f} seconds " - f"(threshold: {BUILD_TIMEOUT_SECONDS}s)" - ) - - # Mark the build as timed out - # Note: The builder worker may still be running a subprocess. - # The worker will detect the TIMED_OUT state when it checks - # between build steps and will terminate gracefully. - error_message = ( - f"Build exceeded {BUILD_TIMEOUT_SECONDS // 60} minute timeout. " - f"Build ran for: {elapsed_time / 60:.1f} minutes." - ) - - bm.get_singleton().mark_build_timed_out( - build_id=build_id, - error_message=error_message - ) - - return True - - return False - def __update_build_state(self, build_id: str) -> None: """ Update the state of a given build. 
@@ -307,12 +262,6 @@ def __update_build_state(self, build_id: str) -> None: if build_info is None: raise ValueError(f"No build found with ID {build_id}") - # Check for timeout first (for RUNNING builds) - if self.__check_build_timeout(build_id): - # Build has timed out, no need to check other state transitions - self.logger.info(f"Build {build_id} marked as TIMED_OUT") - return - current_state = build_info.progress.state new_state = current_state self.logger.debug( From a61681cac66b553e2e74f857a3112f9ce9b0338e Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 16:15:24 +0530 Subject: [PATCH 5/9] docker-compose.yml: set CBS_BUILD_TIMEOUT_SEC environment variable --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index cb55812..db78c31 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,7 @@ services: CBS_GITHUB_ACCESS_TOKEN: ${CBS_GITHUB_ACCESS_TOKEN} PYTHONPATH: /app GUNICORN_CMD_ARGS: --bind=0.0.0.0:80 --timeout=300 + CBS_BUILD_TIMEOUT_SEC: ${CBS_BUILD_TIMEOUT_SEC} volumes: - ./base:/base:rw depends_on: @@ -40,6 +41,7 @@ services: CBS_BASEDIR: /base CBS_LOG_LEVEL: ${CBS_LOG_LEVEL:-INFO} PYTHONPATH: /app + CBS_BUILD_TIMEOUT_SEC: ${CBS_BUILD_TIMEOUT_SEC} volumes: - ./base:/base:rw depends_on: From 912f15a67be6ceab6df2253c94a4c1f36efa6f13 Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 17:46:41 +0530 Subject: [PATCH 6/9] web: add red color for TIMED_OUT state in UI --- web/static/js/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/static/js/index.js b/web/static/js/index.js index 6a0304b..aaba266 100644 --- a/web/static/js/index.js +++ b/web/static/js/index.js @@ -67,7 +67,7 @@ function updateBuildsTable(builds) { status_color = 'success'; } else if (build_info['progress']['state'] == 'PENDING') { status_color = 'warning'; - } else if (build_info['progress']['state'] == 'FAILURE' || build_info['progress']['state'] == 'ERROR' ||
build_info['progress']['state'] == 'TIMED_OUT') { + } else if (build_info['progress']['state'] == 'FAILURE' || build_info['progress']['state'] == 'ERROR') { status_color = 'danger'; } @@ -216,7 +216,7 @@ async function tryAutoDownload(buildId) { } // Stop running if the build is in a terminal state - if (["FAILURE", "SUCCESS", "ERROR", "TIMED_OUT"].includes(currentState)) { + if (["FAILURE", "SUCCESS", "ERROR"].includes(currentState)) { clearInterval(autoDownloadIntervalId); return; } From 913b453d145cc09daf94b95fd8c7127d9b8704ce Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 17:46:48 +0530 Subject: [PATCH 7/9] builder: add timeout handling to subprocess build commands --- builder/builder.py | 106 ++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 60 deletions(-) diff --git a/builder/builder.py b/builder/builder.py index b97b92d..5aaa0aa 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -14,7 +14,6 @@ ) from pathlib import Path -CBS_BUILD_TIMEOUT_SEC = int(os.getenv('CBS_BUILD_TIMEOUT_SEC', 900)) # 15 minutes default class Builder: """ @@ -378,65 +377,52 @@ def __build(self, build_id: str) -> None: ) build_log.flush() - try: - # Run the build steps - self.logger.info("Running waf configure") - build_log.write("Running waf configure\n") - build_log.flush() - subprocess.run( - [ - "python3", - "./waf", - "configure", - "--board", - build_info.board, - "--out", - self.__get_path_to_build_dir(build_id), - "--extra-hwdef", - self.__get_path_to_extra_hwdef(build_id), - ], - cwd=self.__get_path_to_build_src(build_id), - stdout=build_log, - stderr=build_log, - shell=False, - timeout=CBS_BUILD_TIMEOUT_SEC, - ) - - self.logger.info("Running clean") - build_log.write("Running clean\n") - build_log.flush() - subprocess.run( - ["python3", "./waf", "clean"], - cwd=self.__get_path_to_build_src(build_id), - stdout=build_log, - stderr=build_log, - shell=False, - timeout=CBS_BUILD_TIMEOUT_SEC, - ) - - self.logger.info("Running 
build") - build_log.write("Running build\n") - build_log.flush() - build_command = vehicle.waf_build_command - subprocess.run( - ["python3", "./waf", build_command], - cwd=self.__get_path_to_build_src(build_id), - stdout=build_log, - stderr=build_log, - shell=False, - timeout=CBS_BUILD_TIMEOUT_SEC, - ) - build_log.write("done build\n") - build_log.flush() - except subprocess.TimeoutExpired: - self.logger.error( - f"Build {build_id} timed out after " - f"{CBS_BUILD_TIMEOUT_SEC} seconds." - ) - build_log.write( - f"Build timed out after {CBS_BUILD_TIMEOUT_SEC} seconds.\n" - ) - build_log.flush() + # Run the build steps + self.logger.info("Running waf configure") + build_log.write("Running waf configure\n") + build_log.flush() + subprocess.run( + [ + "python3", + "./waf", + "configure", + "--board", + build_info.board, + "--out", + self.__get_path_to_build_dir(build_id), + "--extra-hwdef", + self.__get_path_to_extra_hwdef(build_id), + ], + cwd=self.__get_path_to_build_src(build_id), + stdout=build_log, + stderr=build_log, + shell=False, + ) + + self.logger.info("Running clean") + build_log.write("Running clean\n") + build_log.flush() + subprocess.run( + ["python3", "./waf", "clean"], + cwd=self.__get_path_to_build_src(build_id), + stdout=build_log, + stderr=build_log, + shell=False, + ) + + self.logger.info("Running build") + build_log.write("Running build\n") + build_log.flush() + build_command = vehicle.waf_build_command + subprocess.run( + ["python3", "./waf", build_command], + cwd=self.__get_path_to_build_src(build_id), + stdout=build_log, + stderr=build_log, + shell=False, + ) + build_log.write("done build\n") + build_log.flush() def shutdown(self) -> None: """ From 1104228aba01a151049af4620c3a286a1a29cf6c Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 17:46:59 +0530 Subject: [PATCH 8/9] build_manager: add time_started tracking and timeout detection --- build_manager/manager.py | 26 +------------------------- build_manager/progress_updater.py | 30 
------------------------------ 2 files changed, 1 insertion(+), 55 deletions(-) diff --git a/build_manager/manager.py b/build_manager/manager.py index 47146c3..4143f64 100644 --- a/build_manager/manager.py +++ b/build_manager/manager.py @@ -15,7 +15,6 @@ class BuildState(Enum): SUCCESS = 2 FAILURE = 3 ERROR = 4 - TIMED_OUT = 5 class BuildProgress: @@ -72,7 +71,6 @@ def __init__(self, percent=0 ) self.time_created = time.time() - self.time_started = None # when build state becomes RUNNING def to_dict(self) -> dict: return { @@ -83,7 +81,6 @@ def to_dict(self) -> dict: 'selected_features': list(self.selected_features), 'progress': self.progress.to_dict(), 'time_created': self.time_created, - 'time_started': getattr(self, 'time_started', None), } @@ -356,27 +353,6 @@ def __update_build_info(self, keepttl=True ) - def update_build_time_started(self, - build_id: str, - time_started: float) -> None: - """ - Update the build's time_started timestamp. - - Parameters: - build_id (str): The ID of the build to update. - time_started (float): The timestamp when the build started running. - """ - build_info = self.get_build_info(build_id=build_id) - - if build_info is None: - raise ValueError(f"Build with id {build_id} not found.") - - build_info.time_started = time_started - self.__update_build_info( - build_id=build_id, - build_info=build_info - ) - def update_build_progress_percent(self, build_id: str, percent: int) -> None: @@ -402,7 +378,7 @@ def update_build_progress_state(self, build_id: str, new_state: BuildState) -> None: """ - Update the build's state (e.g., PENDING, RUNNING, SUCCESS, FAILURE, TIMED_OUT). + Update the build's state (e.g., PENDING, RUNNING, SUCCESS, FAILURE). Parameters: build_id (str): The ID of the build to update. 
diff --git a/build_manager/progress_updater.py b/build_manager/progress_updater.py index f3b4809..c6ddd35 100644 --- a/build_manager/progress_updater.py +++ b/build_manager/progress_updater.py @@ -6,9 +6,7 @@ BuildManager as bm, BuildState ) -import time -CBS_BUILD_TIMEOUT_SEC = int(os.getenv('CBS_BUILD_TIMEOUT_SEC', 900)) # 15 minutes default class BuildProgressUpdater: """ @@ -159,28 +157,6 @@ def __refresh_running_build_state(self, build_id: str) -> BuildState: raise RuntimeError( "This method should only be called for running builds." ) - # Set time_started if not already set - if build_info.time_started is None: - start_time = time.time() - bm.get_singleton().update_build_time_started( - build_id=build_id, - time_started=start_time - ) - self.logger.info( - f"Build {build_id} started running at {start_time}" - ) - build_info.time_started = start_time - - # Check for timeout - elapsed = time.time() - build_info.time_started - if elapsed > CBS_BUILD_TIMEOUT_SEC: - self.logger.warning( - f"Build {build_id} timed out after {elapsed:.0f} seconds" - ) - build_info.error_message = ( - f"Build exceeded {CBS_BUILD_TIMEOUT_SEC // 60} minute timeout" - ) - return BuildState.TIMED_OUT # Builder ships the archive post completion # This is irrespective of SUCCESS or FAILURE @@ -237,9 +213,6 @@ def __update_build_percent(self, build_id: str) -> None: elif current_state == BuildState.ERROR: # Keep existing percentage pass - elif current_state == BuildState.TIMED_OUT: - # Keep existing percentage - pass else: raise Exception("Unhandled BuildState.") @@ -286,9 +259,6 @@ def __update_build_state(self, build_id: str) -> None: elif current_state == BuildState.ERROR: # ERROR is a conclusive state pass - elif current_state == BuildState.TIMED_OUT: - # TIMED_OUT is a conclusive state - pass else: raise Exception("Unhandled BuildState.") From d10ba5841362f1da737e79850a15fd85901d6ae9 Mon Sep 17 00:00:00 2001 From: Sahil Date: Tue, 20 Jan 2026 17:47:11 +0530 Subject: [PATCH 9/9] 
docker-compose.yml: set CBS_BUILD_TIMEOUT_SEC environment variable --- docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index db78c31..cb55812 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,6 @@ services: CBS_GITHUB_ACCESS_TOKEN: ${CBS_GITHUB_ACCESS_TOKEN} PYTHONPATH: /app GUNICORN_CMD_ARGS: --bind=0.0.0.0:80 --timeout=300 - CBS_BUILD_TIMEOUT_SEC: ${CBS_BUILD_TIMEOUT_SEC} volumes: - ./base:/base:rw depends_on: @@ -41,7 +40,6 @@ services: CBS_BASEDIR: /base CBS_LOG_LEVEL: ${CBS_LOG_LEVEL:-INFO} PYTHONPATH: /app - CBS_BUILD_TIMEOUT_SEC: ${CBS_BUILD_TIMEOUT_SEC} volumes: - ./base:/base:rw depends_on: