From 1f8e4c650255bb4966ff4dcd51109d1970855c67 Mon Sep 17 00:00:00 2001 From: Ambud <9707358+ambud@users.noreply.github.com> Date: Fri, 17 Feb 2023 22:22:16 +0000 Subject: [PATCH] Add support for app ids in spark --- gprofiler/gprofiler_types.py | 3 +-- gprofiler/metadata/application_identifiers_java.py | 8 ++++++++ gprofiler/spark/spark_collector.py | 1 - 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/gprofiler/gprofiler_types.py b/gprofiler/gprofiler_types.py index 4fc96afec..bbb4bd989 100644 --- a/gprofiler/gprofiler_types.py +++ b/gprofiler/gprofiler_types.py @@ -34,7 +34,6 @@ class ProfileData: class ProfilingErrorStack(StackToSampleCount): - PROFILING_ERROR_STACK_PATTERN = re.compile(r".*;\[Profiling .+: .+\]") def __init__(self, what: str, reason: str, comm: str): @@ -54,7 +53,7 @@ def attach_error_to_stacks( ) -> StackToSampleCount: _, error_frame = next(iter(error_stack)).split(";", maxsplit=1) dest_stacks: StackToSampleCount = StackToSampleCount() - for (frame, count) in source_stacks.items(): + for frame, count in source_stacks.items(): comm, stack = frame.split(";", maxsplit=1) annotated = f"{comm};{error_frame};{stack}" dest_stacks[annotated] = count diff --git a/gprofiler/metadata/application_identifiers_java.py b/gprofiler/metadata/application_identifiers_java.py index 36e0912c5..42da7eb26 100644 --- a/gprofiler/metadata/application_identifiers_java.py +++ b/gprofiler/metadata/application_identifiers_java.py @@ -50,6 +50,7 @@ class _JavaSparkApplicationIdentifier(_ApplicationIdentifier): _SPARK_PROPS_FILE = os.path.join("__spark_conf__", "__spark_conf__.properties") _APP_NAME_NOT_FOUND = "app name not found" _APP_NAME_KEY = "spark.app.name" + _APP_ID_KEY = "--app-id" @staticmethod def _is_java_spark_executor(process: Process) -> bool: @@ -73,4 +74,11 @@ def get_app_id(self, process: Process) -> Optional[str]: ) if self._APP_NAME_KEY in props: return f"spark: {props[self._APP_NAME_KEY]}" + args = process.cmdline() + try: + for idx, x in enumerate(args): + if x == self._APP_ID_KEY: + return f"spark: {args[idx+1]}" + except Exception: + pass return self._APP_NAME_NOT_FOUND diff --git a/gprofiler/spark/spark_collector.py b/gprofiler/spark/spark_collector.py index 433c357c0..f484d735d 100644 --- a/gprofiler/spark/spark_collector.py +++ b/gprofiler/spark/spark_collector.py @@ -485,7 +485,6 @@ def _yarn_get_spark_apps(self, *args: Any, **kwargs: Any) -> Dict[str, Tuple[str if metrics_json.get("apps"): if metrics_json["apps"].get("app") is not None: - for app_json in metrics_json["apps"]["app"]: app_id = app_json.get("id") tracking_url = app_json.get("trackingUrl")