DataDog · sarahchen6 · Feb 6, 2026
@@ -4,12 +4,29 @@ include:
   - local: ".gitlab/macrobenchmarks.yml"
   - local: ".gitlab/exploration-tests.yml"
   - local: ".gitlab/ci-visibility-tests.yml"
+  - project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
+    file: '.gitlab/ci-java-spring-petclinic.yml'
+    ref: 'main'
+  - project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
+    file: '.gitlab/ci-java-insecure-bank.yml'
+    ref: 'main'
+  - project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
+    file: '.gitlab/ci-java-dacapo.yml'
+    ref: 'main'
 
 stages:
   - build
   - publish
   - shared-pipeline
   - benchmarks
+  - infrastructure
+  - java-spring-petclinic-tests
+  - java-spring-petclinic-macrobenchmarks
+  - java-startup-microbenchmarks
+  - java-load-microbenchmarks
+  - java-dacapo-microbenchmarks
+  - generate-slos
+  - upload-to-bp-api
   - macrobenchmarks
   - tests
   - exploration-tests

@@ -1,111 +1,4 @@
-.benchmarks:
-  stage: benchmarks
-  timeout: 1h
-  tags: ["runner:apm-k8s-tweaked-metal"]
-  image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-java-benchmarks
-  needs: [ "build", "publish-artifacts-to-s3" ]
-  rules:
-    - if: '$POPULATE_CACHE'
-      when: never
-    - if: '$CI_COMMIT_TAG =~ /^v?[0-9]+\.[0-9]+\.[0-9]+$/'
-      when: manual
-      allow_failure: true
-    - if: '$CI_COMMIT_BRANCH == "master"'
-      when: on_success
-      interruptible: false
-    - when: on_success
-      interruptible: true
-  script:
-    - export ARTIFACTS_DIR="$(pwd)/reports" && mkdir -p "${ARTIFACTS_DIR}"
-    - git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/"
-    - git clone --branch dd-trace-java/tracer-benchmarks-parallel https://github.com/DataDog/benchmarking-platform.git /platform && cd /platform
-  artifacts:
-    name: "reports"
-    paths:
-      - reports/
-    expire_in: 3 months
-  variables:
-    UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance.
-    UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-java"
-    UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built.
-    UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for.
-
-benchmarks-startup:
-  extends: .benchmarks
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/capture-hardware-software-info.sh
-    - ./steps/run-benchmarks.sh startup
-    - ./steps/analyze-results.sh startup
-
-benchmarks-load:
-  extends: .benchmarks
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/capture-hardware-software-info.sh
-    - ./steps/run-benchmarks.sh load
-    - ./steps/analyze-results.sh load
-
-benchmarks-dacapo:
-  extends: .benchmarks
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/capture-hardware-software-info.sh
-    - ./steps/run-benchmarks.sh dacapo
-    - ./steps/analyze-results.sh dacapo
-
-benchmarks-post-results:
-  extends: .benchmarks
-  tags: ["arch:amd64"]
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/upload-results-to-s3.sh
-    - ./steps/post-pr-comment.sh
-  needs:
-    - job: benchmarks-startup
-      artifacts: true
-    - job: benchmarks-load
-      artifacts: true
-    - job: benchmarks-dacapo
-      artifacts: true
-
-check-big-regressions:
-  extends: .benchmarks
-  needs:
-    - job: benchmarks-startup
-      artifacts: true
-    - job: benchmarks-dacapo
-      artifacts: true
-  when: on_success
-  tags: ["arch:amd64"]
-  rules:
-    - if: '$POPULATE_CACHE'
-      when: never
-    - if: '$CI_COMMIT_BRANCH =~ /backport-pr-/'
-      when: never
-    - if: '$CI_COMMIT_BRANCH !~ /^(master|release\/)/'
-      when: on_success
-    - when: never
-  # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
-  # need to convert them
-  script:
-    - !reference [ .benchmarks, script ]
-    - | 
-      for benchmarkType in startup dacapo; do
-          find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do
-            relpath="${file#$ARTIFACTS_DIR/$benchmarkType/}"
-            prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json
-            prefix="${prefix#./}" # Remove any leading ./
-            prefix="${prefix//\//-}" # Replace / with -
-            case "$file" in
-              *benchmark-baseline.json) type="baseline" ;;
-              *benchmark-candidate.json) type="candidate" ;;
-            esac
-            echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json"
-            cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
-          done
-      done
-    - bp-runner $CI_PROJECT_DIR/.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
+# Insert more benchmark logic here
 
 .dsm-kafka-benchmarks:
   stage: benchmarks

@@ -92,12 +92,6 @@ check-slo-breaches:
       artifacts: true
     - job: otel-latest
       artifacts: true
-    - job: benchmarks-startup
-      artifacts: true
-    - job: benchmarks-load
-      artifacts: true
-    - job: benchmarks-dacapo
-      artifacts: true
   script:
     # macrobenchmarks are located here, files are already in "converted" format
     - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" && mkdir -p "${ARTIFACTS_DIR}"

@@ -0,0 +1,36 @@
+# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-thresholds-for-pre-release-gates%3F
+
+experiments:
+  - name: Run SLO breach check
+    steps:
+      - name: SLO breach check
+        run: fail_on_breach
+        # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-a-warning-range-for-pre-release-gates%3F
+        warning_range: 10
+        # File spec
+        #   https://datadoghq.atlassian.net/wiki/x/LgI1LgE#Specification
+        # Measurements
+        #   https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario
+        scenarios:
+          # Note that the thresholds here are chosen based on the confidence interval, with a 10% adjustment.
+
+          # Standard macrobenchmarks
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fonly-tracing&trendsType=scenario
+          - name: normal_operation/only-tracing
+            thresholds:
+              - agg_http_req_duration_p50 < 2.6 ms
+              - agg_http_req_duration_p99 < 8.5 ms
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario
+          - name: normal_operation/otel-latest
+            thresholds:
+              - agg_http_req_duration_p50 < 2.5 ms
+              - agg_http_req_duration_p99 < 10 ms
+
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fonly-tracing&trendsType=scenario
+          - name: high_load/only-tracing
+            thresholds:
+              - throughput > 1100.0 op/s
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fotel-latest&trendsType=scenario
+          - name: high_load/otel-latest
+            thresholds:
+              - throughput > 1100.0 op/s