diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 965b273941..0000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -# Prometheus has switched to GitHub action. -# Circle CI is not disabled repository-wise so that previous pull requests -# continue working. -# This file does not generate any CircleCI workflow. - -version: 2.1 - -executors: - golang: - docker: - - image: busybox - -jobs: - noopjob: - executor: golang - - steps: - - run: - command: "true" - -workflows: - version: 2 - prometheus: - jobs: - - noopjob - triggers: - - schedule: - cron: "0 0 30 2 *" - filters: - branches: - only: - - main diff --git a/.gcloudignore b/.gcloudignore new file mode 100644 index 0000000000..87dd89ddeb --- /dev/null +++ b/.gcloudignore @@ -0,0 +1,3 @@ +.git +.gitignore +#!include:.gitignore diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 7f7cec9cda..0000000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,10 +0,0 @@ -/web/ui @juliusv -/web/ui/module @juliusv @nexucis -/storage/remote @cstyan @bwplotka @tomwilkie -/storage/remote/otlptranslator @aknuds1 @jesusvazquez -/discovery/kubernetes @brancz -/tsdb @jesusvazquez -/promql @roidelapluie -/cmd/promtool @dgl -/documentation/prometheus-mixin @metalmatze - diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index 90773a63ef..0000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: Bug report -description: Create a report to help us improve. -body: - - type: markdown - attributes: - value: | - Thank you for opening a bug report for Prometheus. - - Please do *NOT* ask support questions in Github issues. - - If your issue is not a feature request or bug report use our [community support](https://prometheus.io/community/). - - There is also [commercial support](https://prometheus.io/support-training/) available. 
- - type: textarea - attributes: - label: What did you do? - description: Please provide steps for us to reproduce this issue. - validations: - required: true - - type: textarea - attributes: - label: What did you expect to see? - - type: textarea - attributes: - label: What did you see instead? Under which circumstances? - validations: - required: true - - type: markdown - attributes: - value: | - ## Environment - - type: input - attributes: - label: System information - description: insert output of `uname -srm` here, or operating system version - placeholder: e.g. Linux 5.16.15 x86_64 - - type: textarea - attributes: - label: Prometheus version - description: Insert output of `prometheus --version` here. - render: text - placeholder: | - e.g. prometheus, version 2.23.0 (branch: HEAD, revision: 26d89b4b0776fe4cd5a3656dfa520f119a375273) - build user: root@37609b3a0a21 - build date: 20201126-10:56:17 - go version: go1.15.5 - platform: linux/amd64 - - type: textarea - attributes: - label: Prometheus configuration file - description: Insert relevant configuration here. Don't forget to remove secrets. - render: yaml - - type: textarea - attributes: - label: Alertmanager version - description: Insert output of `alertmanager --version` here (if relevant to the issue). - render: text - placeholder: | - e.g. alertmanager, version 0.22.2 (branch: HEAD, revision: 44f8adc06af5101ad64bd8b9c8b18273f2922051) - build user: root@b595c7f32520 - build date: 20210602-07:50:37 - go version: go1.16.4 - platform: linux/amd64 - - type: textarea - attributes: - label: Alertmanager configuration file - description: Insert relevant configuration here. Don't forget to remove secrets. - render: yaml - - type: textarea - attributes: - label: Logs - description: Insert Prometheus and Alertmanager logs relevant to the issue here. 
- render: text diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index bb4e2d24c9..0000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,8 +0,0 @@ -blank_issues_enabled: true -contact_links: - - name: Prometheus Community Support - url: https://prometheus.io/community/ - about: If you need help or support, please request help here. - - name: Commercial Support & Training - url: https://prometheus.io/support-training/ - about: If you want commercial support or training, vendors are listed here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml deleted file mode 100644 index 40f6f1388c..0000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: Feature request -description: Suggest an idea for this project. -body: - - type: markdown - attributes: - value: >- - Please do *NOT* ask support questions in Github issues. - - - If your issue is not a feature request or bug report use - our [community support](https://prometheus.io/community/). - - - There is also [commercial - support](https://prometheus.io/support-training/) available. - - type: textarea - attributes: - label: Proposal - description: Use case. Why is this important? - placeholder: “Nice to have” is not a good use case. :) - validations: - required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index cf90177b1d..0000000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,19 +0,0 @@ - diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000000..6a4ecfa61f --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,51 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +version: 2 +updates: +- package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" + groups: + all: + patterns: + - "*" + open-pull-requests-limit: 0 # This ensures that only security updates are created. +- package-ecosystem: "npm" + directory: "/web/ui" + schedule: + interval: "weekly" + groups: + all: + patterns: + - "*" + open-pull-requests-limit: 0 # This ensures that only security updates are created. +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + groups: + all: + patterns: + - "*" + open-pull-requests-limit: 0 # This ensures that only security updates are created. +- package-ecosystem: "docker" + directory: "/" + schedule: + interval: "weekly" + groups: + all: + patterns: + - "*" + open-pull-requests-limit: 0 # This ensures that only security updates are created. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 191e07ffac..0000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,27 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "docker" - directory: "/" - schedule: - interval: "monthly" - - package-ecosystem: "github-actions" - directories: - - "/" - - "/scripts" - schedule: - interval: "monthly" - - package-ecosystem: "gomod" - directories: - - "/" - - "/documentation/examples/remote_storage" - - "/internal/tools" - schedule: - interval: "monthly" - groups: - k8s.io: - patterns: - - "k8s.io/*" - go.opentelemetry.io: - patterns: - - "go.opentelemetry.io/*" - open-pull-requests-limit: 20 diff --git a/.github/workflows/automerge-dependabot.yml b/.github/workflows/automerge-dependabot.yml index 3909f57329..cb8ce2f46a 100644 --- a/.github/workflows/automerge-dependabot.yml +++ b/.github/workflows/automerge-dependabot.yml @@ -1,4 +1,17 @@ ---- +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ name: Dependabot auto-merge on: pull_request @@ -7,19 +20,17 @@ concurrency: cancel-in-progress: true permissions: - contents: read + contents: write + pull-requests: write jobs: dependabot: - permissions: - contents: write - pull-requests: write runs-on: ubuntu-latest - if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' && github.repository_owner == 'prometheus' }} + if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' }} steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # v2.3.0 + uses: dependabot/fetch-metadata@dbb049abf0d677abbd7f7eee0375145b417fdd34 # v2.2.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" - name: Enable auto-merge for Dependabot PRs diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml deleted file mode 100644 index 4e942f1f3b..0000000000 --- a/.github/workflows/buf-lint.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: buf.build -on: - pull_request: - paths: - - ".github/workflows/buf-lint.yml" - - "**.proto" -permissions: - contents: read - -jobs: - buf: - name: lint - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 - with: - input: 'prompb' - - uses: bufbuild/buf-breaking-action@c57b3d842a5c3f3b454756ef65305a50a587c5ba # v1.1.4 - with: - input: 'prompb' - against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD,subdir=prompb' diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml deleted file mode 100644 index add72cc89c..0000000000 --- a/.github/workflows/buf.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: buf.build -on: - push: - branches: - - main -permissions: - contents: read - -jobs: - buf: - name: 
lint and publish - runs-on: ubuntu-latest - if: github.repository_owner == 'prometheus' - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 - with: - input: 'prompb' - - uses: bufbuild/buf-breaking-action@c57b3d842a5c3f3b454756ef65305a50a587c5ba # v1.1.4 - with: - input: 'prompb' - against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD~1,subdir=prompb' - - uses: bufbuild/buf-push-action@a654ff18effe4641ebea4a4ce242c49800728459 # v1.1.1 - with: - input: 'prompb' - buf_token: ${{ secrets.BUF_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index ea10fd0091..0000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,328 +0,0 @@ ---- -name: CI -on: - pull_request: - push: - -permissions: - contents: read - -jobs: - test_go: - name: Go tests - runs-on: ubuntu-latest - container: - # Whenever the Go version is updated here, .promu.yml - # should also be updated. 
- image: quay.io/prometheus/golang-builder:1.24-base - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/setup_environment - with: - enable_npm: true - - run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1 - - run: go test ./tsdb/ -test.tsdb-isolation=false - - run: make -C documentation/examples/remote_storage - - run: make -C documentation/examples - - test_go_more: - name: More Go tests - runs-on: ubuntu-latest - container: - image: quay.io/prometheus/golang-builder:1.24-base - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/setup_environment - - run: go test --tags=dedupelabels ./... - - run: go test --tags=slicelabels -race ./cmd/prometheus - - run: go test --tags=forcedirectio -race ./tsdb/ - - run: GOARCH=386 go test ./... - - uses: ./.github/promci/actions/check_proto - with: - version: "3.15.8" - - test_go_oldest: - name: Go tests with previous Go version - runs-on: ubuntu-latest - env: - # Enforce the Go version. - GOTOOLCHAIN: local - container: - # The go version in this image should be N-1 wrt test_go. - image: quay.io/prometheus/golang-builder:1.23-base - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - run: make build - # Don't run NPM build; don't run race-detector. - - run: make test GO_ONLY=1 test-flags="" - - test_ui: - name: UI tests - runs-on: ubuntu-latest - # Whenever the Go version is updated here, .promu.yml - # should also be updated. 
- container: - image: quay.io/prometheus/golang-builder:1.24-base - - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/setup_environment - with: - enable_go: false - enable_npm: true - - run: make assets-tarball - - run: make ui-lint - - run: make ui-test - - uses: ./.github/promci/actions/save_artifacts - with: - directory: .tarballs - - test_windows: - name: Go tests on Windows - runs-on: windows-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 - with: - go-version: 1.24.x - - run: | - $TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"} - go test $TestTargets -vet=off -v - shell: powershell - - test_mixins: - name: Mixins tests - runs-on: ubuntu-latest - # Whenever the Go version is updated here, .promu.yml - # should also be updated. - container: - image: quay.io/prometheus/golang-builder:1.24-base - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - run: go install ./cmd/promtool/. 
- - run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest - - run: go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest - - run: make -C documentation/prometheus-mixin clean - - run: make -C documentation/prometheus-mixin jb_install - - run: make -C documentation/prometheus-mixin - - run: git diff --exit-code - - build: - name: Build Prometheus for common architectures - runs-on: ubuntu-latest - if: | - !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')) - && - !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) - && - !(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')) - && - !(github.event_name == 'push' && github.event.ref == 'refs/heads/main') - strategy: - matrix: - thread: [ 0, 1, 2 ] - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/build - with: - promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" - parallelism: 3 - thread: ${{ matrix.thread }} - build_all: - name: Build Prometheus for all architectures - runs-on: ubuntu-latest - if: | - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')) - || - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) - || - (github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')) - || - (github.event_name == 'push' && github.event.ref == 'refs/heads/main') - strategy: - matrix: - thread: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] - - # Whenever the Go version is updated here, .promu.yml - # should also be updated. 
- steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/build - with: - parallelism: 12 - thread: ${{ matrix.thread }} - build_all_status: - # This status check aggregates the individual matrix jobs of the "Build - # Prometheus for all architectures" step into a final status. Fails if a - # single matrix job fails, succeeds if all matrix jobs succeed. - # See https://github.com/orgs/community/discussions/4324 for why this is - # needed - name: Report status of build Prometheus for all architectures - runs-on: ubuntu-latest - needs: [build_all] - # The run condition needs to include always(). Otherwise actions - # behave unexpected: - # only "needs" will make the Status Report be skipped if one of the builds fails https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-jobs-in-a-workflow#defining-prerequisite-jobs - # And skipped is treated as success https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborat[…]n-repositories-with-code-quality-features/about-status-checks - # Adding always ensures that the status check is run independently of the - # results of Build All - if: always() && github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-') - steps: - - name: Successful build - if: ${{ !(contains(needs.*.result, 'failure')) && !(contains(needs.*.result, 'cancelled')) }} - run: exit 0 - - name: Failing or cancelled build - if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} - run: exit 1 - check_generated_parser: - name: Check generated parser - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - name: Install Go - uses: 
actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 - with: - cache: false - go-version: 1.24.x - - name: Run goyacc and check for diff - run: make install-goyacc check-generated-parser - golangci: - name: golangci-lint - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - name: Install Go - uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 - with: - go-version: 1.24.x - - name: Install snmp_exporter/generator dependencies - run: sudo apt-get update && sudo apt-get -y install libsnmp-dev - if: github.repository == 'prometheus/snmp_exporter' - - name: Lint - uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7.0.0 - with: - args: --verbose - # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. - version: v2.1.5 - fuzzing: - uses: ./.github/workflows/fuzzing.yml - if: github.event_name == 'pull_request' - codeql: - uses: ./.github/workflows/codeql-analysis.yml - permissions: - contents: read - security-events: write - - publish_main: - name: Publish main branch artifacts - runs-on: ubuntu-latest - needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] - if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/publish_main - with: - docker_hub_login: ${{ secrets.docker_hub_login }} - docker_hub_password: ${{ secrets.docker_hub_password }} - quay_io_login: ${{ secrets.quay_io_login }} - quay_io_password: ${{ secrets.quay_io_password }} - publish_release: - name: Publish release artefacts - runs-on: ubuntu-latest - needs: [test_ui, test_go, test_go_more, 
test_go_oldest, test_windows, golangci, codeql, build_all] - if: | - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')) - || - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - uses: ./.github/promci/actions/publish_release - with: - docker_hub_login: ${{ secrets.docker_hub_login }} - docker_hub_password: ${{ secrets.docker_hub_password }} - quay_io_login: ${{ secrets.quay_io_login }} - quay_io_password: ${{ secrets.quay_io_password }} - github_token: ${{ secrets.PROMBOT_GITHUB_TOKEN }} - publish_ui_release: - name: Publish UI on npm Registry - runs-on: ubuntu-latest - needs: [test_ui, codeql] - steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - - name: Install nodejs - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 - with: - node-version-file: "web/ui/.nvmrc" - registry-url: "https://registry.npmjs.org" - - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 - with: - path: ~/.npm - key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} - restore-keys: | - ${{ runner.os }}-node- - - name: Check libraries version - if: | - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')) - || - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) - run: ./scripts/ui_release.sh --check-package "$(./scripts/get_module_version.sh ${GH_REF_NAME})" - env: - GH_REF_NAME: ${{ github.ref_name }} - - name: build - run: make assets - - name: Copy files before publishing libs - run: ./scripts/ui_release.sh --copy - - name: Publish dry-run libraries - if: | - !(github.event_name == 
'push' && startsWith(github.ref, 'refs/tags/v2.')) - && - !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) - run: ./scripts/ui_release.sh --publish dry-run - - name: Publish libraries - if: | - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')) - || - (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) - run: ./scripts/ui_release.sh --publish - env: - # The setup-node action writes an .npmrc file with this env variable - # as the placeholder for the auth token - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index b444815d3c..0000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,40 +0,0 @@ ---- -name: "CodeQL" - -on: - workflow_call: - schedule: - - cron: "26 14 * * 1" - -permissions: - contents: read - security-events: write - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - security-events: write - - strategy: - fail-fast: false - matrix: - language: ["javascript"] - - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - - name: Initialize CodeQL - uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 - with: - languages: ${{ matrix.language }} - - - name: Autobuild - uses: github/codeql-action/autobuild@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml deleted file mode 100644 index 7de8bb8da7..0000000000 --- a/.github/workflows/container_description.yml +++ /dev/null @@ -1,61 +0,0 @@ ---- -name: Push README to Docker Hub -on: - push: - paths: - - "README.md" - - "README-containers.md" 
- - ".github/workflows/container_description.yml" - branches: [ main, master ] - -permissions: - contents: read - -jobs: - PushDockerHubReadme: - runs-on: ubuntu-latest - name: Push README to Docker Hub - if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. - steps: - - name: git checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - name: Set docker hub repo name - run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - - name: Push README to Dockerhub - uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 - env: - DOCKER_USER: ${{ secrets.DOCKER_HUB_LOGIN }} - DOCKER_PASS: ${{ secrets.DOCKER_HUB_PASSWORD }} - with: - destination_container_repo: ${{ env.DOCKER_REPO_NAME }} - provider: dockerhub - short_description: ${{ env.DOCKER_REPO_NAME }} - # Empty string results in README-containers.md being pushed if it - # exists. Otherwise, README.md is pushed. - readme_file: '' - - PushQuayIoReadme: - runs-on: ubuntu-latest - name: Push README to quay.io - if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. 
- steps: - - name: git checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - name: Set quay.io org name - run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - - name: Set quay.io repo name - run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - - name: Push README to quay.io - uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 - env: - DOCKER_APIKEY: ${{ secrets.QUAY_IO_API_TOKEN }} - with: - destination_container_repo: ${{ env.DOCKER_REPO_NAME }} - provider: quay - # Empty string results in README-containers.md being pushed if it - # exists. Otherwise, README.md is pushed. - readme_file: '' diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml deleted file mode 100644 index 27c09b4187..0000000000 --- a/.github/workflows/fuzzing.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: CIFuzz -on: - workflow_call: -permissions: - contents: read - -jobs: - Fuzzing: - runs-on: ubuntu-latest - steps: - - name: Build Fuzzers - id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@cafd7a0eb8ecb4e007c56897996a9b65c49c972f # master - with: - oss-fuzz-project-name: "prometheus" - dry-run: false - - name: Run Fuzzers - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@cafd7a0eb8ecb4e007c56897996a9b65c49c972f # master - # Note: Regularly check for updates to the pinned commit hash at: - # https://github.com/google/oss-fuzz/tree/master/infra/cifuzz/actions/run_fuzzers - with: - oss-fuzz-project-name: "prometheus" - fuzz-seconds: 600 - dry-run: false - - name: Upload Crash - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - if: failure() && steps.build.outcome == 'success' - with: - name: artifacts - path: ./out/artifacts diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml deleted file mode 100644 index 
e7e813e3b6..0000000000 --- a/.github/workflows/lock.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: 'Lock Threads' - -on: - schedule: - - cron: '13 23 * * *' - workflow_dispatch: - -permissions: - issues: write - -concurrency: - group: lock - -jobs: - action: - runs-on: ubuntu-latest - if: github.repository_owner == 'prometheus' - steps: - - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1 - with: - process-only: 'issues' - issue-inactive-days: '180' - github-token: ${{ secrets.PROMBOT_LOCKTHREADS_TOKEN }} diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml new file mode 100644 index 0000000000..a9a74c6bbf --- /dev/null +++ b/.github/workflows/presubmit.yml @@ -0,0 +1,52 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+name: Presubmit +on: + push: +jobs: + go: + name: Go & npm tests + runs-on: ubuntu-latest + container: + image: quay.io/prometheus/golang-builder:1.24-base + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/setup_environment + with: + enable_npm: true + - run: make install-goyacc build test + build-image-amd64: + name: Ensure Google image builds (amd64) + timeout-minutes: 30 + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Ensure forked image is buildable + run: | + # Our dockerfile expects npm vendoring, yet this is done during the mirror stage. + # Do it on demand here, similar to how it will be added in our mirror job. + go mod vendor + find . -name "package.json" -not -path "*/node_modules/*" -execdir npm install \; + + docker run --rm --privileged multiarch/qemu-user-static --reset --credential yes --persistent yes + docker buildx create --name multi-arch-builder --use + docker buildx build -t ensure-it-builds:amd64 . --platform linux/amd64 --load + # TODO: One day we could enable this, but right now it's OOMing on free GH action. + # docker buildx build -t ensure-it-builds:arm64 . 
--platform linux/arm64 --load diff --git a/.github/workflows/prombench.yml b/.github/workflows/prombench.yml deleted file mode 100644 index 65d1d71917..0000000000 --- a/.github/workflows/prombench.yml +++ /dev/null @@ -1,130 +0,0 @@ -on: - repository_dispatch: - types: [prombench_start, prombench_restart, prombench_stop] -name: Prombench Workflow -permissions: - contents: read -env: - AUTH_FILE: ${{ secrets.TEST_INFRA_PROVIDER_AUTH }} - CLUSTER_NAME: test-infra - DOMAIN_NAME: prombench.prometheus.io - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_ORG: prometheus - GITHUB_REPO: prometheus - GITHUB_STATUS_TARGET_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} - LAST_COMMIT_SHA: ${{ github.event.client_payload.LAST_COMMIT_SHA }} - GKE_PROJECT_ID: macro-mile-203600 - PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }} - PROVIDER: gke - RELEASE: ${{ github.event.client_payload.RELEASE }} - BENCHMARK_VERSION: ${{ github.event.client_payload.BENCHMARK_VERSION }} - BENCHMARK_DIRECTORY: ${{ github.event.client_payload.BENCHMARK_DIRECTORY }} - ZONE: europe-west3-a -jobs: - benchmark_start: - name: Benchmark Start - if: github.event.action == 'prombench_start' - runs-on: ubuntu-latest - steps: - - name: Update status to pending - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"pending", "context": "prombench-status-update-start", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - - name: Run make deploy to start test - id: make_deploy - uses: docker://prominfra/prombench:master - with: - args: >- - until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done; - make deploy; - - name: Update status to failure - if: failure() - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data 
'{"state":"failure", "context": "prombench-status-update-start", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - - name: Update status to success - if: success() - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"success", "context": "prombench-status-update-start", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - benchmark_cancel: - name: Benchmark Cancel - if: github.event.action == 'prombench_stop' - runs-on: ubuntu-latest - steps: - - name: Update status to pending - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"pending", "context": "prombench-status-update-cancel", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - - name: Run make clean to stop test - id: make_clean - uses: docker://prominfra/prombench:master - with: - args: >- - until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done; - make clean; - - name: Update status to failure - if: failure() - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"failure", "context": "prombench-status-update-cancel", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - - name: Update status to success - if: success() - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"success", "context": "prombench-status-update-cancel", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - benchmark_restart: - name: Benchmark 
Restart - if: github.event.action == 'prombench_restart' - runs-on: ubuntu-latest - steps: - - name: Update status to pending - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"pending", "context": "prombench-status-update-restart", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - - name: Run make clean then make deploy to restart test - id: make_restart - uses: docker://prominfra/prombench:master - with: - args: >- - until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done; - make clean; - until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done; - make deploy; - - name: Update status to failure - if: failure() - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"failure", "context": "prombench-status-update-restart", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" - - name: Update status to success - if: success() - run: >- - curl -i -X POST - -H "Authorization: Bearer $GITHUB_TOKEN" - -H "Content-Type: application/json" - --data '{"state":"success", "context": "prombench-status-update-restart", "target_url": "'$GITHUB_STATUS_TARGET_URL'"}' - "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA" diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml deleted file mode 100644 index fa8d2e5abe..0000000000 --- a/.github/workflows/repo_sync.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: Sync repo files -on: - schedule: - - cron: '44 17 * * *' -permissions: - contents: read - -jobs: - repo_sync: - runs-on: ubuntu-latest - if: github.repository_owner == 'prometheus' - container: - image: quay.io/prometheus/golang-builder - steps: - - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - persist-credentials: false - - run: ./scripts/sync_repo_files.sh - env: - GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml deleted file mode 100644 index c2335a8e46..0000000000 --- a/.github/workflows/scorecards.yml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2022 Google LLC - -name: Scorecards supply-chain security -on: - pull_request: - push: - branches: [ "main" ] - -# Declare default permissions as read only. -permissions: read-all - -jobs: - analysis: - name: Scorecards analysis - runs-on: ubuntu-latest - permissions: - # Needed to upload the results to code-scanning dashboard. - security-events: write - # Used to receive a badge. - id-token: write - - steps: - - name: "Checkout code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 - with: - persist-credentials: false - - - name: "Run analysis" - uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # tag=v2.4.1 - with: - results_file: results.sarif - results_format: sarif - # Publish the results for public repositories to enable scorecard badges. For more details, see - # https://github.com/ossf/scorecard-action#publishing-results. - publish_results: ${{ github.event_name != 'pull_request' }} - - # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF - # format to the repository Actions tab. - - name: "Upload artifact" - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # tag=v4.6.2 - with: - name: SARIF file - path: results.sarif - retention-days: 5 - - # Upload the results to GitHub's code scanning dashboard. 
- - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@28deaeda66b76a05916b6923827895f2b14ab387 # tag=v3.28.16 - with: - sarif_file: results.sarif diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index 371d92a69a..0000000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Stale Check -on: - workflow_dispatch: {} - schedule: - - cron: '16 22 * * *' -permissions: - issues: write - pull-requests: write -jobs: - stale: - if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. - runs-on: ubuntu-latest - steps: - - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - # opt out of defaults to avoid marking issues as stale and closing them - # https://github.com/actions/stale#days-before-close - # https://github.com/actions/stale#days-before-stale - days-before-stale: -1 - days-before-close: -1 - # Setting it to empty string to skip comments. - # https://github.com/actions/stale#stale-pr-message - # https://github.com/actions/stale#stale-issue-message - stale-pr-message: '' - stale-issue-message: '' - operations-per-run: 30 - # override days-before-stale, for only marking the pull requests as stale - days-before-pr-stale: 60 - stale-pr-label: stale - exempt-pr-labels: keepalive diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index d1959fb861..0000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,3063 +0,0 @@ -# Changelog - -## main / unreleased - -## 3.5.0-rc.1 / 2025-07-08 - -The following feature was removed (reverted), since problems were discovered: -* [FEATURE] OTLP: Support promoting OTel scope name/version/schema URL/attributes as metric labels, via `otlp.promote_scope_metadata`. 
#16730 #16760 - -## 3.5.0-rc.0 / 2025-06-25 - -* [FEATURE] PromQL: Add experimental type and unit metadata labels, behind feature flag `type-and-unit-labels`. #16228 #16632 #16718 #16743 -* [FEATURE] PromQL: Add `ts_of_(min|max|last)_over_time`, behind feature flag `experimental-promql-functions`. #16722 #16733 -* [FEATURE] Scraping: Add global option `always_scrape_classic_histograms` to scrape a classic histogram even if it is also exposed as native. #16452 -* [FEATURE] OTLP: Support promoting OTel scope name/version/schema URL/attributes as metric labels, via `otlp.promote_scope_metadata`. #16730 #16760 -* [FEATURE] OTLP: New config options `promote_all_resource_attributes` and `ignore_resource_attributes`. #16426 -* [FEATURE] Discovery: New service discovery for STACKIT Cloud. #16401 -* [ENHANCEMENT] Hetzner SD: Add `label_selector` to filter servers. #16512 -* [ENHANCEMENT] PromQL: support non-constant parameter in aggregations like `quantile` and `topk`. #16404 -* [ENHANCEMENT] UI: Better total target count display when using `keep_dropped_targets` option. #16604 -* [ENHANCEMENT] UI: Add simple filtering on the `/rules` page. #16605 -* [ENHANCEMENT] UI: Display query stats in hover tooltip over table query tab. #16723 -* [ENHANCEMENT] UI: Clear search field on `/targets` page. #16567 -* [ENHANCEMENT] Rules: Check that rules parse without error earlier at startup. #16601 -* [ENHANCEMENT] Promtool: Optional fuzzy float64 comparison in rules unittests. #16395 -* [PERF] PromQL: Reuse `histogramStatsIterator` where possible. #16686 -* [PERF] PromQL: Reuse storage for custom bucket values for native histograms. #16565 -* [PERF] UI: Optimize memoization and search debouncing on `/targets` page. #16589 -* [PERF] UI: Fix full-page re-rendering when opening status nav menu. #16590 -* [PERF] Kubernetes SD: use service cache.Indexer to achieve better performance. #16365 -* [PERF] TSDB: Optionally use Direct IO for chunks writing. 
#15365 -* [PERF] TSDB: When fetching label values, stop work earlier if the limit is reached. #16158 -* [PERF] Labels: Simpler/faster stringlabels encoding. #16069 -* [PERF] Scraping: Reload scrape pools concurrently. #16595 #16783 -* [BUGFIX] Top-level: Update GOGC before loading TSDB. #16491 -* [BUGFIX] Config: Respect GOGC environment variable if no "runtime" block exists. #16558 -* [BUGFIX] PromQL: Fix native histogram `last_over_time`. #16744 -* [BUGFIX] PromQL: Fix reported parser position range in errors for aggregations wrapped in ParenExpr #16041 #16754 -* [BUGFIX] PromQL: Don't emit a value from `histogram_fraction` or `histogram_quantile` if classic and native histograms are present at the same timestamp. #16552 -* [BUGFIX] PromQL: Incorrect rounding of `[1001ms]` to `[1s]` and similar. #16478 -* [BUGFIX] PromQL: Fix inconsistent / sometimes negative `histogram_count` and `histogram_sum`. #16682 -* [BUGFIX] PromQL: Improve handling of NaNs in native histograms. #16724 -* [BUGFIX] PromQL: Fix unary operator precedence in duration expressions. #16713 -* [BUGFIX] PromQL: Improve consistency of `avg` aggregation and `avg_over_time`. #16569 #16773 -* [BUGFIX] UI: Add query warnings and info to graph view. #16753 #16759 -* [BUGFIX] API: Add HTTP `Vary: Origin` header to responses to avoid cache poisoning. #16008 -* [BUGFIX] Discovery: Avoid deadlocks by taking locks in consistent order. #16587 -* [BUGFIX] Remote-write: For Azure AD auth, allow empty `client_id` to suppport system assigned managed identity. #16421 -* [BUGFIX] Scraping: Fix rare memory corruption bug. #16623 -* [BUGFIX] Scraping: continue handling custom-bucket histograms after an exponential histogram is encountered. #16720 -* [BUGFIX] OTLP: Default config not respected when `otlp:` block is unset. #16693 - -## 3.4.2 / 2025-06-26 - -* [BUGFIX] OTLP receiver: Fix default configuration not being respected if the `otlp:` block is unset in the config file. 
#16693 - -## 3.4.1 / 2025-05-31 - -* [BUGFIX] Parser: Add reproducer for a dangling-reference issue in parsers. #16633 - -## 3.4.0 / 2025-05-17 - -* [CHANGE] Config: Make setting out-of-order native histograms feature (`--enable-feature=ooo-native-histograms`) a no-op. Out-of-order native histograms are now always enabled when `out_of_order_time_window` is greater than zero and `--enable-feature=native-histograms` is set. #16207 -* [FEATURE] OTLP translate: Add feature flag for optionally translating OTel explicit bucket histograms into native histograms with custom buckets. #15850 -* [FEATURE] OTLP translate: Add option to receive OTLP metrics without translating names or attributes. #16441 -* [FEATURE] PromQL: allow arithmetic operations in durations in PromQL parser. #16249 -* [FEATURE] OTLP receiver: Add primitive support for ingesting OTLP delta metrics as-is. #16360 -* [ENHANCEMENT] PromQL: histogram_fraction for bucket histograms. #16095 -* [ENHANCEMENT] TSDB: add `prometheus_tsdb_wal_replay_unknown_refs_total` and `prometheus_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references during WAL/WBL replay. #16166 -* [ENHANCEMENT] Scraping: Add config option for escaping scheme request. #16066 -* [ENHANCEMENT] Config: Add global config option for convert_classic_histograms_to_nhcb. #16226 -* [ENHANCEMENT] Alerting: make batch size configurable (`--alertmanager.notification-batch-size`). #16254 -* [PERF] Kubernetes SD: make endpointSlice discovery more efficient. #16433 -* [BUGFIX] Config: Fix auto-reload on changes to rule and scrape config files. #16340 -* [BUGFIX] Scraping: Skip native histogram series if ingestion is disabled. #16218 -* [BUGFIX] TSDB: Handle metadata/tombstones/exemplars for duplicate series during WAL replay. #16231 -* [BUGFIX] TSDB: Avoid processing exemplars outside the valid time range during WAL replay. #16242 -* [BUGFIX] Promtool: Add feature flags for PromQL features. 
#16443 -* [BUGFIX] Rules: correct logging of alert name & template data. #15093 -* [BUGFIX] PromQL: Use arithmetic mean for `histogram_stddev()` and `histogram_stdvar()` . #16444 - -## 3.3.0 / 2025-04-15 - -* [FEATURE] PromQL: Implement `idelta()` and `irate()` for native histograms. #15853 -* [ENHANCEMENT] Scaleway SD: Add `__meta_scaleway_instance_public_ipv4_addresses` and `__meta_scaleway_instance_public_ipv6_addresses` labels. #14228 -* [ENHANCEMENT] TSDB: Reduce locking while reloading blocks. #12920 -* [ENHANCEMENT] PromQL: Allow UTF-8 labels in `label_replace()`. #15974 -* [ENHANCEMENT] Promtool: `tsdb create-blocks-from openmetrics` can now read from a Pipe. #16011 -* [ENHANCEMENT] Rules: Add support for anchors and aliases in rule files. #14957 -* [ENHANCEMENT] Dockerfile: Make `/prometheus` writable. #16073 -* [ENHANCEMENT] API: Include scrape pool name for dropped targets in `/api/v1/targets`. #16085 -* [ENHANCEMENT] UI: Improve time formatting and copying of selectors. #15999 #16165 -* [ENHANCEMENT] UI: Bring back vertical grid lines and graph legend series toggling instructions. #16163 #16164 -* [ENHANCEMENT] Mixin: The `cluster` label can be customized using `clusterLabel`. #15826 -* [PERF] TSDB: Optimize some operations on head chunks by taking shortcuts. #12659 -* [PERF] TSDB & Agent: Reduce memory footprint during WL replay. #15778 -* [PERF] Remote-Write: Reduce memory footprint during WAL replay. #16197 -* [PERF] API: Reduce memory footprint during header parsing. #16001 -* [PERF] Rules: Improve dependency evaluation, enabling better concurrency. #16039 -* [PERF] Scraping: Improve scraping performance for native histograms. #15731 -* [PERF] Scraping: Improve parsing of created timestamps. #16072 -* [BUGFIX] Scraping: Bump cache iteration after error to avoid false duplicate detections. #16174 -* [BUGFIX] Scraping: Skip native histograms series when ingestion is disabled. 
#16218 -* [BUGFIX] PromQL: Fix counter reset detection for native histograms. #15902 #15987 -* [BUGFIX] PromQL: Fix inconsistent behavior with an empty range. #15970 -* [BUGFIX] PromQL: Fix inconsistent annotation in `quantile_over_time()`. #16018 -* [BUGFIX] PromQL: Prevent `label_join()` from producing duplicates. #15975 -* [BUGFIX] PromQL: Ignore native histograms in `scalar()`, `sort()` and `sort_desc()`. #15964 -* [BUGFIX] PromQL: Fix annotations for binary operations between incompatible native histograms. #15895 -* [BUGFIX] Alerting: Consider alert relabeling when deciding whether alerts are dropped. #15979 -* [BUGFIX] Config: Set `GoGC` to the default value in case of an empty configuration. #16052 -* [BUGFIX] TSDB: Fix unknown series errors and potential data loss during WAL replay when inactive series are removed from the head and reappear before the next WAL checkpoint. #16060 -* [BUGFIX] Scaleway SD: The public IP will no longer be set to `__meta_meta_scaleway_instance_public_ipv4` if it is an IPv6 address. #14228 -* [BUGFIX] UI: Display the correct value of Alerting rules' `keep_firing_for`. #16211 - -## 3.2.1 / 2025-02-25 - -* [BUGFIX] Don't send Accept` header `escape=allow-utf-8` when `metric_name_validation_scheme: legacy` is configured. #16061 - -## 3.2.0 / 2025-02-17 - -* [CHANGE] relabel: Replace actions can now use UTF-8 characters in `targetLabel` field. Note that `$` or `${}` will be expanded. This also apply to `replacement` field for `LabelMap` action. #15851 -* [CHANGE] rulefmt: Rule names can use UTF-8 characters, except `{` and `}` characters (due to common mistake checks). #15851 -* [FEATURE] remote/otlp: Add feature flag `otlp-deltatocumulative` to support conversion from delta to cumulative. #15165 -* [ENHANCEMENT] openstack SD: Discover Octavia loadbalancers. #15539 -* [ENHANCEMENT] scrape: Add metadata for automatic metrics to WAL for `metadata-wal-records` feature. 
#15837 -* [ENHANCEMENT] promtool: Support linting of scrape interval, through lint option `too-long-scrape-interval`. #15719 -* [ENHANCEMENT] promtool: Add --ignore-unknown-fields option. #15706 -* [ENHANCEMENT] ui: Make "hide empty rules" and hide empty rules" persistent #15807 -* [ENHANCEMENT] web/api: Add a limit parameter to `/query` and `/query_range`. #15552 -* [ENHANCEMENT] api: Add fields Node and ServerTime to `/status`. #15784 -* [PERF] Scraping: defer computing labels for dropped targets until they are needed by the UI. #15261 -* [BUGFIX] remotewrite2: Fix invalid metadata bug for metrics without metadata. #15829 -* [BUGFIX] remotewrite2: Fix the unit field propagation. #15825 -* [BUGFIX] scrape: Fix WAL metadata for histograms and summaries. #15832 -* [BUGFIX] ui: Merge duplicate "Alerts page settings" sections. #15810 -* [BUGFIX] PromQL: Fix `` functions with histograms. #15711 - -## 3.1.0 / 2025-01-02 - - * [SECURITY] upgrade golang.org/x/crypto to address reported CVE-2024-45337. #15691 - * [CHANGE] Notifier: Increment prometheus_notifications_errors_total by the number of affected alerts rather than per batch. #15428 - * [CHANGE] API: list rules field "groupNextToken:omitempty" renamed to "groupNextToken". #15400 - * [ENHANCEMENT] OTLP translate: keep identifying attributes in target_info. #15448 - * [ENHANCEMENT] Paginate rule groups, add infinite scroll to rules within groups. #15677 - * [ENHANCEMENT] TSDB: Improve calculation of space used by labels. #13880 - * [ENHANCEMENT] Rules: new metric rule_group_last_rule_duration_sum_seconds. #15672 - * [ENHANCEMENT] Observability: Export 'go_sync_mutex_wait_total_seconds_total' metric. #15339 - * [ENHANCEMEN] Remote-Write: optionally use a DNS resolver that picks a random IP. #15329 - * [PERF] Optimize `l=~".+"` matcher. #15474, #15684 - * [PERF] TSDB: Cache all symbols for compaction . #15455 - * [PERF] TSDB: MemPostings: keep a map of label values slices. 
#15426 - * [PERF] Remote-Write: Remove interning hook. #15456 - * [PERF] Scrape: optimize string manipulation for experimental native histograms with custom buckets. #15453 - * [PERF] TSDB: reduce memory allocations. #15465, #15427 - * [PERF] Storage: Implement limit in mergeGenericQuerier. #14489 - * [PERF] TSDB: Optimize inverse matching. #14144 - * [PERF] Regex: use stack memory for lowercase copy of string. #15210 - * [PERF] TSDB: When deleting from postings index, pause to unlock and let readers read. #15242 - * [BUGFIX] Main: Avoid possible segfault at exit. (#15724) - * [BUGFIX] Rules: Do not run rules concurrently if uncertain about dependencies. #15560 - * [BUGFIX] PromQL: Adds test for `absent`, `absent_over_time` and `deriv` func with histograms. #15667 - * [BUGFIX] PromQL: Fix various bugs related to quoting UTF-8 characters. #15531 - * [BUGFIX] Scrape: fix nil panic after scrape loop reload. #15563 - * [BUGFIX] Remote-write: fix panic on repeated log message. #15562 - * [BUGFIX] Scrape: reload would ignore always_scrape_classic_histograms and convert_classic_histograms_to_nhcb configs. #15489 - * [BUGFIX] TSDB: fix data corruption in experimental native histograms. #15482 - * [BUGFIX] PromQL: Ignore histograms in all time related functions. #15479 - * [BUGFIX] OTLP receiver: Convert metric metadata. #15416 - * [BUGFIX] PromQL: Fix `resets` function for histograms. #15527 - * [BUGFIX] PromQL: Fix behaviour of `changes()` for mix of histograms and floats. #15469 - * [BUGFIX] PromQL: Fix behaviour of some aggregations with histograms. #15432 - * [BUGFIX] allow quoted exemplar keys in openmetrics text format. #15260 - * [BUGFIX] TSDB: fixes for rare conditions when loading write-behind-log (WBL). #15380 - * [BUGFIX] `round()` function did not remove `__name__` label. #15250 - * [BUGFIX] Promtool: analyze block shows metric name with 0 cardinality. #15438 - * [BUGFIX] PromQL: Fix `count_values` for histograms. 
#15422 - * [BUGFIX] PromQL: fix issues with comparison binary operations with `bool` modifier and native histograms. #15413 - * [BUGFIX] PromQL: fix incorrect "native histogram ignored in aggregation" annotations. #15414 - * [BUGFIX] PromQL: Corrects the behaviour of some operator and aggregators with Native Histograms. #15245 - * [BUGFIX] TSDB: Always return unknown hint for first sample in non-gauge histogram chunk. #15343 - * [BUGFIX] PromQL: Clamp functions: Ignore any points with native histograms. #15169 - * [BUGFIX] TSDB: Fix race on stale values in headAppender. #15322 - * [BUGFIX] UI: Fix selector / series formatting for empty metric names. #15340 - * [BUGFIX] OTLP receiver: Allow colons in non-standard units. #15710 - -## 3.0.1 / 2024-11-28 - -The first bug fix release for Prometheus 3. - -* [BUGFIX] Promql: Make subqueries left open. #15431 -* [BUGFIX] Fix memory leak when query log is enabled. #15434 -* [BUGFIX] Support utf8 names on /v1/label/:name/values endpoint. #15399 - -## 3.0.0 / 2024-11-14 - -This release includes new features such as a brand new UI and UTF-8 support enabled by default. As this marks the first new major version in seven years, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. For users that want to upgrade we recommend to read through our [migration guide](https://prometheus.io/docs/prometheus/3.0/migration/). - -* [CHANGE] Set the `GOMAXPROCS` variable automatically to match the Linux CPU quota. Use `--no-auto-gomaxprocs` to disable it. The `auto-gomaxprocs` feature flag was removed. #15376 -* [CHANGE] Set the `GOMEMLIMIT` variable automatically to match the Linux container memory limit. Use `--no-auto-gomemlimit` to disable it. The `auto-gomemlimit` feature flag was removed. 
#15373 -* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 -* [CHANGE] Remote-write: default enable_http2 to false. #15219 -* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164 -* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178 -* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657 -* [CHANGE] Disallow configuring AM with the v1 api. #13883 -* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 -* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930 -* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894 -* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160 -* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906 -* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion` is enabled. #14738 -* [CHANGE] Float literals and time durations being the same is now a stable fetaure. #15111 -* [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). 
To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872 -* [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904 -* [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365 -* [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365 -* [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-name` feature flag has been removed. #14705, #15258 -* [CHANGE] Console: Remove example files for the console feature. Users can continue using the console feature by supplying their own JavaScript and templates. #14807 -* [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770 -* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747 -* [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526 -* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643 -* [FEATURE] OTLP receiver: Ability to skip UTF-8 normalization using `otlp.translation_strategy = NoUTF8EscapingWithSuffixes` configuration option. #15384 -* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. 
#14769, #15011 -* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 -* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 -* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 -* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096 -* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082 -* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 -* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 -* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 -* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929 -* [ENHANCEMENT] Consul SD: Support catalog filters. #11224 -* [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875 -* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975 -* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932 -* [PERF] TSDB: Grow postings by doubling. #14721 -* [PERF] Relabeling: Optimize adding a constant label pair. #12180 -* [BUGFIX] Scraping: Don't log errors on empty scrapes. #15357 -* [BUGFIX] UI: fix selector / series formatting for empty metric names. #15341 -* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 -* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 -* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251 -* [BUGFIX] Scraping: Unit was missing when using protobuf format. 
#15095 -* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910 -* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854 -* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884 -* [BUGFIX] PromQL: Unary negation of native histograms. #14821 -* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025 -* [BUGFIX] Autoreload: Reload invalid yaml files. #14947 -* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 - -## 2.53.4 / 2025-03-18 - -* [BUGFIX] Runtime: fix GOGC is being set to 0 when installed with empty prometheus.yml file resulting high cpu usage. #16090 -* [BUGFIX] Scrape: fix dropping valid metrics after previous scrape failed. #16220 - -## 2.53.3 / 2024-11-04 - -* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685, #14740 - -## 2.53.2 / 2024-08-09 - -Fix a bug where Prometheus would crash with a segmentation fault if a remote-read -request accessed a block on disk at about the same time as TSDB created a new block. - -[BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515,#14523 - -## 2.55.1 / 2024-11-04 - -* [BUGFIX] `round()` function did not remove `__name__` label. #15250 - -## 2.55.0 / 2024-10-22 - -* [FEATURE] PromQL: Add experimental `info` function. #14495 -* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 -* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 -* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 -* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 -* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. 
#14200 -* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346 -* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403 -* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506 -* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus to randomize timing. #12532 -* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706 -* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 -* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 -* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 -* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 -* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985 -* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621 -* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413 -* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816 -* [ENHANCEMENT] API: Support multiple listening addresses. #14665 -* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934 -* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120 -* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729 -* [BUGFIX] PromQL: make sort_by_label stable. #14985 -* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). 
#13147 -* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 -* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810 -* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766 -* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716 -* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 -* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 - -## 2.54.1 / 2024-08-27 - -* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps (mixing samples of the same series with and without timestamps is still rejected). #14685 -* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654 -* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538 -* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514 -* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605 - -## 2.54.0 / 2024-08-09 - -Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/). -This is experimental at this time and may still change. -Remote-write v2 is enabled by default, but can be disabled via feature-flag `web.remote-write-receiver.accepted-protobuf-messages`. - -* [CHANGE] Remote-Write: `highest_timestamp_in_seconds` and `queue_highest_sent_timestamp_seconds` metrics now initialized to 0. #14437 -* [CHANGE] API: Split warnings from info annotations in API response. #14327 -* [FEATURE] Remote-Write: Version 2.0 experimental, plus metadata in WAL via feature flag `metadata-wal-records` (defaults on). 
#14395,#14427,#14444 -* [FEATURE] PromQL: add limitk() and limit_ratio() aggregation operators. #12503 -* [ENHANCEMENT] PromQL: Accept underscores in literal numbers, e.g. 1_000_000 for 1 million. #12821 -* [ENHANCEMENT] PromQL: float literal numbers and durations are now interchangeable (experimental). Example: `time() - my_timestamp > 10m`. #9138 -* [ENHANCEMENT] PromQL: use Kahan summation for sum(). #14074,#14362 -* [ENHANCEMENT] PromQL (experimental native histograms): Optimize `histogram_count` and `histogram_sum` functions. #14097 -* [ENHANCEMENT] TSDB: Better support for out-of-order experimental native histogram samples. #14438 -* [ENHANCEMENT] TSDB: Optimise seek within index. #14393 -* [ENHANCEMENT] TSDB: Optimise deletion of stale series. #14307 -* [ENHANCEMENT] TSDB: Reduce locking to optimise adding and removing series. #13286,#14286 -* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396,#14584 -* [ENHANCEMENT] Regexps: Optimize patterns with multiple prefixes. #13843,#14368 -* [ENHANCEMENT] Regexps: Optimize patterns containing multiple literal strings. #14173 -* [ENHANCEMENT] AWS SD: expose Primary IPv6 addresses as __meta_ec2_primary_ipv6_addresses. #14156 -* [ENHANCEMENT] Docker SD: add MatchFirstNetwork for containers with multiple networks. #10490 -* [ENHANCEMENT] OpenStack SD: Use `flavor.original_name` if available. #14312 -* [ENHANCEMENT] UI (experimental native histograms): more accurate representation. #13680,#14430 -* [ENHANCEMENT] Agent: `out_of_order_time_window` config option now applies to agent. #14094 -* [ENHANCEMENT] Notifier: Send any outstanding Alertmanager notifications when shutting down. #14290 -* [ENHANCEMENT] Rules: Add label-matcher support to Rules API. #10194 -* [ENHANCEMENT] HTTP API: Add url to message logged on error while sending response. #14209 -* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). 
#14584 -* [BUGFIX] CLI: escape `|` characters when generating docs. #14420 -* [BUGFIX] PromQL (experimental native histograms): Fix some binary operators between native histogram values. #14454 -* [BUGFIX] TSDB: LabelNames API could fail during compaction. #14279 -* [BUGFIX] TSDB: Fix rare issue where pending OOO read can be left dangling if creating querier fails. #14341 -* [BUGFIX] TSDB: fix check for context cancellation in LabelNamesFor. #14302 -* [BUGFIX] Rules: Fix rare panic on reload. #14366 -* [BUGFIX] Config: In YAML marshalling, do not output a regexp field if it was never set. #14004 -* [BUGFIX] Remote-Write: reject samples with future timestamps. #14304 -* [BUGFIX] Remote-Write: Fix data corruption in remote write if max_sample_age is applied. #14078 -* [BUGFIX] Notifier: Fix Alertmanager discovery not updating under heavy load. #14174 -* [BUGFIX] Regexes: some Unicode characters were not matched by case-insensitive comparison. #14170,#14299 -* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515 - -## 2.53.1 / 2024-07-10 - -Fix a bug which would drop samples in remote-write if the sending flow stalled -for longer than it takes to write one "WAL segment". How long this takes depends on the size -of your Prometheus; as a rough guide with 10 million series it is about 2-3 minutes. - -* [BUGFIX] Remote-write: stop dropping samples in catch-up #14446 - -## 2.53.0 / 2024-06-16 - -This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 75. - -* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. 
#13980 #14048 -* [CHANGE] Runtime: Change GOGC threshold from 100 to 75 #14176 #14285 -* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 #14216 #14273 -* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` metric to measure the time it takes to restore a rule group. #13974 -* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 -* [ENHANCEMENT] TSDB: Optimize querying with regexp matchers. #13620 -* [BUGFIX] OTLP: Don't generate target_info unless there are metrics and at least one identifying label is defined. #13991 -* [BUGFIX] Scrape: Do not try to ingest native histograms when the native histograms feature is turned off. This happened when protobuf scrape was enabled by, for example, the created time feature. #13987 -* [BUGFIX] Scaleway SD: Use the instance's public IP if no private IP is available as the `__address__` meta label. #13941 -* [BUGFIX] Query logger: Do not leak file descriptors on error. #13948 -* [BUGFIX] TSDB: Let queries with heavy regex matches be cancelled and not use up the CPU. #14096 #14103 #14118 #14199 -* [BUGFIX] API: Do not warn if result count is equal to the limit, only when exceeding the limit for the series, label-names and label-values APIs. #14116 -* [BUGFIX] TSDB: Fix head stats and hooks when replaying a corrupted snapshot. #14079 - -## 2.52.1 / 2024-05-29 - -* [BUGFIX] Linode SD: Fix partial fetch when discovery would return more than 500 elements. #14141 - -## 2.52.0 / 2024-05-07 - -* [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary.
#9633 -* [CHANGE] Scrape: Multiple samples (even with different timestamps) are treated as duplicates during one scrape. -* [FEATURE] Kubernetes SD: Add a new metric `prometheus_sd_kubernetes_failures_total` to track failed requests to Kubernetes API. #13554 -* [FEATURE] Kubernetes SD: Add node and zone metadata labels when using the endpointslice role. #13935 -* [FEATURE] Azure SD/Remote Write: Allow usage of Azure authorization SDK. #13099 -* [FEATURE] Alerting: Support native histogram templating. #13731 -* [FEATURE] Linode SD: Support IPv6 range discovery and region filtering. #13774 -* [ENHANCEMENT] PromQL: Performance improvements for queries with regex matchers. #13461 -* [ENHANCEMENT] PromQL: Performance improvements when using aggregation operators. #13744 -* [ENHANCEMENT] PromQL: Validate label_join destination label. #13803 -* [ENHANCEMENT] Scrape: Increment `prometheus_target_scrapes_sample_duplicate_timestamp_total` metric on duplicated series during one scrape. #12933 -* [ENHANCEMENT] TSDB: Many improvements in performance. #13742 #13673 #13782 -* [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754 -* [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772 -* [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823 -* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838 -* [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846 -* [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667 -* [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852 -* [BUGFIX] PromQL: Fix possible duplicated label name and values in a metric result for specific queries. 
#13845 -* [BUGFIX] Scrape: Fix setting native histogram schema factor during scrape. #13846 -* [BUGFIX] TSDB: Fix counting of histogram samples when creating WAL checkpoint stats. #13776 -* [BUGFIX] TSDB: Fix cases of compacting empty heads. #13755 -* [BUGFIX] TSDB: Count float histograms in WAL checkpoint. #13844 -* [BUGFIX] Remote Read: Fix memory leak due to broken requests. #13777 -* [BUGFIX] API: Stop building response for `/api/v1/series/` when the API request was cancelled. #13766 -* [BUGFIX] promtool: Fix panic on `promtool tsdb analyze --extended` when no native histograms are present. #13976 - -## 2.51.2 / 2024-04-09 - -Bugfix release. - -[BUGFIX] Notifier: could hang when using relabeling on alerts #13861 - -## 2.51.1 / 2024-03-27 - -Bugfix release. - -* [BUGFIX] PromQL: Re-instate validation of label_join destination label #13803 -* [BUGFIX] Scraping (experimental native histograms): Fix handling of the min bucket factor on sync of targets #13846 -* [BUGFIX] PromQL: Some queries could return the same series twice (library use only) #13845 - -## 2.51.0 / 2024-03-18 - -This version is built with Go 1.22.1. - -There is a new optional build tag "dedupelabels", which should reduce memory consumption (#12304). -It is off by default; there will be an optional alternative image to try it out. - -* [CHANGE] Scraping: Do experimental timestamp alignment even if tolerance is bigger than 1% of scrape interval #13624, #13737 -* [FEATURE] Alerting: Relabel rules for AlertManagerConfig; allows routing alerts to different alertmanagers #12551, #13735 -* [FEATURE] API: add limit param to series, label-names and label-values APIs #13396 -* [FEATURE] UI (experimental native histograms): Add native histogram chart to Table view #13658 -* [FEATURE] Promtool: Add a "tsdb dump-openmetrics" to dump in OpenMetrics format. 
#13194 -* [FEATURE] PromQL (experimental native histograms): Add histogram_avg function #13467 -* [ENHANCEMENT] Rules: Evaluate independent rules concurrently #12946, #13527 -* [ENHANCEMENT] Scraping (experimental native histograms): Support exemplars #13488 -* [ENHANCEMENT] Remote Write: Disable resharding during active retry backoffs #13562 -* [ENHANCEMENT] Observability: Add native histograms to latency/duration metrics #13681 -* [ENHANCEMENT] Observability: Add 'type' label to prometheus_tsdb_head_out_of_order_samples_appended_total #13607 -* [ENHANCEMENT] API: Faster generation of targets into JSON #13469, #13484 -* [ENHANCEMENT] Scraping, API: Use faster compression library #10782 -* [ENHANCEMENT] OpenTelemetry: Performance improvements in OTLP parsing #13627 -* [ENHANCEMENT] PromQL: Optimisations to reduce CPU and memory #13448, #13536 -* [BUGFIX] PromQL: Constrain extrapolation in rate() to half of sample interval #13725 -* [BUGFIX] Remote Write: Stop slowing down when a new WAL segment is created #13583, #13628 -* [BUGFIX] PromQL: Fix wrongly scoped range vectors with @ modifier #13559 -* [BUGFIX] Kubernetes SD: Pod status changes were not discovered by Endpoints service discovery #13337 -* [BUGFIX] Azure SD: Fix 'error: parameter virtualMachineScaleSetName cannot be empty' (#13702) -* [BUGFIX] Remote Write: Fix signing for AWS sigv4 transport #13497 -* [BUGFIX] Observability: Exemplars emitted by Prometheus use "trace_id" not "traceID" #13589 - -## 2.50.1 / 2024-02-26 - -* [BUGFIX] API: Fix metadata API using wrong field names. #13633 - -## 2.50.0 / 2024-02-22 - -* [CHANGE] Remote Write: Error `storage.ErrTooOldSample` is now generating HTTP error 400 instead of HTTP error 500. #13335 -* [FEATURE] Remote Write: Drop old inmemory samples. Activated using the config entry `sample_age_limit`. #13002 -* [FEATURE] **Experimental**: Add support for ingesting zeros as created timestamps. (enabled under the feature-flag `created-timestamp-zero-ingestion`). 
#12733 #13279 -* [FEATURE] Promtool: Add `analyze` histograms command. #12331 -* [FEATURE] TSDB/compaction: Add a way to enable overlapping compaction. #13282 #13393 #13398 -* [FEATURE] Add automatic memory limit handling. Activated using the feature flag. `auto-gomemlimit` #13395 -* [ENHANCEMENT] Promtool: allow specifying multiple matchers in `promtool tsdb dump`. #13296 -* [ENHANCEMENT] PromQL: Restore more efficient version of `NewPossibleNonCounterInfo` annotation. #13022 -* [ENHANCEMENT] Kuma SD: Extend configuration to allow users to specify client ID. #13278 -* [ENHANCEMENT] PromQL: Use natural sort in `sort_by_label` and `sort_by_label_desc`. This is **experimental**. #13411 -* [ENHANCEMENT] Native Histograms: support `native_histogram_min_bucket_factor` in scrape_config. #13222 -* [ENHANCEMENT] Native Histograms: Issue warning if histogramRate is applied to the wrong kind of histogram. #13392 -* [ENHANCEMENT] TSDB: Make transaction isolation data structures smaller. #13015 -* [ENHANCEMENT] TSDB/postings: Optimize merge using Loser Tree. #12878 -* [ENHANCEMENT] TSDB: Simplify internal series delete function. #13261 -* [ENHANCEMENT] Agent: Performance improvement by making the global hash lookup table smaller. #13262 -* [ENHANCEMENT] PromQL: faster execution of metric functions, e.g. abs(), rate() #13446 -* [ENHANCEMENT] TSDB: Optimize label values with matchers by taking shortcuts. #13426 -* [ENHANCEMENT] Kubernetes SD: Check preconditions earlier and avoid unnecessary checks or iterations in kube_sd. #13408 -* [ENHANCEMENT] Promtool: Improve visibility for `promtool test rules` with JSON colored formatting. #13342 -* [ENHANCEMENT] Consoles: Exclude iowait and steal from CPU Utilisation. #9593 -* [ENHANCEMENT] Various improvements and optimizations on Native Histograms. #13267, #13215, #13276 #13289, #13340 -* [BUGFIX] Scraping: Fix quality value in HTTP Accept header. #13313 -* [BUGFIX] UI: Fix usage of the function `time()` that was crashing. 
#13371 -* [BUGFIX] Azure SD: Fix SD crashing when it finds a VM scale set. #13578 - -## 2.49.1 / 2024-01-15 - -* [BUGFIX] TSDB: Fixed a wrong `q=` value in scrape accept header #13313 - -## 2.49.0 / 2024-01-15 - -* [FEATURE] Promtool: Add `--run` flag to the promtool test rules command. #12206 -* [FEATURE] SD: Add support for `NS` records to DNS SD. #13219 -* [FEATURE] UI: Add heatmap visualization setting in the Graph tab, useful for histograms. #13096 #13371 -* [FEATURE] Scraping: Add `scrape_config.enable_compression` (default true) to disable gzip compression when scraping the target. #13166 -* [FEATURE] PromQL: Add a `promql-experimental-functions` feature flag containing some new experimental PromQL functions. #13103 NOTE: More experimental functions might be added behind the same feature flag in the future. Added functions: - * Experimental `mad_over_time` (median absolute deviation around the median) function. #13059 - * Experimental `sort_by_label` and `sort_by_label_desc` functions allowing sorting returned series by labels. #11299 -* [FEATURE] SD: Add `__meta_linode_gpus` label to Linode SD. #13097 -* [FEATURE] API: Add `exclude_alerts` query parameter to `/api/v1/rules` to only return recording rules. #12999 -* [FEATURE] TSDB: --storage.tsdb.retention.time flag value is now exposed as a `prometheus_tsdb_retention_limit_seconds` metric. #12986 -* [FEATURE] Scraping: Add ability to specify priority of scrape protocols to accept during scrape (e.g. to scrape Prometheus proto format for certain jobs). This can be changed by setting `global.scrape_protocols` and `scrape_config.scrape_protocols`. #12738 -* [ENHANCEMENT] Scraping: Automated handling of scraping histograms that violate `scrape_config.native_histogram_bucket_limit` setting. #13129 -* [ENHANCEMENT] Scraping: Optimized memory allocations when scraping. #12992 -* [ENHANCEMENT] SD: Added cache for Azure SD to avoid rate-limits. #12622 -* [ENHANCEMENT] TSDB: Various improvements to OOO exemplar scraping. E.g.
allowing ingestion of exemplars with the same timestamp, but with different labels. #13021 -* [ENHANCEMENT] API: Optimize `/api/v1/labels` and `/api/v1/label//values` when 1 set of matchers are used. #12888 -* [ENHANCEMENT] TSDB: Various optimizations for TSDB block index, head mmap chunks and WAL, reducing latency and memory allocations (improving API calls, compaction queries etc). #12997 #13058 #13056 #13040 -* [ENHANCEMENT] PromQL: Optimize memory allocations and latency when querying float histograms. #12954 -* [ENHANCEMENT] Rules: Instrument TraceID in log lines for rule evaluations. #13034 -* [ENHANCEMENT] PromQL: Optimize memory allocations in query_range calls. #13043 -* [ENHANCEMENT] Promtool: unittest interval now defaults to evaluation_intervals when not set. #12729 -* [BUGFIX] SD: Fixed Azure SD public IP reporting #13241 -* [BUGFIX] API: Fix inaccuracies in posting cardinality statistics. #12653 -* [BUGFIX] PromQL: Fix inaccuracies of `histogram_quantile` with classic histograms. #13153 -* [BUGFIX] TSDB: Fix rare fails or inaccurate queries with OOO samples. #13115 -* [BUGFIX] TSDB: Fix rare panics on append commit when exemplars are used. #13092 -* [BUGFIX] TSDB: Fix exemplar WAL storage, so remote write can send/receive samples before exemplars. #13113 -* [BUGFIX] Mixins: Fix `url` filter on remote write dashboards. #10721 -* [BUGFIX] PromQL/TSDB: Various fixes to float histogram operations. #12891 #12977 #12609 #13190 #13189 #13191 #13201 #13212 #13208 -* [BUGFIX] Promtool: Fix int32 overflow issues for 32-bit architectures. #12978 -* [BUGFIX] SD: Fix Azure VM Scale Set NIC issue. #13283 - -## 2.48.1 / 2023-12-07 - -* [BUGFIX] TSDB: Make the wlog watcher read segments synchronously when not tailing. #13224 -* [BUGFIX] Agent: Participate in notify calls (fixes slow down in remote write handling introduced in 2.45). #13223 - -## 2.48.0 / 2023-11-16 - -* [CHANGE] Remote-write: respect Retry-After header on 5xx errors. 
#12677 -* [FEATURE] Alerting: Add AWS SigV4 authentication support for Alertmanager endpoints. #12774 -* [FEATURE] Promtool: Add support for histograms in the TSDB dump command. #12775 -* [FEATURE] PromQL: Add warnings (and annotations) to PromQL query results. #12152 #12982 #12988 #13012 -* [FEATURE] Remote-write: Add Azure AD OAuth authentication support for remote write requests. #12572 -* [ENHANCEMENT] Remote-write: Add a header to count retried remote write requests. #12729 -* [ENHANCEMENT] TSDB: Improve query performance by re-using iterator when moving between series. #12757 -* [ENHANCEMENT] UI: Move /targets page discovered labels to expandable section #12824 -* [ENHANCEMENT] TSDB: Optimize WBL loading by not sending empty buffers over channel. #12808 -* [ENHANCEMENT] TSDB: Replay WBL mmap markers concurrently. #12801 -* [ENHANCEMENT] Promtool: Add support for specifying series matchers in the TSDB analyze command. #12842 -* [ENHANCEMENT] PromQL: Prevent Prometheus from overallocating memory on subquery with a large number of steps. #12734 -* [ENHANCEMENT] PromQL: Add warning when monotonicity is forced in the input to histogram_quantile. #12931 -* [ENHANCEMENT] Scraping: Optimize sample appending by reducing garbage. #12939 -* [ENHANCEMENT] Storage: Reduce memory allocations in queries that merge series sets. #12938 -* [ENHANCEMENT] UI: Show group interval in rules display. #12943 -* [ENHANCEMENT] Scraping: Save memory when scraping by delaying creation of buffer. #12953 -* [ENHANCEMENT] Agent: Allow ingestion of out-of-order samples. #12897 -* [ENHANCEMENT] Promtool: Improve support for native histograms in TSDB analyze command. #12869 -* [ENHANCEMENT] Scraping: Add configuration option for tracking staleness of scraped timestamps. #13060 -* [BUGFIX] SD: Ensure that discovery managers are properly canceled. #10569 -* [BUGFIX] TSDB: Fix PostingsForMatchers race with creating new series.
#12558 -* [BUGFIX] TSDB: Fix handling of explicit counter reset header in histograms. #12772 -* [BUGFIX] SD: Validate HTTP client configuration in HTTP, EC2, Azure, Uyuni, PuppetDB, and Lightsail SDs. #12762 #12811 #12812 #12815 #12814 #12816 -* [BUGFIX] TSDB: Fix counter reset edgecases causing native histogram panics. #12838 -* [BUGFIX] TSDB: Fix duplicate sample detection at chunk size limit. #12874 -* [BUGFIX] Promtool: Fix errors not being reported in check rules command. #12715 -* [BUGFIX] TSDB: Avoid panics reported in logs when head initialization takes a long time. #12876 -* [BUGFIX] TSDB: Ensure that WBL is repaired when possible. #12406 -* [BUGFIX] Storage: Fix crash caused by incorrect mixed samples handling. #13055 -* [BUGFIX] TSDB: Fix compactor failures by adding min time to histogram chunks. #13062 - -## 2.47.1 / 2023-10-04 - -* [BUGFIX] Fix duplicate sample detection at chunk size limit #12874 - -## 2.47.0 / 2023-09-06 - -This release adds an experimental OpenTelemetry (OTLP) Ingestion feature, -and also new setting `keep_dropped_targets` to limit the amount of dropped -targets held in memory. This defaults to 0 meaning 'no limit', so we encourage -users with large Prometheus to try setting a limit such as 100. - -* [FEATURE] Web: Add OpenTelemetry (OTLP) Ingestion endpoint. #12571 #12643 -* [FEATURE] Scraping: Optionally limit detail on dropped targets, to save memory. #12647 -* [ENHANCEMENT] TSDB: Write head chunks to disk in the background to reduce blocking. #11818 -* [ENHANCEMENT] PromQL: Speed up aggregate and function queries. #12682 -* [ENHANCEMENT] PromQL: More efficient evaluation of query with `timestamp()`. #12579 -* [ENHANCEMENT] API: Faster streaming of Labels to JSON. #12598 -* [ENHANCEMENT] Agent: Memory pooling optimisation. #12651 -* [ENHANCEMENT] TSDB: Prevent storage space leaks due to terminated snapshots on shutdown. #12664 -* [ENHANCEMENT] Histograms: Refactoring and optimisations. 
#12352 #12584 #12596 #12711 #12054 -* [ENHANCEMENT] Histograms: Add `histogram_stdvar` and `histogram_stddev` functions. #12614 -* [ENHANCEMENT] Remote-write: add http.resend_count tracing attribute. #12676 -* [ENHANCEMENT] TSDB: Support native histograms in snapshot on shutdown. #12722 -* [BUGFIX] TSDB/Agent: ensure that new series get written to WAL on rollback. #12592 -* [BUGFIX] Scraping: fix infinite loop on exemplar in protobuf format. #12737 - -## 2.46.0 / 2023-07-25 - -* [FEATURE] Promtool: Add PromQL format and label matcher set/delete commands to promtool. #11411 -* [FEATURE] Promtool: Add push metrics command. #12299 -* [ENHANCEMENT] Promtool: Read from stdin if no filenames are provided in check rules. #12225 -* [ENHANCEMENT] Hetzner SD: Support larger IDs that will be used by Hetzner in September. #12569 -* [ENHANCEMENT] Kubernetes SD: Add more labels for endpointslice and endpoints role. #10914 -* [ENHANCEMENT] Kubernetes SD: Do not add pods to target group if the PodIP status is not set. #11642 -* [ENHANCEMENT] OpenStack SD: Include instance image ID in labels. #12502 -* [ENHANCEMENT] Remote Write receiver: Validate the metric names and labels. #11688 -* [ENHANCEMENT] Web: Initialize `prometheus_http_requests_total` metrics with `code` label set to `200`. #12472 -* [ENHANCEMENT] TSDB: Add Zstandard compression option for wlog. #11666 -* [ENHANCEMENT] TSDB: Support native histograms in snapshot on shutdown. #12258 -* [ENHANCEMENT] Labels: Avoid compiling regexes that are literal. #12434 -* [BUGFIX] Histograms: Fix parsing of float histograms without zero bucket. #12577 -* [BUGFIX] Histograms: Fix scraping native and classic histograms missing some histograms. #12554 -* [BUGFIX] Histograms: Enable ingestion of multiple exemplars per sample. #12557 -* [BUGFIX] File SD: Fix path handling in File-SD watcher to allow directory monitoring on Windows. #12488 -* [BUGFIX] Linode SD: Cast `InstanceSpec` values to `int64` to avoid overflows on 386 architecture.
#12568 -* [BUGFIX] PromQL Engine: Include query parsing in active-query tracking. #12418 -* [BUGFIX] TSDB: Handle TOC parsing failures. #10623 - -## 2.45.0 / 2023-06-23 - -This release is an LTS (Long-Term Support) release of Prometheus and will -receive security, documentation and bugfix patches for at least 12 months. -Please read more about our LTS release cycle at -<https://prometheus.io/docs/introduction/release-cycle/>. - -* [FEATURE] API: New limit parameter to limit the number of items returned by `/api/v1/status/tsdb` endpoint. #12336 -* [FEATURE] Config: Add limits to global config. #12126 -* [FEATURE] Consul SD: Added support for `path_prefix`. #12372 -* [FEATURE] Native histograms: Add option to scrape both classic and native histograms. #12350 -* [FEATURE] Native histograms: Added support for two more arithmetic operators `avg_over_time` and `sum_over_time`. #12262 -* [FEATURE] Promtool: When providing the block id, only one block will be loaded and analyzed. #12031 -* [FEATURE] Remote-write: New Azure AD configuration to support remote writing directly to Azure Monitor workspace. #11944 -* [FEATURE] TSDB: Samples per chunk are now configurable with flag `storage.tsdb.samples-per-chunk`. By default set to its former value 120. #12055 -* [ENHANCEMENT] Native histograms: bucket size can now be limited to avoid scrape failures. #12254 -* [ENHANCEMENT] TSDB: Dropped series are now deleted from the WAL sooner. #12297 -* [BUGFIX] Native histograms: ChunkSeries iterator now checks if a new sample can be appended to the open chunk. #12185 -* [BUGFIX] Native histograms: Fix Histogram Appender `Appendable()` segfault. #12357 -* [BUGFIX] Native histograms: Fix setting reset header to gauge histograms in seriesToChunkEncoder. #12329 -* [BUGFIX] TSDB: Tombstone intervals are not modified after Get() call. #12245 -* [BUGFIX] TSDB: Use path/filepath to set the WAL directory.
#12349 - -## 2.44.0 / 2023-05-13 - -This version is built with Go tag `stringlabels`, to use the smaller data -structure for Labels that was optional in the previous release. For more -details about this code change see #10991. - -* [CHANGE] Remote-write: Raise default samples per send to 2,000. #12203 -* [FEATURE] Remote-read: Handle native histograms. #12085, #12192 -* [FEATURE] Promtool: Health and readiness check of prometheus server in CLI. #12096 -* [FEATURE] PromQL: Add `query_samples_total` metric, the total number of samples loaded by all queries. #12251 -* [ENHANCEMENT] Storage: Optimise buffer used to iterate through samples. #12326 -* [ENHANCEMENT] Scrape: Reduce memory allocations on target labels. #12084 -* [ENHANCEMENT] PromQL: Use faster heap method for `topk()` / `bottomk()`. #12190 -* [ENHANCEMENT] Rules API: Allow filtering by rule name. #12270 -* [ENHANCEMENT] Native Histograms: Various fixes and improvements. #11687, #12264, #12272 -* [ENHANCEMENT] UI: Search of scraping pools is now case-insensitive. #12207 -* [ENHANCEMENT] TSDB: Add an affirmative log message for successful WAL repair. #12135 -* [BUGFIX] TSDB: Block compaction failed when shutting down. #12179 -* [BUGFIX] TSDB: Out-of-order chunks could be ignored if the write-behind log was deleted. #12127 - -## 2.43.1 / 2023-05-03 - -* [BUGFIX] Labels: `Set()` after `Del()` would be ignored, which broke some relabeling rules. #12322 - -## 2.43.0 / 2023-03-21 - -We are working on some performance improvements in Prometheus, which are only -built into Prometheus when compiling it using the Go tag `stringlabels` -(therefore they are not shipped in the default binaries). It uses a data -structure for labels that uses a single string to hold all the label/values, -resulting in a smaller heap size and some speedups in most cases. We would like -to encourage users who are interested in these improvements to help us measure -the gains on their production architecture. 
We are providing release artefacts -`2.43.0+stringlabels` and Docker images tagged `v2.43.0-stringlabels` with those -improvements for testing. #10991 - -* [FEATURE] Promtool: Add HTTP client configuration to query commands. #11487 -* [FEATURE] Scrape: Add `scrape_config_files` to include scrape configs from different files. #12019 -* [FEATURE] HTTP client: Add `no_proxy` to exclude URLs from proxied requests. #12098 -* [FEATURE] HTTP client: Add `proxy_from_environment` to read proxies from env variables. #12098 -* [ENHANCEMENT] API: Add support for setting lookback delta per query via the API. #12088 -* [ENHANCEMENT] API: Change HTTP status code from 503/422 to 499 if a request is canceled. #11897 -* [ENHANCEMENT] Scrape: Allow exemplars for all metric types. #11984 -* [ENHANCEMENT] TSDB: Add metrics for head chunks and WAL folders size. #12013 -* [ENHANCEMENT] TSDB: Automatically remove incorrect snapshot with index that is ahead of WAL. #11859 -* [ENHANCEMENT] TSDB: Improve Prometheus parser error outputs to be more comprehensible. #11682 -* [ENHANCEMENT] UI: Scope `group by` labels to metric in autocompletion. #11914 -* [BUGFIX] Scrape: Fix `prometheus_target_scrape_pool_target_limit` metric not set before reloading. #12002 -* [BUGFIX] TSDB: Correctly update `prometheus_tsdb_head_chunks_removed_total` and `prometheus_tsdb_head_chunks` metrics when reading WAL. #11858 -* [BUGFIX] TSDB: Use the correct unit (seconds) when recording out-of-order append deltas in the `prometheus_tsdb_sample_ooo_delta` metric. #12004 - -## 2.42.0 / 2023-01-31 - -This release comes with a bunch of feature coverage for native histograms and breaking changes. - -If you are trying native histograms already, we recommend you remove the `wal` directory when upgrading. -Because the old WAL record for native histograms is not backward compatible in v2.42.0, this will lead to some data loss for the latest data. 
- -Additionally, if you scrape "float histograms" or use recording rules on native histograms in v2.42.0 (which writes float histograms), -it is a one-way street since older versions do not support float histograms. - -* [CHANGE] **breaking** TSDB: Changed WAL record format for the experimental native histograms. #11783 -* [FEATURE] Add 'keep_firing_for' field to alerting rules. #11827 -* [FEATURE] Promtool: Add support of selecting timeseries for TSDB dump. #11872 -* [ENHANCEMENT] Agent: Native histogram support. #11842 -* [ENHANCEMENT] Rules: Support native histograms in recording rules. #11838 -* [ENHANCEMENT] SD: Add container ID as a meta label for pod targets for Kubernetes. #11844 -* [ENHANCEMENT] SD: Add VM size label to azure service discovery. #11650 -* [ENHANCEMENT] Support native histograms in federation. #11830 -* [ENHANCEMENT] TSDB: Add gauge histogram support. #11783 #11840 #11814 -* [ENHANCEMENT] TSDB/Scrape: Support FloatHistogram that represents buckets as float64 values. #11522 #11817 #11716 -* [ENHANCEMENT] UI: Show individual scrape pools on /targets page. #11142 - -## 2.41.0 / 2022-12-20 - -* [FEATURE] Relabeling: Add `keepequal` and `dropequal` relabel actions. #11564 -* [FEATURE] Add support for HTTP proxy headers. #11712 -* [ENHANCEMENT] Reload private certificates when changed on disk. #11685 -* [ENHANCEMENT] Add `max_version` to specify maximum TLS version in `tls_config`. #11685 -* [ENHANCEMENT] Add `goos` and `goarch` labels to `prometheus_build_info`. #11685 -* [ENHANCEMENT] SD: Add proxy support for EC2 and LightSail SDs #11611 -* [ENHANCEMENT] SD: Add new metric `prometheus_sd_file_watcher_errors_total`. #11066 -* [ENHANCEMENT] Remote Read: Use a pool to speed up marshalling. #11357 -* [ENHANCEMENT] TSDB: Improve handling of tombstoned chunks in iterators. #11632 -* [ENHANCEMENT] TSDB: Optimize postings offset table reading. #11535 -* [BUGFIX] Scrape: Validate the metric name, label names, and label values after relabeling. 
#11074 -* [BUGFIX] Remote Write receiver and rule manager: Fix error handling. #11727 - -## 2.40.7 / 2022-12-14 - -* [BUGFIX] Use Windows native DNS resolver. #11704 -* [BUGFIX] TSDB: Fix queries involving negative buckets of native histograms. #11699 - -## 2.40.6 / 2022-12-09 - -* [SECURITY] Security upgrade from go and upstream dependencies that include - security fixes to the net/http and os packages. #11691 - -## 2.40.5 / 2022-12-01 - -* [BUGFIX] TSDB: Fix queries involving native histograms due to improper reset of iterators. #11643 - -## 2.40.4 / 2022-11-29 - -* [SECURITY] Fix basic authentication bypass vulnerability (CVE-2022-46146). GHSA-4v48-4q5m-8vx4 - -## 2.40.3 / 2022-11-23 - -* [BUGFIX] TSDB: Fix compaction after a deletion is called. #11623 - -## 2.40.2 / 2022-11-16 - -* [BUGFIX] UI: Fix black-on-black metric name color in dark mode. #11572 - -## 2.40.1 / 2022-11-09 - -* [BUGFIX] TSDB: Fix alignment for atomic int64 for 32 bit architecture. #11547 -* [BUGFIX] Scrape: Fix accept headers. #11552 - -## 2.40.0 / 2022-11-08 - -This release introduces an experimental, native way of representing and storing histograms. - -It can be enabled in Prometheus via `--enable-feature=native-histograms` to accept native histograms. -Enabling native histograms will also switch the preferred exposition format to protobuf. - -To instrument your application with native histograms, use the `main` branch of `client_golang` (this will change for the final release when v1.14.0 of client_golang will be out), and set the `NativeHistogramBucketFactor` in your `HistogramOpts` (`1.1` is a good starting point). -Your existing histograms won't switch to native histograms until `NativeHistogramBucketFactor` is set. - -* [FEATURE] Add **experimental** support for native histograms. Enable with the flag `--enable-feature=native-histograms`. #11447 -* [FEATURE] SD: Add service discovery for OVHcloud. #10802 -* [ENHANCEMENT] Kubernetes SD: Use protobuf encoding. 
#11353 -* [ENHANCEMENT] TSDB: Use golang.org/x/exp/slices for improved sorting speed. #11054 #11318 #11380 -* [ENHANCEMENT] Consul SD: Add enterprise admin partitions. Adds `__meta_consul_partition` label. Adds `partition` config in `consul_sd_config`. #11482 -* [BUGFIX] API: Fix API error codes for `/api/v1/labels` and `/api/v1/series`. #11356 - -## 2.39.2 / 2022-11-09 - -* [BUGFIX] TSDB: Fix alignment for atomic int64 for 32 bit architecture. #11547 - -## 2.39.1 / 2022-10-07 - -* [BUGFIX] Rules: Fix notifier relabel changing the labels on active alerts. #11427 - -## 2.39.0 / 2022-10-05 - -* [FEATURE] **experimental** TSDB: Add support for ingesting out-of-order samples. This is configured via `out_of_order_time_window` field in the config file; check config file docs for more info. #11075 -* [ENHANCEMENT] API: `/-/healthy` and `/-/ready` API calls now also respond to a `HEAD` request on top of existing `GET` support. #11160 -* [ENHANCEMENT] PuppetDB SD: Add `__meta_puppetdb_query` label. #11238 -* [ENHANCEMENT] AWS EC2 SD: Add `__meta_ec2_region` label. #11326 -* [ENHANCEMENT] AWS Lightsail SD: Add `__meta_lightsail_region` label. #11326 -* [ENHANCEMENT] Scrape: Optimise relabeling by re-using memory. #11147 -* [ENHANCEMENT] TSDB: Improve WAL replay timings. #10973 #11307 #11319 -* [ENHANCEMENT] TSDB: Optimise memory by not storing unnecessary data in the memory. #11280 #11288 #11296 -* [ENHANCEMENT] TSDB: Allow overlapping blocks by default. `--storage.tsdb.allow-overlapping-blocks` now has no effect. #11331 -* [ENHANCEMENT] UI: Click to copy label-value pair from query result to clipboard. #11229 -* [BUGFIX] TSDB: Turn off isolation for Head compaction to fix a memory leak. #11317 -* [BUGFIX] TSDB: Fix 'invalid magic number 0' error on Prometheus startup. #11338 -* [BUGFIX] PromQL: Properly close file descriptor when logging unfinished queries. #11148 -* [BUGFIX] Agent: Fix validation of flag options and prevent WAL from growing more than desired. 
#9876 - -## 2.38.0 / 2022-08-16 - -* [FEATURE]: Web: Add a `/api/v1/format_query` HTTP API endpoint that allows pretty-formatting PromQL expressions. #11036 #10544 #11005 -* [FEATURE]: UI: Add support for formatting PromQL expressions in the UI. #11039 -* [FEATURE]: DNS SD: Support MX records for discovering targets. #10099 -* [FEATURE]: Templates: Add `toTime()` template function that allows converting sample timestamps to Go `time.Time` values. #10993 -* [ENHANCEMENT]: Kubernetes SD: Add `__meta_kubernetes_service_port_number` meta label indicating the service port number. #11002 #11053 -* [ENHANCEMENT]: Kubernetes SD: Add `__meta_kubernetes_pod_container_image` meta label indicating the container image. #11034 #11146 -* [ENHANCEMENT]: PromQL: When a query panics, also log the query itself alongside the panic message. #10995 -* [ENHANCEMENT]: UI: Tweak colors in the dark theme to improve the contrast ratio. #11068 -* [ENHANCEMENT]: Web: Speed up calls to `/api/v1/rules` by avoiding locks and using atomic types instead. #10858 -* [ENHANCEMENT]: Scrape: Add a `no-default-scrape-port` feature flag, which omits or removes any default HTTP (`:80`) or HTTPS (`:443`) ports in the target's scrape address. #9523 -* [BUGFIX]: TSDB: In the WAL watcher metrics, expose the `type="exemplar"` label instead of `type="unknown"` for exemplar records. #11008 -* [BUGFIX]: TSDB: Fix race condition around allocating series IDs during chunk snapshot loading. #11099 - -## 2.37.0 / 2022-07-14 - -This release is an LTS (Long-Term Support) release of Prometheus and will -receive security, documentation and bugfix patches for at least 6 months. -Please read more about our LTS release cycle at -<https://prometheus.io/docs/introduction/release-cycle/>. - -Following data loss by users due to lack of unified buffer cache in OpenBSD, we -will no longer release Prometheus upstream for OpenBSD until a proper solution is -found. #8799 - -* [FEATURE] Nomad SD: New service discovery for Nomad built-in service discovery. 
#10915 -* [ENHANCEMENT] Kubernetes SD: Allow attaching node labels for endpoint role. #10759 -* [ENHANCEMENT] PromQL: Optimise creation of signature with/without labels. #10667 -* [ENHANCEMENT] TSDB: Memory optimizations. #10873 #10874 -* [ENHANCEMENT] TSDB: Reduce sleep time when reading WAL. #10859 #10878 -* [ENHANCEMENT] OAuth2: Add appropriate timeouts and User-Agent header. #11020 -* [BUGFIX] Alerting: Fix Alertmanager targets not being updated when alerts were queued. #10948 -* [BUGFIX] Hetzner SD: Make authentication files relative to Prometheus config file. #10813 -* [BUGFIX] Promtool: Fix `promtool check config` not erroring properly on failures. #10952 -* [BUGFIX] Scrape: Keep relabeled scrape interval and timeout on reloads. #10916 -* [BUGFIX] TSDB: Don't increment `prometheus_tsdb_compactions_failed_total` when context is canceled. #10772 -* [BUGFIX] TSDB: Fix panic if series is not found when deleting series. #10907 -* [BUGFIX] TSDB: Increase `prometheus_tsdb_mmap_chunk_corruptions_total` on out of sequence errors. #10406 -* [BUGFIX] Uyuni SD: Make authentication files relative to Prometheus configuration file and fix default configuration values. #10813 - -## 2.36.2 / 2022-06-20 - -* [BUGFIX] Fix serving of static assets like fonts and favicon. #10888 - -## 2.36.1 / 2022-06-09 - -* [BUGFIX] promtool: Add --lint-fatal option. #10840 - -## 2.36.0 / 2022-05-30 - -* [FEATURE] Add lowercase and uppercase relabel action. #10641 -* [FEATURE] SD: Add IONOS Cloud integration. #10514 -* [FEATURE] SD: Add Vultr integration. #10714 -* [FEATURE] SD: Add Linode SD failure count metric. #10673 -* [FEATURE] Add prometheus_ready metric. #10682 -* [ENHANCEMENT] Add stripDomain to template function. #10475 -* [ENHANCEMENT] UI: Enable active search through dropped targets. #10668 -* [ENHANCEMENT] promtool: support matchers when querying label values. #10727 -* [ENHANCEMENT] Add agent mode identifier. #9638 -* [BUGFIX] Changing TotalQueryableSamples from int to int64. 
#10549 -* [BUGFIX] tsdb/agent: Ignore duplicate exemplars. #10595 -* [BUGFIX] TSDB: Fix chunk overflow appending samples at a variable rate. #10607 -* [BUGFIX] Stop rule manager before TSDB is stopped. #10680 - -## 2.35.0 / 2022-04-21 - -This Prometheus release is built with go1.18, which contains two noticeable changes related to TLS: - -1. [TLS 1.0 and 1.1 disabled by default client-side](https://go.dev/doc/go1.18#tls10). -Prometheus users can override this with the `min_version` parameter of [tls_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config). -2. [Certificates signed with the SHA-1 hash function are rejected](https://go.dev/doc/go1.18#sha1). This doesn't apply to self-signed root certificates. - -* [CHANGE] TSDB: Delete `*.tmp` WAL files when Prometheus starts. #10317 -* [CHANGE] promtool: Add new flag `--lint` (enabled by default) for the commands `check rules` and `check config`, resulting in a new exit code (`3`) for linter errors. #10435 -* [FEATURE] Support for automatically setting the variable `GOMAXPROCS` to the container CPU limit. Enable with the flag `--enable-feature=auto-gomaxprocs`. #10498 -* [FEATURE] PromQL: Extend statistics with total and peak number of samples in a query. Additionally, per-step statistics are available with `--enable-feature=promql-per-step-stats` and using `stats=all` in the query API. -Enable with the flag `--enable-feature=promql-per-step-stats`. #10369 -* [ENHANCEMENT] Prometheus is built with Go 1.18. #10501 -* [ENHANCEMENT] TSDB: more efficient sorting of postings read from WAL at startup. #10500 -* [ENHANCEMENT] Azure SD: Add metric to track Azure SD failures. #10476 -* [ENHANCEMENT] Azure SD: Add an optional `resource_group` configuration. #10365 -* [ENHANCEMENT] Kubernetes SD: Support `discovery.k8s.io/v1` `EndpointSlice` (previously only `discovery.k8s.io/v1beta1` `EndpointSlice` was supported). 
#9570 -* [ENHANCEMENT] Kubernetes SD: Allow attaching node metadata to discovered pods. #10080 -* [ENHANCEMENT] OAuth2: Support for using a proxy URL to fetch OAuth2 tokens. #10492 -* [ENHANCEMENT] Configuration: Add the ability to disable HTTP2. #10492 -* [ENHANCEMENT] Config: Support overriding minimum TLS version. #10610 -* [BUGFIX] Kubernetes SD: Explicitly include gcp auth from k8s.io. #10516 -* [BUGFIX] Fix OpenMetrics parser to sort uppercase labels correctly. #10510 -* [BUGFIX] UI: Fix scrape interval and duration tooltip not showing on target page. #10545 -* [BUGFIX] Tracing/GRPC: Set TLS credentials only when insecure is false. #10592 -* [BUGFIX] Agent: Fix ID collision when loading a WAL with multiple segments. #10587 -* [BUGFIX] Remote-write: Fix a deadlock between Batch and flushing the queue. #10608 - -## 2.34.0 / 2022-03-15 - -* [CHANGE] UI: Classic UI removed. #10208 -* [CHANGE] Tracing: Migrate from Jaeger to OpenTelemetry based tracing. #9724, #10203, #10276 -* [ENHANCEMENT] TSDB: Disable the chunk write queue by default and allow configuration with the experimental flag `--storage.tsdb.head-chunks-write-queue-size`. #10425 -* [ENHANCEMENT] HTTP SD: Add a failure counter. #10372 -* [ENHANCEMENT] Azure SD: Set Prometheus User-Agent on requests. #10209 -* [ENHANCEMENT] Uyuni SD: Reduce the number of logins to Uyuni. #10072 -* [ENHANCEMENT] Scrape: Log when an invalid media type is encountered during a scrape. #10186 -* [ENHANCEMENT] Scrape: Accept application/openmetrics-text;version=1.0.0 in addition to version=0.0.1. #9431 -* [ENHANCEMENT] Remote-read: Add an option to not use external labels as selectors for remote read. #10254 -* [ENHANCEMENT] UI: Optimize the alerts page and add a search bar. #10142 -* [ENHANCEMENT] UI: Improve graph colors that were hard to see. #10179 -* [ENHANCEMENT] Config: Allow escaping of `$` with `$$` when using environment variables with external labels. 
#10129 -* [BUGFIX] PromQL: Properly return an error from histogram_quantile when metrics have the same labelset. #10140 -* [BUGFIX] UI: Fix bug that sets the range input to the resolution. #10227 -* [BUGFIX] TSDB: Fix a query panic when `memory-snapshot-on-shutdown` is enabled. #10348 -* [BUGFIX] Parser: Specify type in metadata parser errors. #10269 -* [BUGFIX] Scrape: Fix label limit changes not applying. #10370 - -## 2.33.5 / 2022-03-08 - -The binaries published with this release are built with Go1.17.8 to avoid [CVE-2022-24921](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-24921). - -* [BUGFIX] Remote-write: Fix deadlock between adding to queue and getting batch. #10395 - -## 2.33.4 / 2022-02-22 - -* [BUGFIX] TSDB: Fix panic when m-mapping head chunks onto the disk. #10316 - -## 2.33.3 / 2022-02-11 - -* [BUGFIX] Azure SD: Fix a regression when public IP Address isn't set. #10289 - -## 2.33.2 / 2022-02-11 - -* [BUGFIX] Azure SD: Fix panic when public IP Address isn't set. #10280 -* [BUGFIX] Remote-write: Fix deadlock when stopping a shard. #10279 - -## 2.33.1 / 2022-02-02 - -* [BUGFIX] SD: Fix _no such file or directory_ in K8s SD when not running inside K8s. #10235 - -## 2.33.0 / 2022-01-29 - -* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121 -* [CHANGE] Web: Promote remote-write-receiver to stable. #10119 -* [FEATURE] Config: Add `stripPort` template function. #10002 -* [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045 -* [FEATURE] SD: Enable target discovery in own K8s namespace. #9881 -* [FEATURE] SD: Add provider ID label in K8s SD. #9603 -* [FEATURE] Web: Add limit field to the rules API. #10152 -* [ENHANCEMENT] Remote-write: Avoid allocations by buffering concrete structs instead of interfaces. #9934 -* [ENHANCEMENT] Remote-write: Log time series details for out-of-order samples in remote write receiver. 
#9894 -* [ENHANCEMENT] Remote-write: Shard up more when backlogged. #9274 -* [ENHANCEMENT] TSDB: Use simpler map key to improve exemplar ingest performance. #10111 -* [ENHANCEMENT] TSDB: Avoid allocations when popping from the intersected postings heap. #10092 -* [ENHANCEMENT] TSDB: Make chunk writing non-blocking, avoiding latency spikes in remote-write. #10051 -* [ENHANCEMENT] TSDB: Improve label matching performance. #9907 -* [ENHANCEMENT] UI: Optimize the service discovery page and add a search bar. #10131 -* [ENHANCEMENT] UI: Optimize the target page and add a search bar. #10103 -* [BUGFIX] Promtool: Make exit codes more consistent. #9861 -* [BUGFIX] Promtool: Fix flakiness of rule testing. #8818 -* [BUGFIX] Remote-write: Update `prometheus_remote_storage_queue_highest_sent_timestamp_seconds` metric when write irrecoverably fails. #10102 -* [BUGFIX] Storage: Avoid panic in `BufferedSeriesIterator`. #9945 -* [BUGFIX] TSDB: CompactBlockMetas should produce correct mint/maxt for overlapping blocks. #10108 -* [BUGFIX] TSDB: Fix logging of exemplar storage size. #9938 -* [BUGFIX] UI: Fix overlapping click targets for the alert state checkboxes. #10136 -* [BUGFIX] UI: Fix _Unhealthy_ filter on target page to actually display only _Unhealthy_ targets. #10103 -* [BUGFIX] UI: Fix autocompletion when expression is empty. #10053 -* [BUGFIX] TSDB: Fix deadlock from simultaneous GC and write. #10166 - -## 2.32.1 / 2021-12-17 - -* [BUGFIX] Scrape: Fix reporting metrics when sample limit is reached during the report. #9996 -* [BUGFIX] Scrape: Ensure that scrape interval and scrape timeout are always set. #10023 -* [BUGFIX] TSDB: Expose and fix bug in iterators' `Seek()` method. #10030 - -## 2.32.0 / 2021-12-09 - -This release introduces the Prometheus Agent, a new mode of operation for -Prometheus optimized for remote-write only scenarios. In this mode, Prometheus -does not generate blocks on the local filesystem and is not queryable locally. 
-Enable with `--enable-feature=agent`. - -Learn more about the Prometheus Agent in our [blog post](https://prometheus.io/blog/2021/11/16/agent/). - -* [CHANGE] Remote-write: Change default max retry time from 100ms to 5 seconds. #9634 -* [FEATURE] Agent: New mode of operation optimized for remote-write only scenarios, without local storage. Enable with `--enable-feature=agent`. #8785 #9851 #9664 #9939 #9941 #9943 -* [FEATURE] Promtool: Add `promtool check service-discovery` command. #8970 -* [FEATURE] UI: Add search in metrics dropdown. #9629 -* [FEATURE] Templates: Add parseDuration to template functions. #8817 -* [ENHANCEMENT] Promtool: Improve test output. #8064 -* [ENHANCEMENT] PromQL: Use kahan summation for better numerical stability. #9588 -* [ENHANCEMENT] Remote-write: Reuse memory for marshalling. #9412 -* [ENHANCEMENT] Scrape: Add `scrape_body_size_bytes` scrape metric behind the `--enable-feature=extra-scrape-metrics` flag. #9569 -* [ENHANCEMENT] TSDB: Add windows arm64 support. #9703 -* [ENHANCEMENT] TSDB: Optimize query by skipping unneeded sorting in TSDB. #9673 -* [ENHANCEMENT] Templates: Support int and uint as datatypes for template formatting. #9680 -* [ENHANCEMENT] UI: Prefer `rate` over `rad`, `delta` over `deg`, and `count` over `cos` in autocomplete. #9688 -* [ENHANCEMENT] Linode SD: Tune API request page sizes. #9779 -* [BUGFIX] TSDB: Add more size checks when writing individual sections in the index. #9710 -* [BUGFIX] PromQL: Make `deriv()` return zero values for constant series. #9728 -* [BUGFIX] TSDB: Fix panic when checkpoint directory is empty. #9687 -* [BUGFIX] TSDB: Fix panic, out of order chunks, and race warning during WAL replay. #9856 -* [BUGFIX] UI: Correctly render links for targets with IPv6 addresses that contain a Zone ID. #9853 -* [BUGFIX] Promtool: Fix checking of `authorization.credentials_file` and `bearer_token_file` fields. #9883 -* [BUGFIX] Uyuni SD: Fix null pointer exception during initialization. 
#9924 #9950 -* [BUGFIX] TSDB: Fix queries after a failed snapshot replay. #9980 - -## 2.31.2 / 2021-12-09 - -* [BUGFIX] TSDB: Fix queries after a failed snapshot replay. #9980 - -## 2.31.1 / 2021-11-05 - -* [BUGFIX] SD: Fix a panic when the experimental discovery manager receives - targets during a reload. #9656 - -## 2.31.0 / 2021-11-02 - -* [CHANGE] UI: Remove standard PromQL editor in favour of the codemirror-based editor. #9452 -* [FEATURE] PromQL: Add trigonometric functions and `atan2` binary operator. #9239 #9248 #9515 -* [FEATURE] Remote: Add support for exemplar in the remote write receiver endpoint. #9319 #9414 -* [FEATURE] SD: Add PuppetDB service discovery. #8883 -* [FEATURE] SD: Add Uyuni service discovery. #8190 -* [FEATURE] Web: Add support for security-related HTTP headers. #9546 -* [ENHANCEMENT] Azure SD: Add `proxy_url`, `follow_redirects`, `tls_config`. #9267 -* [ENHANCEMENT] Backfill: Add `--max-block-duration` in `promtool create-blocks-from rules`. #9511 -* [ENHANCEMENT] Config: Print human-readable sizes with unit instead of raw numbers. #9361 -* [ENHANCEMENT] HTTP: Re-enable HTTP/2. #9398 -* [ENHANCEMENT] Kubernetes SD: Warn user if number of endpoints exceeds limit. #9467 -* [ENHANCEMENT] OAuth2: Add TLS configuration to token requests. #9550 -* [ENHANCEMENT] PromQL: Several optimizations. #9365 #9360 #9362 #9552 -* [ENHANCEMENT] PromQL: Make aggregations deterministic in instant queries. #9459 -* [ENHANCEMENT] Rules: Add the ability to limit number of alerts or series. #9260 #9541 -* [ENHANCEMENT] SD: Experimental discovery manager to avoid restarts upon reload. Disabled by default, enable with flag `--enable-feature=new-service-discovery-manager`. #9349 #9537 -* [ENHANCEMENT] UI: Debounce timerange setting changes. #9359 -* [BUGFIX] Backfill: Apply rule labels after query labels. #9421 -* [BUGFIX] Scrape: Resolve conflicts between multiple exported label prefixes. 
#9479 #9518 -* [BUGFIX] Scrape: Restart scrape loops when `__scrape_interval__` is changed. #9551 -* [BUGFIX] TSDB: Fix memory leak in samples deletion. #9151 -* [BUGFIX] UI: Use consistent margin-bottom for all alert kinds. #9318 - -## 2.30.4 / 2021-12-09 - -* [BUGFIX] TSDB: Fix queries after a failed snapshot replay. #9980 - -## 2.30.3 / 2021-10-05 - -* [BUGFIX] TSDB: Fix panic on failed snapshot replay. #9438 -* [BUGFIX] TSDB: Don't fail snapshot replay with exemplar storage disabled when the snapshot contains exemplars. #9438 - -## 2.30.2 / 2021-10-01 - -* [BUGFIX] TSDB: Don't error on overlapping m-mapped chunks during WAL replay. #9381 - -## 2.30.1 / 2021-09-28 - -* [ENHANCEMENT] Remote Write: Redact remote write URL when used for metric label. #9383 -* [ENHANCEMENT] UI: Redact remote write URL and proxy URL passwords in the `/config` page. #9408 -* [BUGFIX] promtool rules backfill: Prevent creation of data before the start time. #9339 -* [BUGFIX] promtool rules backfill: Do not query after the end time. #9340 -* [BUGFIX] Azure SD: Fix panic when no computername is set. #9387 - -## 2.30.0 / 2021-09-14 - -* [FEATURE] **experimental** TSDB: Snapshot in-memory chunks on shutdown for faster restarts. Behind `--enable-feature=memory-snapshot-on-shutdown` flag. #7229 -* [FEATURE] **experimental** Scrape: Configure scrape interval and scrape timeout via relabeling using `__scrape_interval__` and `__scrape_timeout__` labels respectively. #8911 -* [FEATURE] Scrape: Add `scrape_timeout_seconds` and `scrape_sample_limit` metric. Behind `--enable-feature=extra-scrape-metrics` flag to avoid additional cardinality by default. #9247 #9295 -* [ENHANCEMENT] Scrape: Add `--scrape.timestamp-tolerance` flag to adjust scrape timestamp tolerance when enabled via `--scrape.adjust-timestamps`. #9283 -* [ENHANCEMENT] Remote Write: Improve throughput when sending exemplars. 
#8921 -* [ENHANCEMENT] TSDB: Optimise WAL loading by removing extra map and caching min-time #9160 -* [ENHANCEMENT] promtool: Speed up checking for duplicate rules. #9262/#9306 -* [ENHANCEMENT] Scrape: Reduce allocations when parsing the metrics. #9299 -* [ENHANCEMENT] docker_sd: Support host network mode #9125 -* [BUGFIX] Exemplars: Fix panic when resizing exemplar storage from 0 to a non-zero size. #9286 -* [BUGFIX] TSDB: Correctly decrement `prometheus_tsdb_head_active_appenders` when the append has no samples. #9230 -* [BUGFIX] promtool rules backfill: Return 1 if backfill was unsuccessful. #9303 -* [BUGFIX] promtool rules backfill: Avoid creation of overlapping blocks. #9324 -* [BUGFIX] config: Fix a panic when reloading configuration with a `null` relabel action. #9224 - -## 2.29.2 / 2021-08-27 - -* [BUGFIX] Fix Kubernetes SD failing to discover Ingress in Kubernetes v1.22. #9205 -* [BUGFIX] Fix data race in loading write-ahead-log (WAL). #9259 - -## 2.29.1 / 2021-08-11 - -* [BUGFIX] tsdb: align atomically accessed int64 to prevent panic in 32-bit - archs. #9192 - -## 2.29.0 / 2021-08-11 - -Note for macOS users: Due to [changes in the upcoming Go 1.17](https://tip.golang.org/doc/go1.17#darwin), -this is the last Prometheus release that supports macOS 10.12 Sierra. - -* [CHANGE] Promote `--storage.tsdb.allow-overlapping-blocks` flag to stable. #9117 -* [CHANGE] Promote `--storage.tsdb.retention.size` flag to stable. #9004 -* [FEATURE] Add Kuma service discovery. #8844 -* [FEATURE] Add `present_over_time` PromQL function. #9097 -* [FEATURE] Allow configuring exemplar storage via file and make it reloadable. #8974 -* [FEATURE] UI: Allow selecting time range with mouse drag. #8977 -* [FEATURE] promtool: Add feature flags flag `--enable-feature`. #8958 -* [FEATURE] promtool: Add file_sd file validation. #8950 -* [ENHANCEMENT] Reduce blocking of outgoing remote write requests from series garbage collection. 
#9109 -* [ENHANCEMENT] Improve write-ahead-log decoding performance. #9106 -* [ENHANCEMENT] Improve append performance in TSDB by reducing mutex usage. #9061 -* [ENHANCEMENT] Allow configuring `max_samples_per_send` for remote write metadata. #8959 -* [ENHANCEMENT] Add `__meta_gce_interface_ipv4_<name>` meta label to GCE discovery. #8978 -* [ENHANCEMENT] Add `__meta_ec2_availability_zone_id` meta label to EC2 discovery. #8896 -* [ENHANCEMENT] Add `__meta_azure_machine_computer_name` meta label to Azure discovery. #9112 -* [ENHANCEMENT] Add `__meta_hetzner_hcloud_labelpresent_<labelname>` meta label to Hetzner discovery. #9028 -* [ENHANCEMENT] promtool: Add compaction efficiency to `promtool tsdb analyze` reports. #8940 -* [ENHANCEMENT] promtool: Allow configuring max block duration for backfilling via `--max-block-duration` flag. #8919 -* [ENHANCEMENT] UI: Add sorting and filtering to flags page. #8988 -* [ENHANCEMENT] UI: Improve alerts page rendering performance. #9005 -* [BUGFIX] Log when total symbol size exceeds 2^32 bytes, causing compaction to fail, and skip compaction. #9104 -* [BUGFIX] Fix incorrect `target_limit` reloading of zero value. #9120 -* [BUGFIX] Fix head GC and pending readers race condition. #9081 -* [BUGFIX] Fix timestamp handling in OpenMetrics parser. #9008 -* [BUGFIX] Fix potential duplicate metrics in `/federate` endpoint when specifying multiple matchers. #8885 -* [BUGFIX] Fix server configuration and validation for authentication via client cert. #9123 -* [BUGFIX] Allow `start` and `end` again as label names in PromQL queries. They had been disallowed since the introduction of the `@` timestamp feature. #9119 - -## 2.28.1 / 2021-07-01 - -* [BUGFIX]: HTTP SD: Allow `charset` specification in `Content-Type` header. #8981 -* [BUGFIX]: HTTP SD: Fix handling of disappeared target groups. #9019 -* [BUGFIX]: Fix incorrect log-level handling after moving to go-kit/log. #9021 - -## 2.28.0 / 2021-06-21 - -* [CHANGE] UI: Make the new experimental PromQL editor the default. 
#8925 -* [FEATURE] Linode SD: Add Linode service discovery. #8846 -* [FEATURE] HTTP SD: Add generic HTTP-based service discovery. #8839 -* [FEATURE] Kubernetes SD: Allow configuring API Server access via a kubeconfig file. #8811 -* [FEATURE] UI: Add exemplar display support to the graphing interface. #8832 #8945 #8929 -* [FEATURE] Consul SD: Add namespace support for Consul Enterprise. #8900 -* [ENHANCEMENT] Promtool: Allow silencing output when importing / backfilling data. #8917 -* [ENHANCEMENT] Consul SD: Support reading tokens from file. #8926 -* [ENHANCEMENT] Rules: Add a new `.ExternalURL` alert field templating variable, containing the external URL of the Prometheus server. #8878 -* [ENHANCEMENT] Scrape: Add experimental `body_size_limit` scrape configuration setting to limit the allowed response body size for target scrapes. #8833 #8886 -* [ENHANCEMENT] Kubernetes SD: Add ingress class name label for ingress discovery. #8916 -* [ENHANCEMENT] UI: Show a startup screen with progress bar when the TSDB is not ready yet. #8662 #8908 #8909 #8946 -* [ENHANCEMENT] SD: Add a target creation failure counter `prometheus_target_sync_failed_total` and improve target creation failure handling. #8786 -* [ENHANCEMENT] TSDB: Improve validation of exemplar label set length. #8816 -* [ENHANCEMENT] TSDB: Add a `prometheus_tsdb_clean_start` metric that indicates whether a TSDB lockfile from a previous run still existed upon startup. #8824 -* [BUGFIX] UI: In the experimental PromQL editor, fix autocompletion and parsing for special float values and improve series metadata fetching. #8856 -* [BUGFIX] TSDB: When merging chunks, split resulting chunks if they would contain more than the maximum of 120 samples. #8582 -* [BUGFIX] SD: Fix the computation of the `prometheus_sd_discovered_targets` metric when using multiple service discoveries. #8828 - -## 2.27.1 / 2021-05-18 - -This release contains a bug fix for a security issue in the API endpoint. 
An -attacker can craft a special URL that redirects a user to any endpoint via an -HTTP 302 response. See the [security advisory][GHSA-vx57-7f4q-fpc7] for more details. - -[GHSA-vx57-7f4q-fpc7]:https://github.com/prometheus/prometheus/security/advisories/GHSA-vx57-7f4q-fpc7 - -This vulnerability has been reported by Aaron Devaney from MDSec. - -* [BUGFIX] SECURITY: Fix arbitrary redirects under the /new endpoint (CVE-2021-29622) - -## 2.27.0 / 2021-05-12 - -* [CHANGE] Remote write: Metric `prometheus_remote_storage_samples_bytes_total` renamed to `prometheus_remote_storage_bytes_total`. #8296 -* [FEATURE] Promtool: Retroactive rule evaluation functionality. #7675 -* [FEATURE] Configuration: Environment variable expansion for external labels. Behind `--enable-feature=expand-external-labels` flag. #8649 -* [FEATURE] TSDB: Add a flag(`--storage.tsdb.max-block-chunk-segment-size`) to control the max chunks file size of the blocks for small Prometheus instances. #8478 -* [FEATURE] UI: Add a dark theme. #8604 -* [FEATURE] AWS Lightsail Discovery: Add AWS Lightsail Discovery. #8693 -* [FEATURE] Docker Discovery: Add Docker Service Discovery. #8629 -* [FEATURE] OAuth: Allow OAuth 2.0 to be used anywhere an HTTP client is used. #8761 -* [FEATURE] Remote Write: Send exemplars via remote write. Experimental and disabled by default. #8296 -* [ENHANCEMENT] Digital Ocean Discovery: Add `__meta_digitalocean_vpc` label. #8642 -* [ENHANCEMENT] Scaleway Discovery: Read Scaleway secret from a file. #8643 -* [ENHANCEMENT] Scrape: Add configurable limits for label size and count. #8777 -* [ENHANCEMENT] UI: Add 16w and 26w time range steps. #8656 -* [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682 -* [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659 -* [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790 -* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. 
#8723 -* [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737 -* [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766 - -## 2.26.0 / 2021-03-31 - -Prometheus is now built with and supports Go 1.16 (#8544). This reverts the memory release pattern added in Go 1.12, so common RSS usage metrics now show a more accurate number for the actual memory used by Prometheus. You can read more details [here](https://www.bwplotka.dev/2019/golang-memory-monitoring/). - -Note that from this release on, Prometheus uses the Alertmanager v2 API by default. - -* [CHANGE] Alerting: Using Alertmanager v2 API by default. #8626 -* [CHANGE] Prometheus/Promtool: As agreed at the dev summit, binaries are now printing help and usage to stdout instead of stderr. #8542 -* [FEATURE] Remote: Add support for AWS SigV4 auth method for remote_write. #8509 -* [FEATURE] Scaleway Discovery: Add Scaleway Service Discovery. #8555 -* [FEATURE] PromQL: Allow negative offsets. Behind `--enable-feature=promql-negative-offset` flag. #8487 -* [FEATURE] **experimental** Exemplars: Add in-memory storage for exemplars. Behind `--enable-feature=exemplar-storage` flag. #6635 -* [FEATURE] UI: Add advanced auto-completion, syntax highlighting and linting to graph page query input. #8634 -* [ENHANCEMENT] Digital Ocean Discovery: Add `__meta_digitalocean_image` label. #8497 -* [ENHANCEMENT] PromQL: Add `last_over_time`, `sgn`, `clamp` functions. #8457 -* [ENHANCEMENT] Scrape: Add support for specifying type of Authorization header credentials with Bearer by default. #8512 -* [ENHANCEMENT] Scrape: Add `follow_redirects` option to scrape configuration. #8546 -* [ENHANCEMENT] Remote: Allow retries on HTTP 429 response code for remote_write. Disabled by default. See [configuration docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) for details. #8237 #8477 -* [ENHANCEMENT] Remote: Allow configuring custom headers for remote_read. 
See [configuration docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read) for details. #8516 -* [ENHANCEMENT] UI: Hitting Enter now triggers a new query. #8581 -* [ENHANCEMENT] UI: Better handling of long rule and names on the `/rules` and `/targets` pages. #8608 #8609 -* [ENHANCEMENT] UI: Add collapse/expand all button on the `/targets` page. #8486 -* [BUGFIX] TSDB: Eager deletion of removable blocks on every compaction, reducing peak disk space usage. #8007 -* [BUGFIX] PromQL: Fix parser support for special characters like `炬`. #8517 -* [BUGFIX] Rules: Update rule health for append/commit fails. #8619 - -## 2.25.2 / 2021-03-16 - -* [BUGFIX] Fix the ingestion of scrapes when the wall clock changes, e.g. on suspend. #8601 - -## 2.25.1 / 2021-03-14 - -* [BUGFIX] Fix a crash in `promtool` when a subquery with default resolution is used. #8569 -* [BUGFIX] Fix a bug that could return duplicate datapoints in queries. #8591 -* [BUGFIX] Fix crashes with arm64 when compiled with go1.16. #8593 - -## 2.25.0 / 2021-02-17 - -This release includes a new `--enable-feature=` flag that enables -experimental features. Such features might be changed or removed in the future. - -In the next minor release (2.26), Prometheus will use the Alertmanager API v2. -It will be done by defaulting `alertmanager_config.api_version` to `v2`. -Alertmanager API v2 was released in Alertmanager v0.16.0 (released in January -2019). - -* [FEATURE] **experimental** API: Accept remote_write requests. Behind the --enable-feature=remote-write-receiver flag. #8424 -* [FEATURE] **experimental** PromQL: Add `@ <timestamp>` modifier. Behind the --enable-feature=promql-at-modifier flag. #8121 #8436 #8425 -* [ENHANCEMENT] Add optional name property to testgroup for better test failure output. #8440 -* [ENHANCEMENT] Add warnings into React Panel on the Graph page. #8427 -* [ENHANCEMENT] TSDB: Increase the number of buckets for the compaction duration metric. 
#8342 -* [ENHANCEMENT] Remote: Allow passing along custom remote_write HTTP headers. #8416 -* [ENHANCEMENT] Mixins: Scope grafana configuration. #8332 -* [ENHANCEMENT] Kubernetes SD: Add endpoint labels metadata. #8273 -* [ENHANCEMENT] UI: Expose total number of label pairs in head in TSDB stats page. #8343 -* [ENHANCEMENT] TSDB: Reload blocks every minute, to detect new blocks and enforce retention more often. #8340 -* [BUGFIX] API: Fix global URL when external address has no port. #8359 -* [BUGFIX] Backfill: Fix error message handling. #8432 -* [BUGFIX] Backfill: Fix "add sample: out of bounds" error when series span an entire block. #8476 -* [BUGFIX] Deprecate unused flag --alertmanager.timeout. #8407 -* [BUGFIX] Mixins: Support remote-write metrics renamed in v2.23 in alerts. #8423 -* [BUGFIX] Remote: Fix garbage collection of dropped series in remote write. #8387 -* [BUGFIX] Remote: Log recoverable remote write errors as warnings. #8412 -* [BUGFIX] TSDB: Remove pre-2.21 temporary blocks on start. #8353. -* [BUGFIX] UI: Fix duplicated keys on /targets page. #8456 -* [BUGFIX] UI: Fix label name leak into class name. #8459 - -## 2.24.1 / 2021-01-20 - -* [ENHANCEMENT] Cache basic authentication results to significantly improve performance of HTTP endpoints (via an update of prometheus/exporter-toolkit). -* [BUGFIX] Prevent user enumeration by timing requests sent to authenticated HTTP endpoints (via an update of prometheus/exporter-toolkit). - -## 2.24.0 / 2021-01-06 - -* [FEATURE] Add TLS and basic authentication to HTTP endpoints. #8316 -* [FEATURE] promtool: Add `check web-config` subcommand to check web config files. #8319 -* [FEATURE] promtool: Add `tsdb create-blocks-from openmetrics` subcommand to backfill metrics data from an OpenMetrics file. #8084 -* [ENHANCEMENT] HTTP API: Fast-fail queries with only empty matchers. #8288 -* [ENHANCEMENT] HTTP API: Support matchers for labels API. 
#8301 -* [ENHANCEMENT] promtool: Improve checking of URLs passed on the command line. #7956 -* [ENHANCEMENT] SD: Expose IPv6 as a label in EC2 SD. #7086 -* [ENHANCEMENT] SD: Reuse EC2 client, reducing frequency of requesting credentials. #8311 -* [ENHANCEMENT] TSDB: Add logging when compaction takes more than the block time range. #8151 -* [ENHANCEMENT] TSDB: Avoid unnecessary GC runs after compaction. #8276 -* [BUGFIX] HTTP API: Avoid double-closing of channel when quitting multiple times via HTTP. #8242 -* [BUGFIX] SD: Ignore CNAME records in DNS SD to avoid spurious `Invalid SRV record` warnings. #8216 -* [BUGFIX] SD: Avoid config error triggered by valid label selectors in Kubernetes SD. #8285 - -## 2.23.0 / 2020-11-26 - -* [CHANGE] UI: Make the React UI default. #8142 -* [CHANGE] Remote write: The following metrics were removed/renamed in remote write. #6815 - * `prometheus_remote_storage_succeeded_samples_total` was removed and `prometheus_remote_storage_samples_total` was introduced for all the samples attempted to send. - * `prometheus_remote_storage_sent_bytes_total` was removed and replaced with `prometheus_remote_storage_samples_bytes_total` and `prometheus_remote_storage_metadata_bytes_total`. - * `prometheus_remote_storage_failed_samples_total` -> `prometheus_remote_storage_samples_failed_total` . - * `prometheus_remote_storage_retried_samples_total` -> `prometheus_remote_storage_samples_retried_total`. - * `prometheus_remote_storage_dropped_samples_total` -> `prometheus_remote_storage_samples_dropped_total`. - * `prometheus_remote_storage_pending_samples` -> `prometheus_remote_storage_samples_pending`. -* [CHANGE] Remote: Do not collect non-initialized timestamp metrics. #8060 -* [FEATURE] [EXPERIMENTAL] Remote write: Allow metric metadata to be propagated via remote write. 
The following new metrics were introduced: `prometheus_remote_storage_metadata_total`, `prometheus_remote_storage_metadata_failed_total`, `prometheus_remote_storage_metadata_retried_total`, `prometheus_remote_storage_metadata_bytes_total`. #6815 -* [ENHANCEMENT] Remote write: Added a metric `prometheus_remote_storage_max_samples_per_send` for remote write. #8102 -* [ENHANCEMENT] TSDB: Make the snapshot directory name always the same length. #8138 -* [ENHANCEMENT] TSDB: Create a checkpoint only once at the end of all head compactions. #8067 -* [ENHANCEMENT] TSDB: Avoid Series API from hitting the chunks. #8050 -* [ENHANCEMENT] TSDB: Cache label name and last value when adding series during compactions making compactions faster. #8192 -* [ENHANCEMENT] PromQL: Improved performance of Hash method making queries a bit faster. #8025 -* [ENHANCEMENT] promtool: `tsdb list` now prints block sizes. #7993 -* [ENHANCEMENT] promtool: Calculate mint and maxt per test avoiding unnecessary calculations. #8096 -* [ENHANCEMENT] SD: Add filtering of services to Docker Swarm SD. #8074 -* [BUGFIX] React UI: Fix button display when there are no panels. #8155 -* [BUGFIX] PromQL: Fix timestamp() method for vector selector inside parenthesis. #8164 -* [BUGFIX] PromQL: Don't include rendered expression on PromQL parse errors. #8177 -* [BUGFIX] web: Fix panic with double close() of channel on calling `/-/quit/`. #8166 -* [BUGFIX] TSDB: Fixed WAL corruption on partial writes within a page causing `invalid checksum` error on WAL replay. #8125 -* [BUGFIX] Update config metrics `prometheus_config_last_reload_successful` and `prometheus_config_last_reload_success_timestamp_seconds` right after initial validation before starting TSDB. -* [BUGFIX] promtool: Correctly detect duplicate label names in exposition. - -## 2.22.2 / 2020-11-16 - -* [BUGFIX] Fix race condition in syncing/stopping/reloading scrapers. 
#8176 - -## 2.22.1 / 2020-11-03 - -* [BUGFIX] Fix potential "mmap: invalid argument" errors in loading the head chunks, after an unclean shutdown, by performing read repairs. #8061 -* [BUGFIX] Fix serving metrics and API when reloading scrape config. #8104 -* [BUGFIX] Fix head chunk size calculation for size based retention. #8139 - -## 2.22.0 / 2020-10-07 - -As announced in the 2.21.0 release notes, the experimental gRPC API v2 has been -removed. - -* [CHANGE] web: Remove APIv2. #7935 -* [ENHANCEMENT] React UI: Implement missing TSDB head stats section. #7876 -* [ENHANCEMENT] UI: Add Collapse all button to targets page. #6957 -* [ENHANCEMENT] UI: Clarify alert state toggle via checkbox icon. #7936 -* [ENHANCEMENT] Add `rule_group_last_evaluation_samples` and `prometheus_tsdb_data_replay_duration_seconds` metrics. #7737 #7977 -* [ENHANCEMENT] Gracefully handle unknown WAL record types. #8004 -* [ENHANCEMENT] Issue a warning for 64 bit systems running 32 bit binaries. #8012 -* [BUGFIX] Adjust scrape timestamps to align them to the intended schedule, effectively reducing block size. Workaround for a regression in go1.14+. #7976 -* [BUGFIX] promtool: Ensure alert rules are marked as restored in unit tests. #7661 -* [BUGFIX] Eureka: Fix service discovery when compiled in 32-bit. #7964 -* [BUGFIX] Don't do literal regex matching optimisation when case insensitive. #8013 -* [BUGFIX] Fix classic UI sometimes running queries for instant query when in range query mode. #7984 - -## 2.21.0 / 2020-09-11 - -This release is built with Go 1.15, which deprecates [X.509 CommonName](https://golang.org/doc/go1.15#commonname) -in TLS certificates validation. - -In the unlikely case that you use the gRPC API v2 (which is limited to TSDB -admin commands), please note that we will remove this experimental API in the -next minor release 2.22. - -* [CHANGE] Disable HTTP/2 because of concerns with the Go HTTP/2 client. 
#7588 #7701 -* [CHANGE] PromQL: `query_log_file` path is now relative to the config file. #7701 -* [CHANGE] Promtool: Replace the tsdb command line tool by a promtool tsdb subcommand. #6088 -* [CHANGE] Rules: Label `rule_group_iterations` metric with group name. #7823 -* [FEATURE] Eureka SD: New service discovery. #3369 -* [FEATURE] Hetzner SD: New service discovery. #7822 -* [FEATURE] Kubernetes SD: Support Kubernetes EndpointSlices. #6838 -* [FEATURE] Scrape: Add per scrape-config targets limit. #7554 -* [ENHANCEMENT] Support composite durations in PromQL, config and UI, e.g. 1h30m. #7713 #7833 -* [ENHANCEMENT] DNS SD: Add SRV record target and port meta labels. #7678 -* [ENHANCEMENT] Docker Swarm SD: Support tasks and service without published ports. #7686 -* [ENHANCEMENT] PromQL: Reduce the amount of data queried by remote read when a subquery has an offset. #7667 -* [ENHANCEMENT] Promtool: Add `--time` option to query instant command. #7829 -* [ENHANCEMENT] UI: Respect the `--web.page-title` parameter in the React UI. #7607 -* [ENHANCEMENT] UI: Add duration, labels, annotations to alerts page in the React UI. #7605 -* [ENHANCEMENT] UI: Add duration on the React UI rules page, hide annotation and labels if empty. #7606 -* [BUGFIX] API: Deduplicate series in /api/v1/series. #7862 -* [BUGFIX] PromQL: Drop metric name in bool comparison between two instant vectors. #7819 -* [BUGFIX] PromQL: Exit with an error when time parameters can't be parsed. #7505 -* [BUGFIX] Remote read: Re-add accidentally removed tracing for remote-read requests. #7916 -* [BUGFIX] Rules: Detect extra fields in rule files. #7767 -* [BUGFIX] Rules: Disallow overwriting the metric name in the `labels` section of recording rules. #7787 -* [BUGFIX] Rules: Keep evaluation timestamp across reloads. #7775 -* [BUGFIX] Scrape: Do not stop scrapes in progress during reload. #7752 -* [BUGFIX] TSDB: Fix `chunks.HeadReadWriter: maxt of the files are not set` error. 
#7856 -* [BUGFIX] TSDB: Delete blocks atomically to prevent corruption when there is a panic/crash during deletion. #7772 -* [BUGFIX] Triton SD: Fix a panic when triton_sd_config is nil. #7671 -* [BUGFIX] UI: Fix react UI bug with series going on and off. #7804 -* [BUGFIX] UI: Fix styling bug for target labels with special names in React UI. #7902 -* [BUGFIX] Web: Stop CMUX and GRPC servers even with stale connections, preventing the server to stop on SIGTERM. #7810 - -## 2.20.1 / 2020-08-05 - -* [BUGFIX] SD: Reduce the Consul watch timeout to 2m and adjust the request timeout accordingly. #7724 - -## 2.20.0 / 2020-07-22 - -This release changes WAL compression from opt-in to default. WAL compression will prevent a downgrade to v2.10 or earlier without deleting the WAL. Disable WAL compression explicitly by setting the command line flag `--no-storage.tsdb.wal-compression` if you require downgrading to v2.10 or earlier. - -* [CHANGE] promtool: Changed rule numbering from 0-based to 1-based when reporting rule errors. #7495 -* [CHANGE] Remote read: Added `prometheus_remote_storage_read_queries_total` counter and `prometheus_remote_storage_read_request_duration_seconds` histogram, removed `prometheus_remote_storage_remote_read_queries_total` counter. -* [CHANGE] Remote write: Added buckets for longer durations to `prometheus_remote_storage_sent_batch_duration_seconds` histogram. -* [CHANGE] TSDB: WAL compression is enabled by default. #7410 -* [FEATURE] PromQL: Added `group()` aggregator. #7480 -* [FEATURE] SD: Added Docker Swarm SD. #7420 -* [FEATURE] SD: Added DigitalOcean SD. #7407 -* [FEATURE] SD: Added Openstack config option to query alternative endpoints. #7494 -* [ENHANCEMENT] Configuration: Exit early on invalid config file and signal it with exit code 2. #7399 -* [ENHANCEMENT] PromQL: `without` is now a valid metric identifier. #7533 -* [ENHANCEMENT] PromQL: Optimized regex label matching for literals within the pattern or as prefix/suffix. 
#7453 #7503 -* [ENHANCEMENT] promtool: Added time range parameters for labels API in promtool. #7463 -* [ENHANCEMENT] Remote write: Include samples waiting in channel in pending samples metric. Log number of dropped samples on hard shutdown. #7335 -* [ENHANCEMENT] Scrape: Ingest synthetic scrape report metrics atomically with the corresponding scraped metrics. #7562 -* [ENHANCEMENT] SD: Reduce timeouts for Openstack SD. #7507 -* [ENHANCEMENT] SD: Use 10m timeout for Consul watches. #7423 -* [ENHANCEMENT] SD: Added AMI meta label for EC2 SD. #7386 -* [ENHANCEMENT] TSDB: Increment WAL corruption metric also on WAL corruption during checkpointing. #7491 -* [ENHANCEMENT] TSDB: Improved query performance for high-cardinality labels. #7448 -* [ENHANCEMENT] UI: Display dates as well as timestamps in status page. #7544 -* [ENHANCEMENT] UI: Improved scrolling when following hash-fragment links. #7456 -* [ENHANCEMENT] UI: React UI renders numbers in alerts in a more human-readable way. #7426 -* [BUGFIX] API: Fixed error status code in the query API. #7435 -* [BUGFIX] PromQL: Fixed `avg` and `avg_over_time` for NaN, Inf, and float64 overflows. #7346 -* [BUGFIX] PromQL: Fixed off-by-one error in `histogram_quantile`. #7393 -* [BUGFIX] promtool: Support extended durations in rules unit tests. #6297 -* [BUGFIX] Scrape: Fix undercounting for `scrape_samples_post_metric_relabeling` in case of errors. #7342 -* [BUGFIX] TSDB: Don't panic on WAL corruptions. #7550 -* [BUGFIX] TSDB: Avoid leaving behind empty files in `chunks_head`, causing startup failures. #7573 -* [BUGFIX] TSDB: Fixed race between compact (gc, populate) and head append causing unknown symbol error. #7560 -* [BUGFIX] TSDB: Fixed unknown symbol error during head compaction. #7526 -* [BUGFIX] TSDB: Fixed panic during TSDB metric registration. #7501 -* [BUGFIX] TSDB: Fixed `--limit` command line flag in `tsdb` tool. #7430 - -## 2.19.3 / 2020-07-24 - -* [BUGFIX] TSDB: Don't panic on WAL corruptions. 
#7550 -* [BUGFIX] TSDB: Avoid leaving behind empty files in chunks_head, causing startup failures. #7573 - -## 2.19.2 / 2020-06-26 - -* [BUGFIX] Remote Write: Fix panic when reloading config with modified queue parameters. #7452 - -## 2.19.1 / 2020-06-18 - -* [BUGFIX] TSDB: Fix m-map file truncation leading to unsequential files. #7414 - -## 2.19.0 / 2020-06-09 - -* [FEATURE] TSDB: Memory-map full chunks of Head (in-memory) block from disk. This reduces memory footprint and makes restarts faster. #6679 -* [ENHANCEMENT] Discovery: Added discovery support for Triton global zones. #7250 -* [ENHANCEMENT] Increased alert resend delay to be more tolerant towards failures. #7228 -* [ENHANCEMENT] Remote Read: Added `prometheus_remote_storage_remote_read_queries_total` counter to count total number of remote read queries. #7328 -* [ENHANCEMENT] Added time range parameters for label names and label values API. #7288 -* [ENHANCEMENT] TSDB: Reduced contention in isolation for high load. #7332 -* [BUGFIX] PromQL: Eliminated collision while checking for duplicate labels. #7058 -* [BUGFIX] React UI: Don't null out data when clicking on the current tab. #7243 -* [BUGFIX] PromQL: Correctly track number of samples for a query. #7307 -* [BUGFIX] PromQL: Return NaN when histogram buckets have 0 observations. #7318 - -## 2.18.2 / 2020-06-09 - -* [BUGFIX] TSDB: Fix incorrect query results when using Prometheus with remote reads configured #7361 - -## 2.18.1 / 2020-05-07 - -* [BUGFIX] TSDB: Fixed snapshot API. #7217 - -## 2.18.0 / 2020-05-05 - -* [CHANGE] Federation: Only use local TSDB for federation (ignore remote read). #7096 -* [CHANGE] Rules: `rule_evaluations_total` and `rule_evaluation_failures_total` have a `rule_group` label now. #7094 -* [FEATURE] Tracing: Added experimental Jaeger support #7148 -* [ENHANCEMENT] TSDB: Significantly reduce WAL size kept around after a block cut. #7098 -* [ENHANCEMENT] Discovery: Add `architecture` meta label for EC2. 
#7000 -* [BUGFIX] UI: Fixed wrong MinTime reported by /status. #7182 -* [BUGFIX] React UI: Fixed multiselect legend on OSX. #6880 -* [BUGFIX] Remote Write: Fixed blocked resharding edge case. #7122 -* [BUGFIX] Remote Write: Fixed remote write not updating on relabel configs change. #7073 - -## 2.17.2 / 2020-04-20 - -* [BUGFIX] Federation: Register federation metrics #7081 -* [BUGFIX] PromQL: Fix panic in parser error handling #7132 -* [BUGFIX] Rules: Fix reloads hanging when deleting a rule group that is being evaluated #7138 -* [BUGFIX] TSDB: Fix a memory leak when prometheus starts with an empty TSDB WAL #7135 -* [BUGFIX] TSDB: Make isolation more robust to panics in web handlers #7129 #7136 - -## 2.17.1 / 2020-03-26 - -* [BUGFIX] TSDB: Fix query performance regression that increased memory and CPU usage #7051 - -## 2.17.0 / 2020-03-24 - -This release implements isolation in TSDB. API queries and recording rules are -guaranteed to only see full scrapes and full recording rules. This comes with a -certain overhead in resource usage. Depending on the situation, there might be -some increase in memory usage, CPU usage, or query latency. 
- -* [FEATURE] TSDB: Support isolation #6841 -* [ENHANCEMENT] PromQL: Allow more keywords as metric names #6933 -* [ENHANCEMENT] React UI: Add normalization of localhost URLs in targets page #6794 -* [ENHANCEMENT] Remote read: Read from remote storage concurrently #6770 -* [ENHANCEMENT] Rules: Mark deleted rule series as stale after a reload #6745 -* [ENHANCEMENT] Scrape: Log scrape append failures as debug rather than warn #6852 -* [ENHANCEMENT] TSDB: Improve query performance for queries that partially hit the head #6676 -* [ENHANCEMENT] Consul SD: Expose service health as meta label #5313 -* [ENHANCEMENT] EC2 SD: Expose EC2 instance lifecycle as meta label #6914 -* [ENHANCEMENT] Kubernetes SD: Expose service type as meta label for K8s service role #6684 -* [ENHANCEMENT] Kubernetes SD: Expose label_selector and field_selector #6807 -* [ENHANCEMENT] Openstack SD: Expose hypervisor id as meta label #6962 -* [BUGFIX] PromQL: Do not escape HTML-like chars in query log #6834 #6795 -* [BUGFIX] React UI: Fix data table matrix values #6896 -* [BUGFIX] React UI: Fix new targets page not loading when using non-ASCII characters #6892 -* [BUGFIX] Remote read: Fix duplication of metrics read from remote storage with external labels #6967 #7018 -* [BUGFIX] Remote write: Register WAL watcher and live reader metrics for all remotes, not just the first one #6998 -* [BUGFIX] Scrape: Prevent removal of metric names upon relabeling #6891 -* [BUGFIX] Scrape: Fix 'superfluous response.WriteHeader call' errors when scrape fails under some circumstances #6986 -* [BUGFIX] Scrape: Fix crash when reloads are separated by two scrape intervals #7011 - -## 2.16.0 / 2020-02-13 - -* [FEATURE] React UI: Support local timezone on /graph #6692 -* [FEATURE] PromQL: add absent_over_time query function #6490 -* [FEATURE] Adding optional logging of queries to their own file #6520 -* [ENHANCEMENT] React UI: Add support for rules page and "Xs ago" duration displays #6503 -* [ENHANCEMENT] React UI: 
alerts page, replace filtering togglers tabs with checkboxes #6543 -* [ENHANCEMENT] TSDB: Export metric for WAL write errors #6647 -* [ENHANCEMENT] TSDB: Improve query performance for queries that only touch the most recent 2h of data. #6651 -* [ENHANCEMENT] PromQL: Refactoring in parser errors to improve error messages #6634 -* [ENHANCEMENT] PromQL: Support trailing commas in grouping opts #6480 -* [ENHANCEMENT] Scrape: Reduce memory usage on reloads by reusing scrape cache #6670 -* [ENHANCEMENT] Scrape: Add metrics to track bytes and entries in the metadata cache #6675 -* [ENHANCEMENT] promtool: Add support for line-column numbers for invalid rules output #6533 -* [ENHANCEMENT] Avoid restarting rule groups when it is unnecessary #6450 -* [BUGFIX] React UI: Send cookies on fetch() on older browsers #6553 -* [BUGFIX] React UI: adopt grafana flot fix for stacked graphs #6603 -* [BUGFIX] React UI: Fix broken graph page browser history so that the back button works as expected #6659 -* [BUGFIX] TSDB: ensure compactionsSkipped metric is registered, and log proper error if one is returned from head.Init #6616 -* [BUGFIX] TSDB: return an error on ingesting series with duplicate labels #6664 -* [BUGFIX] PromQL: Fix unary operator precedence #6579 -* [BUGFIX] PromQL: Respect query.timeout even when we reach query.max-concurrency #6712 -* [BUGFIX] PromQL: Fix string and parentheses handling in engine, which affected React UI #6612 -* [BUGFIX] PromQL: Remove output labels returned by absent() if they are produced by multiple identical label matchers #6493 -* [BUGFIX] Scrape: Validate that OpenMetrics input ends with `# EOF` #6505 -* [BUGFIX] Remote read: return the correct error if configs can't be marshal'd to JSON #6622 -* [BUGFIX] Remote write: Make remote client `Store` use passed context, which can affect shutdown timing #6673 -* [BUGFIX] Remote write: Improve sharding calculation in cases where we would always be consistently behind by tracking pendingSamples #6511 -* [BUGFIX] 
Ensure prometheus_rule_group metrics are deleted when a rule group is removed #6693 - -## 2.15.2 / 2020-01-06 - -* [BUGFIX] TSDB: Fixed support for TSDB blocks built with Prometheus before 2.1.0. #6564 -* [BUGFIX] TSDB: Fixed block compaction issues on Windows. #6547 - -## 2.15.1 / 2019-12-25 - -* [BUGFIX] TSDB: Fixed race on concurrent queries against same data. #6512 - -## 2.15.0 / 2019-12-23 - -* [CHANGE] Discovery: Removed `prometheus_sd_kubernetes_cache_*` metrics. Additionally `prometheus_sd_kubernetes_workqueue_latency_seconds` and `prometheus_sd_kubernetes_workqueue_work_duration_seconds` metrics now show correct values in seconds. #6393 -* [CHANGE] Remote write: Changed `query` label on `prometheus_remote_storage_*` metrics to `remote_name` and `url`. #6043 -* [FEATURE] API: Added new endpoint for exposing per metric metadata `/metadata`. #6420 #6442 -* [ENHANCEMENT] TSDB: Significantly reduced memory footprint of loaded TSDB blocks. #6418 #6461 -* [ENHANCEMENT] TSDB: Significantly optimized what we buffer during compaction which should result in lower memory footprint during compaction. #6422 #6452 #6468 #6475 -* [ENHANCEMENT] TSDB: Improve replay latency. #6230 -* [ENHANCEMENT] TSDB: WAL size is now used for size based retention calculation. #5886 -* [ENHANCEMENT] Remote read: Added query grouping and range hints to the remote read request #6401 -* [ENHANCEMENT] Remote write: Added `prometheus_remote_storage_sent_bytes_total` counter per queue. #6344 -* [ENHANCEMENT] promql: Improved PromQL parser performance. #6356 -* [ENHANCEMENT] React UI: Implemented missing pages like `/targets` #6276, TSDB status page #6281 #6267 and many other fixes and performance improvements. -* [ENHANCEMENT] promql: Prometheus now accepts spaces between time range and square bracket. e.g `[ 5m]` #6065 -* [BUGFIX] Config: Fixed alertmanager configuration to not miss targets when configurations are similar. 
#6455 -* [BUGFIX] Remote write: Value of `prometheus_remote_storage_shards_desired` gauge shows raw value of desired shards and it's updated correctly. #6378 -* [BUGFIX] Rules: Prometheus now fails the evaluation of rules and alerts where metric results collide with labels specified in `labels` field. #6469 -* [BUGFIX] API: Targets Metadata API `/targets/metadata` now accepts empty `match_targets` parameter as in the spec. #6303 - -## 2.14.0 / 2019-11-11 - -* [SECURITY/BUGFIX] UI: Ensure warnings from the API are escaped. #6279 -* [FEATURE] API: `/api/v1/status/runtimeinfo` and `/api/v1/status/buildinfo` endpoints added for use by the React UI. #6243 -* [FEATURE] React UI: implement the new experimental React based UI. #5694 and many more - * Can be found by under `/new`. - * Not all pages are implemented yet. -* [FEATURE] Status: Cardinality statistics added to the Runtime & Build Information page. #6125 -* [ENHANCEMENT/BUGFIX] Remote write: fix delays in remote write after a compaction. #6021 -* [ENHANCEMENT] UI: Alerts can be filtered by state. #5758 -* [BUGFIX] API: lifecycle endpoints return 403 when not enabled. #6057 -* [BUGFIX] Build: Fix Solaris build. #6149 -* [BUGFIX] Promtool: Remove false duplicate rule warnings when checking rule files with alerts. #6270 -* [BUGFIX] Remote write: restore use of deduplicating logger in remote write. #6113 -* [BUGFIX] Remote write: do not reshard when unable to send samples. #6111 -* [BUGFIX] Service discovery: errors are no longer logged on context cancellation. #6116, #6133 -* [BUGFIX] UI: handle null response from API properly. #6071 - -## 2.13.1 / 2019-10-16 - -* [BUGFIX] Fix panic in ARM builds of Prometheus. #6110 -* [BUGFIX] promql: fix potential panic in the query logger. #6094 -* [BUGFIX] Multiple errors of http: superfluous response.WriteHeader call in the logs. 
#6145 - -## 2.13.0 / 2019-10-04 - -* [SECURITY/BUGFIX] UI: Fix a Stored DOM XSS vulnerability with query history [CVE-2019-10215](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-10215). #6098 -* [CHANGE] Metrics: renamed prometheus_sd_configs_failed_total to prometheus_sd_failed_configs and changed to Gauge #5254 -* [ENHANCEMENT] Include the tsdb tool in builds. #6089 -* [ENHANCEMENT] Service discovery: add new node address types for kubernetes. #5902 -* [ENHANCEMENT] UI: show warnings if query have returned some warnings. #5964 -* [ENHANCEMENT] Remote write: reduce memory usage of the series cache. #5849 -* [ENHANCEMENT] Remote read: use remote read streaming to reduce memory usage. #5703 -* [ENHANCEMENT] Metrics: added metrics for remote write max/min/desired shards to queue manager. #5787 -* [ENHANCEMENT] Promtool: show the warnings during label query. #5924 -* [ENHANCEMENT] Promtool: improve error messages when parsing bad rules. #5965 -* [ENHANCEMENT] Promtool: more promlint rules. #5515 -* [BUGFIX] Promtool: fix recording inconsistency due to duplicate labels. #6026 -* [BUGFIX] UI: fixes service-discovery view when accessed from unhealthy targets. #5915 -* [BUGFIX] Metrics format: OpenMetrics parser crashes on short input. #5939 -* [BUGFIX] UI: avoid truncated Y-axis values. #6014 - -## 2.12.0 / 2019-08-17 - -* [FEATURE] Track currently active PromQL queries in a log file. #5794 -* [FEATURE] Enable and provide binaries for `mips64` / `mips64le` architectures. #5792 -* [ENHANCEMENT] Improve responsiveness of targets web UI and API endpoint. #5740 -* [ENHANCEMENT] Improve remote write desired shards calculation. #5763 -* [ENHANCEMENT] Flush TSDB pages more precisely. tsdb#660 -* [ENHANCEMENT] Add `prometheus_tsdb_retention_limit_bytes` metric. tsdb#667 -* [ENHANCEMENT] Add logging during TSDB WAL replay on startup. tsdb#662 -* [ENHANCEMENT] Improve TSDB memory usage. 
tsdb#653, tsdb#643, tsdb#654, tsdb#642, tsdb#627 -* [BUGFIX] Check for duplicate label names in remote read. #5829 -* [BUGFIX] Mark deleted rules' series as stale on next evaluation. #5759 -* [BUGFIX] Fix JavaScript error when showing warning about out-of-sync server time. #5833 -* [BUGFIX] Fix `promtool test rules` panic when providing empty `exp_labels`. #5774 -* [BUGFIX] Only check last directory when discovering checkpoint number. #5756 -* [BUGFIX] Fix error propagation in WAL watcher helper functions. #5741 -* [BUGFIX] Correctly handle empty labels from alert templates. #5845 - -## 2.11.2 / 2019-08-14 - -* [BUGFIX/SECURITY] Fix a Stored DOM XSS vulnerability with query history. #5888 - -## 2.11.1 / 2019-07-10 - -* [BUGFIX] Fix potential panic when prometheus is watching multiple zookeeper paths. #5749 - -## 2.11.0 / 2019-07-09 - -* [CHANGE] Remove `max_retries` from queue_config (it has been unused since rewriting remote-write to utilize the write-ahead-log). #5649 -* [CHANGE] The meta file `BlockStats` no longer holds size information. This is now dynamically calculated and kept in memory. It also includes the meta file size which was not included before. tsdb#637 -* [CHANGE] Renamed metric from `prometheus_tsdb_wal_reader_corruption_errors` to `prometheus_tsdb_wal_reader_corruption_errors_total`. tsdb#622 -* [FEATURE] Add option to use Alertmanager API v2. #5482 -* [FEATURE] Added `humanizePercentage` function for templates. #5670 -* [FEATURE] Include InitContainers in Kubernetes Service Discovery. #5598 -* [FEATURE] Provide option to compress WAL records using Snappy. [#609](https://github.com/prometheus/tsdb/pull/609) -* [ENHANCEMENT] Create new clean segment when starting the WAL. tsdb#608 -* [ENHANCEMENT] Reduce allocations in PromQL aggregations. #5641 -* [ENHANCEMENT] Add storage warnings to LabelValues and LabelNames API results. #5673 -* [ENHANCEMENT] Add `prometheus_http_requests_total` metric. #5640 -* [ENHANCEMENT] Enable openbsd/arm build. 
#5696 -* [ENHANCEMENT] Remote-write allocation improvements. #5614 -* [ENHANCEMENT] Query performance improvement: Efficient iteration and search in HashForLabels and HashWithoutLabels. #5707 -* [ENHANCEMENT] Allow injection of arbitrary headers in promtool. #4389 -* [ENHANCEMENT] Allow passing `external_labels` in alert unit tests groups. #5608 -* [ENHANCEMENT] Allows globs for rules when unit testing. #5595 -* [ENHANCEMENT] Improved postings intersection matching. tsdb#616 -* [ENHANCEMENT] Reduced disk usage for WAL for small setups. tsdb#605 -* [ENHANCEMENT] Optimize queries using regexp for set lookups. tsdb#602 -* [BUGFIX] resolve race condition in maxGauge. #5647 -* [BUGFIX] Fix ZooKeeper connection leak. #5675 -* [BUGFIX] Improved atomicity of .tmp block replacement during compaction for usual case. tsdb#636 -* [BUGFIX] Fix "unknown series references" after clean shutdown. tsdb#623 -* [BUGFIX] Re-calculate block size when calling `block.Delete`. tsdb#637 -* [BUGFIX] Fix unsafe snapshots with head block. tsdb#641 -* [BUGFIX] `prometheus_tsdb_compactions_failed_total` is now incremented on any compaction failure. tsdb#613 - -## 2.10.0 / 2019-05-25 - -* [CHANGE/BUGFIX] API: Encode alert values as string to correctly represent Inf/NaN. #5582 -* [FEATURE] Template expansion: Make external labels available as `$externalLabels` in alert and console template expansion. #5463 -* [FEATURE] TSDB: Add `prometheus_tsdb_wal_segment_current` metric for the WAL segment index that TSDB is currently writing to. tsdb#601 -* [FEATURE] Scrape: Add `scrape_series_added` per-scrape metric. #5546 -* [ENHANCEMENT] Discovery/kubernetes: Add labels `__meta_kubernetes_endpoint_node_name` and `__meta_kubernetes_endpoint_hostname`. #5571 -* [ENHANCEMENT] Discovery/azure: Add label `__meta_azure_machine_public_ip`. #5475 -* [ENHANCEMENT] TSDB: Simplify mergedPostings.Seek, resulting in better performance if there are many posting lists. 
tsdb#595 -* [ENHANCEMENT] Log filesystem type on startup. #5558 -* [ENHANCEMENT] Cmd/promtool: Use POST requests for Query and QueryRange. client_golang#557 -* [ENHANCEMENT] Web: Sort alerts by group name. #5448 -* [ENHANCEMENT] Console templates: Add convenience variables `$rawParams`, `$params`, `$path`. #5463 -* [BUGFIX] TSDB: Don't panic when running out of disk space and recover nicely from the condition. tsdb#582 -* [BUGFIX] TSDB: Correctly handle empty labels. tsdb#594 -* [BUGFIX] TSDB: Don't crash on an unknown tombstone reference. tsdb#604 -* [BUGFIX] Storage/remote: Remove queue-manager specific metrics if queue no longer exists. #5445 #5485 #5555 -* [BUGFIX] PromQL: Correctly display `{__name__="a"}`. #5552 -* [BUGFIX] Discovery/kubernetes: Use `service` rather than `ingress` as the name for the service workqueue. #5520 -* [BUGFIX] Discovery/azure: Don't panic on a VM with a public IP. #5587 -* [BUGFIX] Discovery/triton: Always read HTTP body to completion. #5596 -* [BUGFIX] Web: Fixed Content-Type for js and css instead of using `/etc/mime.types`. #5551 - -## 2.9.2 / 2019-04-24 - -* [BUGFIX] Make sure subquery range is taken into account for selection #5467 -* [BUGFIX] Exhaust every request body before closing it #5166 -* [BUGFIX] Cmd/promtool: return errors from rule evaluations #5483 -* [BUGFIX] Remote Storage: string interner should not panic in release #5487 -* [BUGFIX] Fix memory allocation regression in mergedPostings.Seek tsdb#586 - -## 2.9.1 / 2019-04-16 - -* [BUGFIX] Discovery/kubernetes: fix missing label sanitization #5462 -* [BUGFIX] Remote_write: Prevent reshard concurrent with calling stop #5460 - -## 2.9.0 / 2019-04-15 - -This release uses Go 1.12, which includes a change in how memory is released -to Linux. This will cause RSS to be reported as higher, however this is harmless -and the memory is available to the kernel when it needs it. - -* [CHANGE/ENHANCEMENT] Update Consul to support catalog.ServiceMultipleTags. 
#5151 -* [FEATURE] Add honor_timestamps scrape option. #5304 -* [ENHANCEMENT] Discovery/kubernetes: add present labels for labels/annotations. #5443 -* [ENHANCEMENT] OpenStack SD: Add ProjectID and UserID meta labels. #5431 -* [ENHANCEMENT] Add GODEBUG and retention to the runtime page. #5324 #5322 -* [ENHANCEMENT] Add support for POSTing to /series endpoint. #5422 -* [ENHANCEMENT] Support PUT methods for Lifecycle and Admin APIs. #5376 -* [ENHANCEMENT] Scrape: Add global jitter for HA server. #5181 -* [ENHANCEMENT] Check for cancellation on every step of a range evaluation. #5131 -* [ENHANCEMENT] String interning for labels & values in the remote_write path. #5316 -* [ENHANCEMENT] Don't lose the scrape cache on a failed scrape. #5414 -* [ENHANCEMENT] Reload cert files from disk automatically. common#173 -* [ENHANCEMENT] Use fixed length millisecond timestamp format for logs. common#172 -* [ENHANCEMENT] Performance improvements for postings. tsdb#509 tsdb#572 -* [BUGFIX] Remote Write: fix checkpoint reading. #5429 -* [BUGFIX] Check if label value is valid when unmarshaling external labels from YAML. #5316 -* [BUGFIX] Promparse: sort all labels when parsing. #5372 -* [BUGFIX] Reload rules: copy state on both name and labels. #5368 -* [BUGFIX] Exponentiation operator to drop metric name in result of operation. #5329 -* [BUGFIX] Config: resolve more file paths. #5284 -* [BUGFIX] Promtool: resolve relative paths in alert test files. #5336 -* [BUGFIX] Set TLSHandshakeTimeout in HTTP transport. common#179 -* [BUGFIX] Use fsync to be more resilient to machine crashes. tsdb#573 tsdb#578 -* [BUGFIX] Keep series that are still in WAL in checkpoints. tsdb#577 -* [BUGFIX] Fix output sample values for scalar-to-vector comparison operations. #5454 - -## 2.8.1 / 2019-03-28 - -* [BUGFIX] Display the job labels in `/targets` which was removed accidentally. #5406 - -## 2.8.0 / 2019-03-12 - -This release uses Write-Ahead Logging (WAL) for the remote_write API. 
This currently causes a slight increase in memory usage, which will be addressed in future releases. - -* [CHANGE] Default time retention is used only when no size based retention is specified. These are flags where time retention is specified by the flag `--storage.tsdb.retention` and size retention by `--storage.tsdb.retention.size`. #5216 -* [CHANGE] `prometheus_tsdb_storage_blocks_bytes_total` is now `prometheus_tsdb_storage_blocks_bytes`. prometheus/tsdb#506 -* [FEATURE] [EXPERIMENTAL] Time overlapping blocks are now allowed; vertical compaction and vertical query merge. It is an optional feature which is controlled by the `--storage.tsdb.allow-overlapping-blocks` flag, disabled by default. prometheus/tsdb#370 -* [ENHANCEMENT] Use the WAL for remote_write API. #4588 -* [ENHANCEMENT] Query performance improvements. prometheus/tsdb#531 -* [ENHANCEMENT] UI enhancements with upgrade to Bootstrap 4. #5226 -* [ENHANCEMENT] Reduce time that Alertmanagers are in flux when reloaded. #5126 -* [ENHANCEMENT] Limit number of metrics displayed on UI to 10000. #5139 -* [ENHANCEMENT] (1) Remember All/Unhealthy choice on target-overview when reloading page. (2) Resize text-input area on Graph page on mouseclick. #5201 -* [ENHANCEMENT] In `histogram_quantile` merge buckets with equivalent le values. #5158. -* [ENHANCEMENT] Show list of offending labels in the error message in many-to-many scenarios. #5189 -* [ENHANCEMENT] Show `Storage Retention` criteria in effect on `/status` page. #5322 -* [BUGFIX] Fix sorting of rule groups. #5260 -* [BUGFIX] Fix support for password_file and bearer_token_file in Kubernetes SD. #5211 -* [BUGFIX] Scrape: catch errors when creating HTTP clients #5182. Adds new metrics: - * `prometheus_target_scrape_pools_total` - * `prometheus_target_scrape_pools_failed_total` - * `prometheus_target_scrape_pool_reloads_total` - * `prometheus_target_scrape_pool_reloads_failed_total` -* [BUGFIX] Fix panic when aggregator param is not a literal. 
#5290 - -## 2.7.2 / 2019-03-02 - -* [BUGFIX] `prometheus_rule_group_last_evaluation_timestamp_seconds` is now a unix timestamp. #5186 - -## 2.7.1 / 2019-01-31 - -This release has a fix for a Stored DOM XSS vulnerability that can be triggered when using the query history functionality. Thanks to Dor Tumarkin from Checkmarx for reporting it. - -* [BUGFIX/SECURITY] Fix a Stored DOM XSS vulnerability with query history. #5163 -* [BUGFIX] `prometheus_rule_group_last_duration_seconds` now reports seconds instead of nanoseconds. #5153 -* [BUGFIX] Make sure the targets are consistently sorted in the targets page. #5161 - -## 2.7.0 / 2019-01-28 - -We're rolling back the Dockerfile changes introduced in 2.6.0. If you made changes to your docker deployment in 2.6.0, you will need to roll them back. This release also adds experimental support for disk size based retention. To accommodate that we are deprecating the flag `storage.tsdb.retention` in favour of `storage.tsdb.retention.time`. We print a warning if the flag is in use, but it will function without breaking until Prometheus 3.0. - -* [CHANGE] Rollback Dockerfile to version at 2.5.0. Rollback of the breaking change introduced in 2.6.0. #5122 -* [FEATURE] Add subqueries to PromQL. #4831 -* [FEATURE] [EXPERIMENTAL] Add support for disk size based retention. Note that we don't consider the WAL size which could be significant and the time based retention policy also applies. #5109 prometheus/tsdb#343 -* [FEATURE] Add CORS origin flag. #5011 -* [ENHANCEMENT] Consul SD: Add tagged address to the discovery metadata. #5001 -* [ENHANCEMENT] Kubernetes SD: Add service external IP and external name to the discovery metadata. #4940 -* [ENHANCEMENT] Azure SD: Add support for Managed Identity authentication. #4590 -* [ENHANCEMENT] Azure SD: Add tenant and subscription IDs to the discovery metadata. #4969 -* [ENHANCEMENT] OpenStack SD: Add support for application credentials based authentication. 
#4968 -* [ENHANCEMENT] Add metric for number of rule groups loaded. #5090 -* [BUGFIX] Avoid duplicate tests for alert unit tests. #4964 -* [BUGFIX] Don't depend on given order when comparing samples in alert unit testing. #5049 -* [BUGFIX] Make sure the retention period doesn't overflow. #5112 -* [BUGFIX] Make sure the blocks don't get very large. #5112 -* [BUGFIX] Don't generate blocks with no samples. prometheus/tsdb#374 -* [BUGFIX] Reintroduce metric for WAL corruptions. prometheus/tsdb#473 - -## 2.6.1 / 2019-01-15 - -* [BUGFIX] Azure SD: Fix discovery getting stuck sometimes. #5088 -* [BUGFIX] Marathon SD: Use `Tasks.Ports` when `RequirePorts` is `false`. #5026 -* [BUGFIX] Promtool: Fix "out-of-order sample" errors when testing rules. #5069 - -## 2.6.0 / 2018-12-17 - -* [CHANGE] Remove default flags from the container's entrypoint, run Prometheus from `/etc/prometheus` and symlink the storage directory to `/etc/prometheus/data`. #4976 -* [CHANGE] Promtool: Remove the `update` command. #3839 -* [FEATURE] Add JSON log format via the `--log.format` flag. #4876 -* [FEATURE] API: Add /api/v1/labels endpoint to get all label names. #4835 -* [FEATURE] Web: Allow setting the page's title via the `--web.ui-title` flag. #4841 -* [ENHANCEMENT] Add `prometheus_tsdb_lowest_timestamp_seconds`, `prometheus_tsdb_head_min_time_seconds` and `prometheus_tsdb_head_max_time_seconds` metrics. #4888 -* [ENHANCEMENT] Add `rule_group_last_evaluation_timestamp_seconds` metric. #4852 -* [ENHANCEMENT] Add `prometheus_template_text_expansion_failures_total` and `prometheus_template_text_expansions_total` metrics. #4747 -* [ENHANCEMENT] Set consistent User-Agent header in outgoing requests. #4891 -* [ENHANCEMENT] Azure SD: Error out at load time when authentication parameters are missing. #4907 -* [ENHANCEMENT] EC2 SD: Add the machine's private DNS name to the discovery metadata. #4693 -* [ENHANCEMENT] EC2 SD: Add the operating system's platform to the discovery metadata. 
#4663 -* [ENHANCEMENT] Kubernetes SD: Add the pod's phase to the discovery metadata. #4824 -* [ENHANCEMENT] Kubernetes SD: Log Kubernetes messages. #4931 -* [ENHANCEMENT] Promtool: Collect CPU and trace profiles. #4897 -* [ENHANCEMENT] Promtool: Support writing output as JSON. #4848 -* [ENHANCEMENT] Remote Read: Return available data if remote read fails partially. #4832 -* [ENHANCEMENT] Remote Write: Improve queue performance. #4772 -* [ENHANCEMENT] Remote Write: Add min_shards parameter to set the minimum number of shards. #4924 -* [ENHANCEMENT] TSDB: Improve WAL reading. #4953 -* [ENHANCEMENT] TSDB: Memory improvements. #4953 -* [ENHANCEMENT] Web: Log stack traces on panic. #4221 -* [ENHANCEMENT] Web UI: Add copy to clipboard button for configuration. #4410 -* [ENHANCEMENT] Web UI: Support console queries at specific times. #4764 -* [ENHANCEMENT] Web UI: group targets by job then instance. #4898 #4806 -* [BUGFIX] Deduplicate handler labels for HTTP metrics. #4732 -* [BUGFIX] Fix leaked queriers causing shutdowns to hang. #4922 -* [BUGFIX] Fix configuration loading panics on nil pointer slice elements. #4942 -* [BUGFIX] API: Correctly skip mismatching targets on /api/v1/targets/metadata. #4905 -* [BUGFIX] API: Better rounding for incoming query timestamps. #4941 -* [BUGFIX] Azure SD: Fix panic. #4867 -* [BUGFIX] Console templates: Fix hover when the metric has a null value. #4906 -* [BUGFIX] Discovery: Remove all targets when the scrape configuration gets empty. #4819 -* [BUGFIX] Marathon SD: Fix leaked connections. #4915 -* [BUGFIX] Marathon SD: Use 'hostPort' member of portMapping to construct target endpoints. #4887 -* [BUGFIX] PromQL: Fix a goroutine leak in the lexer/parser. #4858 -* [BUGFIX] Scrape: Pass through content-type for non-compressed output. #4912 -* [BUGFIX] Scrape: Fix deadlock in the scrape's manager. #4894 -* [BUGFIX] Scrape: Scrape targets at fixed intervals even after Prometheus restarts. 
#4926 -* [BUGFIX] TSDB: Support restored snapshots including the head properly. #4953 -* [BUGFIX] TSDB: Repair WAL when the last record in a segment is torn. #4953 -* [BUGFIX] TSDB: Fix unclosed file readers on Windows systems. #4997 -* [BUGFIX] Web: Avoid proxy to connect to the local gRPC server. #4572 - -## 2.5.0 / 2018-11-06 - -* [CHANGE] Group targets by scrape config instead of job name. #4806 #4526 -* [CHANGE] Marathon SD: Various changes to adapt to Marathon 1.5+. #4499 -* [CHANGE] Discovery: Split `prometheus_sd_discovered_targets` metric by scrape and notify (Alertmanager SD) as well as by section in the respective configuration. #4753 -* [FEATURE] Add OpenMetrics support for scraping (EXPERIMENTAL). #4700 -* [FEATURE] Add unit testing for rules. #4350 -* [FEATURE] Make maximum number of samples per query configurable via `--query.max-samples` flag. #4513 -* [FEATURE] Make maximum number of concurrent remote reads configurable via `--storage.remote.read-concurrent-limit` flag. #4656 -* [ENHANCEMENT] Support s390x platform for Linux. #4605 -* [ENHANCEMENT] API: Add `prometheus_api_remote_read_queries` metric tracking currently executed or waiting remote read API requests. #4699 -* [ENHANCEMENT] Remote Read: Add `prometheus_remote_storage_remote_read_queries` metric tracking currently in-flight remote read queries. #4677 -* [ENHANCEMENT] Remote Read: Reduced memory usage. #4655 -* [ENHANCEMENT] Discovery: Add `prometheus_sd_discovered_targets`, `prometheus_sd_received_updates_total`, `prometheus_sd_updates_delayed_total`, and `prometheus_sd_updates_total` metrics for discovery subsystem. #4667 -* [ENHANCEMENT] Discovery: Improve performance of previously slow updates of changes of targets. #4526 -* [ENHANCEMENT] Kubernetes SD: Add extended metrics. #4458 -* [ENHANCEMENT] OpenStack SD: Support discovering instances from all projects. #4682 -* [ENHANCEMENT] OpenStack SD: Discover all interfaces. 
#4649 -* [ENHANCEMENT] OpenStack SD: Support `tls_config` for the used HTTP client. #4654 -* [ENHANCEMENT] Triton SD: Add ability to filter triton_sd targets by pre-defined groups. #4701 -* [ENHANCEMENT] Web UI: Avoid browser spell-checking in expression field. #4728 -* [ENHANCEMENT] Web UI: Add scrape duration and last evaluation time in targets and rules pages. #4722 -* [ENHANCEMENT] Web UI: Improve rule view by wrapping lines. #4702 -* [ENHANCEMENT] Rules: Error out at load time for invalid templates, rather than at evaluation time. #4537 -* [ENHANCEMENT] TSDB: Add metrics for WAL operations. #4692 -* [BUGFIX] Change max/min over_time to handle NaNs properly. #4386 -* [BUGFIX] Check label name for `count_values` PromQL function. #4585 -* [BUGFIX] Ensure that vectors and matrices do not contain identical label-sets. #4589 - -## 2.4.3 / 2018-10-04 - -* [BUGFIX] Fix panic when using custom EC2 API for SD #4672 -* [BUGFIX] Fix panic when Zookeeper SD cannot connect to servers #4669 -* [BUGFIX] Make the skip_head an optional parameter for snapshot API #4674 - -## 2.4.2 / 2018-09-21 - -The last release didn't include the intended bugfixes due to a vendoring error. - -* [BUGFIX] Handle WAL corruptions properly prometheus/tsdb#389 -* [BUGFIX] Handle WAL migrations correctly on Windows prometheus/tsdb#392 - -## 2.4.1 / 2018-09-19 - -* [ENHANCEMENT] New TSDB metrics prometheus/tsdb#375 prometheus/tsdb#363 -* [BUGFIX] Render UI correctly for Windows #4616 - -## 2.4.0 / 2018-09-11 - -This release includes multiple bugfixes and features. Further, the WAL implementation has been re-written so the storage is not forward compatible. Prometheus 2.3 storage will work on 2.4 but not vice-versa. 
- -* [CHANGE] Reduce remote write default retries #4279 -* [CHANGE] Remove /heap endpoint #4460 -* [FEATURE] Persist alert 'for' state across restarts #4061 -* [FEATURE] Add API providing per target metric metadata #4183 -* [FEATURE] Add API providing recording and alerting rules #4318 #4501 -* [ENHANCEMENT] Brand new WAL implementation for TSDB. Forwards incompatible with previous WAL. -* [ENHANCEMENT] Show rule evaluation errors in UI #4457 -* [ENHANCEMENT] Throttle resends of alerts to Alertmanager #4538 -* [ENHANCEMENT] Send EndsAt along with the alert to Alertmanager #4550 -* [ENHANCEMENT] Limit the samples returned by remote read endpoint #4532 -* [ENHANCEMENT] Limit the data read in through remote read #4239 -* [ENHANCEMENT] Coalesce identical SD configurations #3912 -* [ENHANCEMENT] `promtool`: Add new commands for debugging and querying #4247 #4308 #4346 #4454 -* [ENHANCEMENT] Update console examples for node_exporter v0.16.0 #4208 -* [ENHANCEMENT] Optimize PromQL aggregations #4248 -* [ENHANCEMENT] Remote read: Add Offset to hints #4226 -* [ENHANCEMENT] `consul_sd`: Add support for ServiceMeta field #4280 -* [ENHANCEMENT] `ec2_sd`: Maintain order of subnet_id label #4405 -* [ENHANCEMENT] `ec2_sd`: Add support for custom endpoint to support EC2 compliant APIs #4333 -* [ENHANCEMENT] `ec2_sd`: Add instance_owner label #4514 -* [ENHANCEMENT] `azure_sd`: Add support for VMSS discovery and multiple environments #4202 #4569 -* [ENHANCEMENT] `gce_sd`: Add instance_id label #4488 -* [ENHANCEMENT] Forbid rule-abiding robots from indexing #4266 -* [ENHANCEMENT] Log virtual memory limits on startup #4418 -* [BUGFIX] Wait for service discovery to stop before exiting #4508 -* [BUGFIX] Render SD configs properly #4338 -* [BUGFIX] Only add LookbackDelta to vector selectors #4399 -* [BUGFIX] `ec2_sd`: Handle panic-ing nil pointer #4469 -* [BUGFIX] `consul_sd`: Stop leaking connections #4443 -* [BUGFIX] Use templated labels also to identify alerts #4500 -* [BUGFIX] Reduce 
floating point errors in stddev and related functions #4533 -* [BUGFIX] Log errors while encoding responses #4359 - -## 2.3.2 / 2018-07-12 - -* [BUGFIX] Fix various tsdb bugs #4369 -* [BUGFIX] Reorder startup and shutdown to prevent panics. #4321 -* [BUGFIX] Exit with non-zero code on error #4296 -* [BUGFIX] discovery/kubernetes/ingress: fix scheme discovery #4329 -* [BUGFIX] Fix race in zookeeper sd #4355 -* [BUGFIX] Better timeout handling in promql #4291 #4300 -* [BUGFIX] Propagate errors when selecting series from the tsdb #4136 - -## 2.3.1 / 2018-06-19 - -* [BUGFIX] Avoid infinite loop on duplicate NaN values. #4275 -* [BUGFIX] Fix nil pointer dereference when using various API endpoints #4282 -* [BUGFIX] config: set target group source index during unmarshaling #4245 -* [BUGFIX] discovery/file: fix logging #4178 -* [BUGFIX] kubernetes_sd: fix namespace filtering #4285 -* [BUGFIX] web: restore old path prefix behavior #4273 -* [BUGFIX] web: remove security headers added in 2.3.0 #4259 - -## 2.3.0 / 2018-06-05 - -* [CHANGE] `marathon_sd`: use `auth_token` and `auth_token_file` for token-based authentication instead of `bearer_token` and `bearer_token_file` respectively. -* [CHANGE] Metric names for HTTP server metrics changed -* [FEATURE] Add query commands to promtool -* [FEATURE] Add security headers to HTTP server responses -* [FEATURE] Pass query hints via remote read API -* [FEATURE] Basic auth passwords can now be configured via file across all configuration -* [ENHANCEMENT] Optimize PromQL and API serialization for memory usage and allocations -* [ENHANCEMENT] Limit number of dropped targets in web UI -* [ENHANCEMENT] Consul and EC2 service discovery allow using server-side filtering for performance improvement -* [ENHANCEMENT] Add advanced filtering configuration to EC2 service discovery -* [ENHANCEMENT] `marathon_sd`: adds support for basic and bearer authentication, plus all other common HTTP client options (TLS config, proxy URL, etc.) 
-* [ENHANCEMENT] Provide machine type metadata and labels in GCE service discovery -* [ENHANCEMENT] Add pod controller kind and name to Kubernetes service discovery data -* [ENHANCEMENT] Move TSDB to flock-based log file that works with Docker containers -* [BUGFIX] Properly propagate storage errors in PromQL -* [BUGFIX] Fix path prefix for web pages -* [BUGFIX] Fix goroutine leak in Consul service discovery -* [BUGFIX] Fix races in scrape manager -* [BUGFIX] Fix OOM for very large k in PromQL topk() queries -* [BUGFIX] Make remote write more resilient to unavailable receivers -* [BUGFIX] Make remote write shut down cleanly -* [BUGFIX] Don't leak files on errors in TSDB's tombstone cleanup -* [BUGFIX] Unary minus expressions now remove the metric name from results -* [BUGFIX] Fix bug that led to the wrong number of samples being considered for time range expressions - -## 2.2.1 / 2018-03-13 - -* [BUGFIX] Fix data loss in TSDB on compaction -* [BUGFIX] Correctly stop timer in remote-write path -* [BUGFIX] Fix deadlock triggered by loading targets page -* [BUGFIX] Fix incorrect buffering of samples on range selection queries -* [BUGFIX] Handle large index files on windows properly - -## 2.2.0 / 2018-03-08 - -* [CHANGE] Rename file SD mtime metric. -* [CHANGE] Send target update on empty pod IP in Kubernetes SD. -* [FEATURE] Add API endpoint for flags. -* [FEATURE] Add API endpoint for dropped targets. -* [FEATURE] Display annotations on alerts page. -* [FEATURE] Add option to skip head data when taking snapshots. -* [ENHANCEMENT] Federation performance improvement. -* [ENHANCEMENT] Read bearer token file on every scrape. -* [ENHANCEMENT] Improve typeahead on `/graph` page. -* [ENHANCEMENT] Change rule file formatting. -* [ENHANCEMENT] Set consul server default to `localhost:8500`. -* [ENHANCEMENT] Add dropped Alertmanagers to API info endpoint. -* [ENHANCEMENT] Add OS type meta label to Azure SD. -* [ENHANCEMENT] Validate required fields in SD configuration. 
-* [BUGFIX] Prevent stack overflow on deep recursion in TSDB. -* [BUGFIX] Correctly read offsets in index files that are greater than 4GB. -* [BUGFIX] Fix scraping behavior for empty labels. -* [BUGFIX] Drop metric name for bool modifier. -* [BUGFIX] Fix races in discovery. -* [BUGFIX] Fix Kubernetes endpoints SD for empty subsets. -* [BUGFIX] Throttle updates from SD providers, which caused increased CPU usage and allocations. -* [BUGFIX] Fix TSDB block reload issue. -* [BUGFIX] Fix PromQL printing of empty `without()`. -* [BUGFIX] Don't reset FiredAt for inactive alerts. -* [BUGFIX] Fix erroneous file version changes and repair existing data. - -## 2.1.0 / 2018-01-19 - -* [FEATURE] New Service Discovery UI showing labels before and after relabelling. -* [FEATURE] New Admin APIs added to v1 to delete, snapshot and remove tombstones. -* [ENHANCEMENT] The graph UI autocomplete now includes your previous queries. -* [ENHANCEMENT] Federation is now much faster for large numbers of series. -* [ENHANCEMENT] Added new metrics to measure rule timings. -* [ENHANCEMENT] Rule evaluation times added to the rules UI. -* [ENHANCEMENT] Added metrics to measure modified time of file SD files. -* [ENHANCEMENT] Kubernetes SD now includes POD UID in discovery metadata. -* [ENHANCEMENT] The Query APIs now return optional stats on query execution times. -* [ENHANCEMENT] The index now no longer has the 4GiB size limit and is also smaller. -* [BUGFIX] Remote read `read_recent` option is now false by default. -* [BUGFIX] Pass the right configuration to each Alertmanager (AM) when using multiple AM configs. -* [BUGFIX] Fix not-matchers not selecting series with labels unset. -* [BUGFIX] tsdb: Fix occasional panic in head block. -* [BUGFIX] tsdb: Close files before deletion to fix retention issues on Windows and NFS. -* [BUGFIX] tsdb: Cleanup and do not retry failing compactions. -* [BUGFIX] tsdb: Close WAL while shutting down. 
- -## 2.0.0 / 2017-11-08 - -This release includes a completely rewritten storage, huge performance -improvements, but also many backwards incompatible changes. For more -information, read the announcement blog post and migration guide. - - - - -* [CHANGE] Completely rewritten storage layer, with WAL. This is not backwards compatible with 1.x storage, and many flags have changed/disappeared. -* [CHANGE] New staleness behavior. Series now marked stale after target scrapes no longer return them, and soon after targets disappear from service discovery. -* [CHANGE] Rules files use YAML syntax now. Conversion tool added to promtool. -* [CHANGE] Removed `count_scalar`, `drop_common_labels` functions and `keep_common` modifier from PromQL. -* [CHANGE] Rewritten exposition format parser with much higher performance. The Protobuf exposition format is no longer supported. -* [CHANGE] Example console templates updated for new storage and metrics names. Examples other than node exporter and Prometheus removed. -* [CHANGE] Admin and lifecycle APIs now disabled by default, can be re-enabled via flags -* [CHANGE] Flags switched to using Kingpin, all flags are now --flagname rather than -flagname. -* [FEATURE/CHANGE] Remote read can be configured to not read data which is available locally. This is enabled by default. -* [FEATURE] Rules can be grouped now. Rules within a rule group are executed sequentially. -* [FEATURE] Added experimental GRPC apis -* [FEATURE] Add timestamp() function to PromQL. -* [ENHANCEMENT] Remove remote read from the query path if no remote storage is configured. -* [ENHANCEMENT] Bump Consul HTTP client timeout to not match the Consul SD watch timeout. -* [ENHANCEMENT] Go-conntrack added to provide HTTP connection metrics. -* [BUGFIX] Fix connection leak in Consul SD. - -## 1.8.2 / 2017-11-04 - -* [BUGFIX] EC2 service discovery: Do not crash if tags are empty. 
- -## 1.8.1 / 2017-10-19 - -* [BUGFIX] Correctly handle external labels on remote read endpoint - -## 1.8.0 / 2017-10-06 - -* [CHANGE] Rule links link to the _Console_ tab rather than the _Graph_ tab to - not trigger expensive range queries by default. -* [FEATURE] Ability to act as a remote read endpoint for other Prometheus - servers. -* [FEATURE] K8s SD: Support discovery of ingresses. -* [FEATURE] Consul SD: Support for node metadata. -* [FEATURE] Openstack SD: Support discovery of hypervisors. -* [FEATURE] Expose current Prometheus config via `/status/config`. -* [FEATURE] Allow to collapse jobs on `/targets` page. -* [FEATURE] Add `/-/healthy` and `/-/ready` endpoints. -* [FEATURE] Add color scheme support to console templates. -* [ENHANCEMENT] Remote storage connections use HTTP keep-alive. -* [ENHANCEMENT] Improved logging about remote storage. -* [ENHANCEMENT] Relaxed URL validation. -* [ENHANCEMENT] Openstack SD: Handle instances without IP. -* [ENHANCEMENT] Make remote storage queue manager configurable. -* [ENHANCEMENT] Validate metrics returned from remote read. -* [ENHANCEMENT] EC2 SD: Set a default region. -* [ENHANCEMENT] Changed help link to `https://prometheus.io/docs`. -* [BUGFIX] Fix floating-point precision issue in `deriv` function. -* [BUGFIX] Fix pprof endpoints when -web.route-prefix or -web.external-url is - used. -* [BUGFIX] Fix handling of `null` target groups in file-based SD. -* [BUGFIX] Set the sample timestamp in date-related PromQL functions. -* [BUGFIX] Apply path prefix to redirect from deprecated graph URL. -* [BUGFIX] Fixed tests on MS Windows. -* [BUGFIX] Check for invalid UTF-8 in label values after relabeling. - -## 1.7.2 / 2017-09-26 - -* [BUGFIX] Correctly remove all targets from DNS service discovery if the - corresponding DNS query succeeds and returns an empty result. -* [BUGFIX] Correctly parse resolution input in expression browser. -* [BUGFIX] Consistently use UTC in the date picker of the expression browser. 
-* [BUGFIX] Correctly handle multiple ports in Marathon service discovery. -* [BUGFIX] Fix HTML escaping so that HTML templates compile with Go1.9. -* [BUGFIX] Prevent number of remote write shards from going negative. -* [BUGFIX] In the graphs created by the expression browser, render very large - and small numbers in a readable way. -* [BUGFIX] Fix a rarely occurring iterator issue in varbit encoded chunks. - -## 1.7.1 / 2017-06-12 - -* [BUGFIX] Fix double prefix redirect. - -## 1.7.0 / 2017-06-06 - -* [CHANGE] Compress remote storage requests and responses with unframed/raw snappy. -* [CHANGE] Properly elide secrets in config. -* [FEATURE] Add OpenStack service discovery. -* [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces. -* [FEATURE] Add metric for discovered number of Alertmanagers. -* [ENHANCEMENT] Print system information (uname) on start up. -* [ENHANCEMENT] Show gaps in graphs on expression browser. -* [ENHANCEMENT] Promtool linter checks counter naming and more reserved labels. -* [BUGFIX] Fix broken Mesos discovery. -* [BUGFIX] Fix redirect when external URL is set. -* [BUGFIX] Fix mutation of active alert elements by notifier. -* [BUGFIX] Fix HTTP error handling for remote write. -* [BUGFIX] Fix builds for Solaris/Illumos. -* [BUGFIX] Fix overflow checking in global config. -* [BUGFIX] Fix log level reporting issue. -* [BUGFIX] Fix ZooKeeper serverset discovery can become out-of-sync. - -## 1.6.3 / 2017-05-18 - -* [BUGFIX] Fix disappearing Alertmanager targets in Alertmanager discovery. -* [BUGFIX] Fix panic with remote_write on ARMv7. -* [BUGFIX] Fix stacked graphs to adapt min/max values. 
- -## 1.6.2 / 2017-05-11 - -* [BUGFIX] Fix potential memory leak in Kubernetes service discovery - -## 1.6.1 / 2017-04-19 - -* [BUGFIX] Don't panic if storage has no FPs even after initial wait - -## 1.6.0 / 2017-04-14 - -* [CHANGE] Replaced the remote write implementations for various backends by a - generic write interface with example adapter implementation for various - backends. Note that both the previous and the current remote write - implementations are **experimental**. -* [FEATURE] New flag `-storage.local.target-heap-size` to tell Prometheus about - the desired heap size. This deprecates the flags - `-storage.local.memory-chunks` and `-storage.local.max-chunks-to-persist`, - which are kept for backward compatibility. -* [FEATURE] Add `check-metrics` to `promtool` to lint metric names. -* [FEATURE] Add Joyent Triton discovery. -* [FEATURE] `X-Prometheus-Scrape-Timeout-Seconds` header in HTTP scrape - requests. -* [FEATURE] Remote read interface, including example for InfluxDB. **Experimental.** -* [FEATURE] Enable Consul SD to connect via TLS. -* [FEATURE] Marathon SD supports multiple ports. -* [FEATURE] Marathon SD supports bearer token for authentication. -* [FEATURE] Custom timeout for queries. -* [FEATURE] Expose `buildQueryUrl` in `graph.js`. -* [FEATURE] Add `rickshawGraph` property to the graph object in console - templates. -* [FEATURE] New metrics exported by Prometheus itself: - * Summary `prometheus_engine_query_duration_seconds` - * Counter `prometheus_evaluator_iterations_missed_total` - * Counter `prometheus_evaluator_iterations_total` - * Gauge `prometheus_local_storage_open_head_chunks` - * Gauge `prometheus_local_storage_target_heap_size` -* [ENHANCEMENT] Reduce shut-down time by interrupting an ongoing checkpoint - before starting the final checkpoint. -* [ENHANCEMENT] Auto-tweak times between checkpoints to limit time spent in - checkpointing to 50%. 
-* [ENHANCEMENT] Improved crash recovery deals better with certain index - corruptions. -* [ENHANCEMENT] Graphing deals better with constant time series. -* [ENHANCEMENT] Retry remote writes on recoverable errors. -* [ENHANCEMENT] Evict unused chunk descriptors during crash recovery to limit - memory usage. -* [ENHANCEMENT] Smoother disk usage during series maintenance. -* [ENHANCEMENT] Targets on targets page sorted by instance within a job. -* [ENHANCEMENT] Sort labels in federation. -* [ENHANCEMENT] Set `GOGC=40` by default, which results in much better memory - utilization at the price of slightly higher CPU usage. If `GOGC` is set by - the user, it is still honored as usual. -* [ENHANCEMENT] Close head chunks after being idle for the duration of the - configured staleness delta. This helps to persist and evict head chunk of - stale series more quickly. -* [ENHANCEMENT] Stricter checking of relabel config. -* [ENHANCEMENT] Cache busters for static web content. -* [ENHANCEMENT] Send Prometheus-specific user-agent header during scrapes. -* [ENHANCEMENT] Improved performance of series retention cut-off. -* [ENHANCEMENT] Mitigate impact of non-atomic sample ingestion on - `histogram_quantile` by enforcing buckets to be monotonic. -* [ENHANCEMENT] Released binaries built with Go 1.8.1. -* [BUGFIX] Send `instance=""` with federation if `instance` not set. -* [BUGFIX] Update to new `client_golang` to get rid of unwanted quantile - metrics in summaries. -* [BUGFIX] Introduce several additional guards against data corruption. -* [BUGFIX] Mark storage dirty and increment - `prometheus_local_storage_persist_errors_total` on all relevant errors. -* [BUGFIX] Propagate storage errors as 500 in the HTTP API. -* [BUGFIX] Fix int64 overflow in timestamps in the HTTP API. -* [BUGFIX] Fix deadlock in Zookeeper SD. -* [BUGFIX] Fix fuzzy search problems in the web-UI auto-completion. 
- -## 1.5.3 / 2017-05-11 - -* [BUGFIX] Fix potential memory leak in Kubernetes service discovery - -## 1.5.2 / 2017-02-10 - -* [BUGFIX] Fix series corruption in a special case of series maintenance where - the minimum series-file-shrink-ratio kicks in. -* [BUGFIX] Fix two panic conditions both related to processing a series - scheduled to be quarantined. -* [ENHANCEMENT] Binaries built with Go1.7.5. - -## 1.5.1 / 2017-02-07 - -* [BUGFIX] Don't lose fully persisted memory series during checkpointing. -* [BUGFIX] Fix intermittently failing relabeling. -* [BUGFIX] Make `-storage.local.series-file-shrink-ratio` work. -* [BUGFIX] Remove race condition from TestLoop. - -## 1.5.0 / 2017-01-23 - -* [CHANGE] Use lexicographic order to sort alerts by name. -* [FEATURE] Add Joyent Triton discovery. -* [FEATURE] Add scrape targets and alertmanager targets API. -* [FEATURE] Add various persistence related metrics. -* [FEATURE] Add various query engine related metrics. -* [FEATURE] Add ability to limit scrape samples, and related metrics. -* [FEATURE] Add labeldrop and labelkeep relabelling actions. -* [FEATURE] Display current working directory on status-page. -* [ENHANCEMENT] Strictly use ServiceAccount for in cluster configuration on Kubernetes. -* [ENHANCEMENT] Various performance and memory-management improvements. -* [BUGFIX] Fix basic auth for alertmanagers configured via flag. -* [BUGFIX] Don't panic on decoding corrupt data. -* [BUGFIX] Ignore dotfiles in data directory. -* [BUGFIX] Abort on intermediate federation errors. 
- -## 1.4.1 / 2016-11-28 - -* [BUGFIX] Fix Consul service discovery - -## 1.4.0 / 2016-11-25 - -* [FEATURE] Allow configuring Alertmanagers via service discovery -* [FEATURE] Display used Alertmanagers on runtime page in the UI -* [FEATURE] Support profiles in AWS EC2 service discovery configuration -* [ENHANCEMENT] Remove duplicated logging of Kubernetes client errors -* [ENHANCEMENT] Add metrics about Kubernetes service discovery -* [BUGFIX] Update alert annotations on re-evaluation -* [BUGFIX] Fix export of group modifier in PromQL queries -* [BUGFIX] Remove potential deadlocks in several service discovery implementations -* [BUGFIX] Use proper float64 modulo in PromQL `%` binary operations -* [BUGFIX] Fix crash bug in Kubernetes service discovery - -## 1.3.1 / 2016-11-04 - -This bug-fix release pulls in the fixes from the 1.2.3 release. - -* [BUGFIX] Correctly handle empty Regex entry in relabel config. -* [BUGFIX] MOD (`%`) operator doesn't panic with small floating point numbers. -* [BUGFIX] Updated miekg/dns vendoring to pick up upstream bug fixes. -* [ENHANCEMENT] Improved DNS error reporting. - -## 1.2.3 / 2016-11-04 - -Note that this release is chronologically after 1.3.0. - -* [BUGFIX] Correctly handle end time before start time in range queries. -* [BUGFIX] Error on negative `-storage.staleness-delta` -* [BUGFIX] Correctly handle empty Regex entry in relabel config. -* [BUGFIX] MOD (`%`) operator doesn't panic with small floating point numbers. -* [BUGFIX] Updated miekg/dns vendoring to pick up upstream bug fixes. -* [ENHANCEMENT] Improved DNS error reporting. - -## 1.3.0 / 2016-11-01 - -This is a breaking change to the Kubernetes service discovery. - -* [CHANGE] Rework Kubernetes SD. -* [FEATURE] Add support for interpolating `target_label`. -* [FEATURE] Add GCE metadata as Prometheus meta labels. -* [ENHANCEMENT] Add EC2 SD metrics. -* [ENHANCEMENT] Add Azure SD metrics. -* [ENHANCEMENT] Add fuzzy search to `/graph` textarea. 
-* [ENHANCEMENT] Always show instance labels on target page. -* [BUGFIX] Validate query end time is not before start time. -* [BUGFIX] Error on negative `-storage.staleness-delta` - -## 1.2.2 / 2016-10-30 - -* [BUGFIX] Correctly handle on() in alerts. -* [BUGFIX] UI: Deal properly with aborted requests. -* [BUGFIX] UI: Decode URL query parameters properly. -* [BUGFIX] Storage: Deal better with data corruption (non-monotonic timestamps). -* [BUGFIX] Remote storage: Re-add accidentally removed timeout flag. -* [BUGFIX] Updated a number of vendored packages to pick up upstream bug fixes. - -## 1.2.1 / 2016-10-10 - -* [BUGFIX] Count chunk evictions properly so that the server doesn't - assume it runs out of memory and subsequently throttles ingestion. -* [BUGFIX] Use Go1.7.1 for prebuilt binaries to fix issues on MacOS Sierra. - -## 1.2.0 / 2016-10-07 - -* [FEATURE] Cleaner encoding of query parameters in `/graph` URLs. -* [FEATURE] PromQL: Add `minute()` function. -* [FEATURE] Add GCE service discovery. -* [FEATURE] Allow any valid UTF-8 string as job name. -* [FEATURE] Allow disabling local storage. -* [FEATURE] EC2 service discovery: Expose `ec2_instance_state`. -* [ENHANCEMENT] Various performance improvements in local storage. -* [BUGFIX] Zookeeper service discovery: Remove deleted nodes. -* [BUGFIX] Zookeeper service discovery: Resync state after Zookeeper failure. -* [BUGFIX] Remove JSON from HTTP Accept header. -* [BUGFIX] Fix flag validation of Alertmanager URL. -* [BUGFIX] Fix race condition on shutdown. -* [BUGFIX] Do not fail Consul discovery on Prometheus startup when Consul - is down. -* [BUGFIX] Handle NaN in `changes()` correctly. -* [CHANGE] **Experimental** remote write path: Remove use of gRPC. -* [CHANGE] **Experimental** remote write path: Configuration via config file - rather than command line flags. -* [FEATURE] **Experimental** remote write path: Add HTTP basic auth and TLS. 
-* [FEATURE] **Experimental** remote write path: Support for relabelling. - -## 1.1.3 / 2016-09-16 - -* [ENHANCEMENT] Use golang-builder base image for tests in CircleCI. -* [ENHANCEMENT] Added unit tests for federation. -* [BUGFIX] Correctly de-dup metric families in federation output. - -## 1.1.2 / 2016-09-08 - -* [BUGFIX] Allow label names that coincide with keywords. - -## 1.1.1 / 2016-09-07 - -* [BUGFIX] Fix IPv6 escaping in service discovery integrations -* [BUGFIX] Fix default scrape port assignment for IPv6 - -## 1.1.0 / 2016-09-03 - -* [FEATURE] Add `quantile` and `quantile_over_time`. -* [FEATURE] Add `stddev_over_time` and `stdvar_over_time`. -* [FEATURE] Add various time and date functions. -* [FEATURE] Added `toUpper` and `toLower` formatting to templates. -* [FEATURE] Allow relabeling of alerts. -* [FEATURE] Allow URLs in targets defined via a JSON file. -* [FEATURE] Add idelta function. -* [FEATURE] 'Remove graph' button on the /graph page. -* [FEATURE] Kubernetes SD: Add node name and host IP to pod discovery. -* [FEATURE] New remote storage write path. EXPERIMENTAL! -* [ENHANCEMENT] Improve time-series index lookups. -* [ENHANCEMENT] Forbid invalid relabel configurations. -* [ENHANCEMENT] Improved various tests. -* [ENHANCEMENT] Add crash recovery metric 'started_dirty'. -* [ENHANCEMENT] Fix (and simplify) populating series iterators. -* [ENHANCEMENT] Add job link on target page. -* [ENHANCEMENT] Message on empty Alerts page. -* [ENHANCEMENT] Various internal code refactorings and clean-ups. -* [ENHANCEMENT] Various improvements in the build system. -* [BUGFIX] Catch errors when unmarshaling delta/doubleDelta encoded chunks. -* [BUGFIX] Fix data race in lexer and lexer test. -* [BUGFIX] Trim stray whitespace from bearer token file. -* [BUGFIX] Avoid divide-by-zero panic on query_range?step=0. -* [BUGFIX] Detect invalid rule files at startup. -* [BUGFIX] Fix counter reset treatment in PromQL. -* [BUGFIX] Fix rule HTML escaping issues. 
-* [BUGFIX] Remove internal labels from alerts sent to AM. - -## 1.0.2 / 2016-08-24 - -* [BUGFIX] Clean up old targets after config reload. - -## 1.0.1 / 2016-07-21 - -* [BUGFIX] Exit with error on non-flag command-line arguments. -* [BUGFIX] Update example console templates to new HTTP API. -* [BUGFIX] Re-add logging flags. - -## 1.0.0 / 2016-07-18 - -* [CHANGE] Remove deprecated query language keywords -* [CHANGE] Change Kubernetes SD to require specifying Kubernetes role -* [CHANGE] Use service address in Consul SD if available -* [CHANGE] Standardize all Prometheus internal metrics to second units -* [CHANGE] Remove unversioned legacy HTTP API -* [CHANGE] Remove legacy ingestion of JSON metric format -* [CHANGE] Remove deprecated `target_groups` configuration -* [FEATURE] Add binary power operation to PromQL -* [FEATURE] Add `count_values` aggregator -* [FEATURE] Add `-web.route-prefix` flag -* [FEATURE] Allow `on()`, `by()`, `without()` in PromQL with empty label sets -* [ENHANCEMENT] Make `topk/bottomk` query functions aggregators -* [BUGFIX] Fix annotations in alert rule printing -* [BUGFIX] Expand alert templating at evaluation time -* [BUGFIX] Fix edge case handling in crash recovery -* [BUGFIX] Hide testing package flags from help output - -## 0.20.0 / 2016-06-15 - -This release contains multiple breaking changes to the configuration schema. 
- -* [FEATURE] Allow configuring multiple Alertmanagers -* [FEATURE] Add server name to TLS configuration -* [FEATURE] Add labels for all node addresses and discover node port if available in Kubernetes SD -* [ENHANCEMENT] More meaningful configuration errors -* [ENHANCEMENT] Round scraping timestamps to milliseconds in web UI -* [ENHANCEMENT] Make number of storage fingerprint locks configurable -* [BUGFIX] Fix date parsing in console template graphs -* [BUGFIX] Fix static console files in Docker images -* [BUGFIX] Fix console JS XHR requests for IE11 -* [BUGFIX] Add missing path prefix in new status page -* [CHANGE] Rename `target_groups` to `static_configs` in config files -* [CHANGE] Rename `names` to `files` in file SD configuration -* [CHANGE] Remove kubelet port config option in Kubernetes SD configuration - -## 0.19.3 / 2016-06-14 - -* [BUGFIX] Handle Marathon apps with zero ports -* [BUGFIX] Fix startup panic in retrieval layer - -## 0.19.2 / 2016-05-29 - -* [BUGFIX] Correctly handle `GROUP_LEFT` and `GROUP_RIGHT` without labels in - string representation of expressions and in rules. -* [BUGFIX] Use `-web.external-url` for new status endpoints. - -## 0.19.1 / 2016-05-25 - -* [BUGFIX] Handle service discovery panic affecting Kubernetes SD -* [BUGFIX] Fix web UI display issue in some browsers - -## 0.19.0 / 2016-05-24 - -This version contains a breaking change to the query language. 
Please read -the documentation on the grouping behavior of vector matching: - - - -* [FEATURE] Add experimental Microsoft Azure service discovery -* [FEATURE] Add `ignoring` modifier for binary operations -* [FEATURE] Add pod discovery to Kubernetes service discovery -* [CHANGE] Vector matching takes grouping labels from one-side -* [ENHANCEMENT] Support time range on /api/v1/series endpoint -* [ENHANCEMENT] Partition status page into individual pages -* [BUGFIX] Fix issue of hanging target scrapes - -## 0.18.0 / 2016-04-18 - -* [BUGFIX] Fix operator precedence in PromQL -* [BUGFIX] Never drop still open head chunk -* [BUGFIX] Fix missing 'keep_common' when printing AST node -* [CHANGE/BUGFIX] Target identity considers path and parameters additionally to host and port -* [CHANGE] Rename metric `prometheus_local_storage_invalid_preload_requests_total` to `prometheus_local_storage_non_existent_series_matches_total` -* [CHANGE] Support for old alerting rule syntax dropped -* [FEATURE] Deduplicate targets within the same scrape job -* [FEATURE] Add varbit chunk encoding (higher compression, more CPU usage – disabled by default) -* [FEATURE] Add `holt_winters` query function -* [FEATURE] Add relative complement `unless` operator to PromQL -* [ENHANCEMENT] Quarantine series file if data corruption is encountered (instead of crashing) -* [ENHANCEMENT] Validate Alertmanager URL -* [ENHANCEMENT] Use UTC for build timestamp -* [ENHANCEMENT] Improve index query performance (especially for active time series) -* [ENHANCEMENT] Instrument configuration reload duration -* [ENHANCEMENT] Instrument retrieval layer -* [ENHANCEMENT] Add Go version to `prometheus_build_info` metric - -## 0.17.0 / 2016-03-02 - -This version no longer works with Alertmanager 0.0.4 and earlier! -The alerting rule syntax has changed as well but the old syntax is supported -up until version 0.18. 
- -All regular expressions in PromQL are anchored now, matching the behavior of -regular expressions in config files. - -* [CHANGE] Integrate with Alertmanager 0.1.0 and higher -* [CHANGE] Degraded storage mode renamed to rushed mode -* [CHANGE] New alerting rule syntax -* [CHANGE] Add label validation on ingestion -* [CHANGE] Regular expression matchers in PromQL are anchored -* [FEATURE] Add `without` aggregation modifier -* [FEATURE] Send alert resolved notifications to Alertmanager -* [FEATURE] Allow millisecond precision in configuration file -* [FEATURE] Support AirBnB's Smartstack Nerve for service discovery -* [ENHANCEMENT] Storage switches less often between regular and rushed mode. -* [ENHANCEMENT] Storage switches into rushed mode if there are too many memory chunks. -* [ENHANCEMENT] Added more storage instrumentation -* [ENHANCEMENT] Improved instrumentation of notification handler -* [BUGFIX] Do not count head chunks as chunks waiting for persistence -* [BUGFIX] Handle OPTIONS HTTP requests to the API correctly -* [BUGFIX] Parsing of ranges in PromQL fixed -* [BUGFIX] Correctly validate URL flag parameters -* [BUGFIX] Log argument parse errors -* [BUGFIX] Properly handle creation of target with bad TLS config -* [BUGFIX] Fix of checkpoint timing issue - -## 0.16.2 / 2016-01-18 - -* [FEATURE] Multiple authentication options for EC2 discovery added -* [FEATURE] Several meta labels for EC2 discovery added -* [FEATURE] Allow full URLs in static target groups (used e.g. 
by the `blackbox_exporter`) -* [FEATURE] Add Graphite remote-storage integration -* [FEATURE] Create separate Kubernetes targets for services and their endpoints -* [FEATURE] Add `clamp_{min,max}` functions to PromQL -* [FEATURE] Omitted time parameter in API query defaults to now -* [ENHANCEMENT] Less frequent time series file truncation -* [ENHANCEMENT] Instrument number of manually deleted time series -* [ENHANCEMENT] Ignore lost+found directory during storage version detection -* [CHANGE] Kubernetes `masters` renamed to `api_servers` -* [CHANGE] "Healthy" and "unhealthy" targets are now called "up" and "down" in the web UI -* [CHANGE] Remove undocumented 2nd argument of the `delta` function. - (This is a BREAKING CHANGE for users of the undocumented 2nd argument.) -* [BUGFIX] Return proper HTTP status codes on API errors -* [BUGFIX] Fix Kubernetes authentication configuration -* [BUGFIX] Fix OFFSET being stripped in rule evaluation and display -* [BUGFIX] Do not crash on failing Consul SD initialization -* [BUGFIX] Revert changes to metric auto-completion -* [BUGFIX] Add config overflow validation for TLS configuration -* [BUGFIX] Skip already watched Zookeeper nodes in serverset SD -* [BUGFIX] Don't federate stale samples -* [BUGFIX] Move NaN to end of result for `topk/bottomk/sort/sort_desc/min/max` -* [BUGFIX] Limit extrapolation of `delta/rate/increase` -* [BUGFIX] Fix unhandled error in rule evaluation - -Some changes to the Kubernetes service discovery were integrated since -it was released as a beta feature. - -## 0.16.1 / 2015-10-16 - -* [FEATURE] Add `irate()` function. -* [ENHANCEMENT] Improved auto-completion in expression browser. -* [CHANGE] Kubernetes SD moves node label to instance label. -* [BUGFIX] Escape regexes in console templates. - -## 0.16.0 / 2015-10-09 - -BREAKING CHANGES: - -* Release tarballs now contain the built binaries in a nested directory.
-* The `hash_mod` relabeling action now uses MD5 hashes instead of FNV hashes to - achieve a better distribution. -* The DNS-SD meta label `__meta_dns_srv_name` was renamed to `__meta_dns_name` - to reflect support for DNS record types other than `SRV`. -* The default full refresh interval for the file-based service discovery has been - increased from 30 seconds to 5 minutes. -* In relabeling, parts of a source label that weren't matched by - the specified regular expression are no longer included in the replacement - output. -* Queries no longer interpolate between two data points. Instead, the resulting - value will always be the latest value before the evaluation query timestamp. -* Regular expressions supplied via the configuration are now anchored to match - full strings instead of substrings. -* Global labels are not appended upon storing time series anymore. Instead, - they are only appended when communicating with external systems - (Alertmanager, remote storages, federation). They have thus also been renamed - from `global.labels` to `global.external_labels`. -* The names and units of metrics related to remote storage sample appends have - been changed. -* The experimental support for writing to InfluxDB has been updated to work - with InfluxDB 0.9.x. 0.8.x versions of InfluxDB are not supported anymore. -* Escape sequences in double- and single-quoted string literals in rules or query - expressions are now interpreted like escape sequences in Go string literals - (). - -Future breaking changes / deprecated features: - -* The `delta()` function had an undocumented optional second boolean argument - to make it behave like `increase()`. This second argument will be removed in - the future. Migrate any occurrences of `delta(x, 1)` to use `increase(x)` - instead. -* Support for filter operators between two scalar values (like `2 > 1`) will be - removed in the future. These will require a `bool` modifier on the operator, - e.g. `2 > bool 1`. 
- -All changes: - -* [CHANGE] Renamed `global.labels` to `global.external_labels`. -* [CHANGE] Vendoring is now done via govendor instead of godep. -* [CHANGE] Change web UI root page to show the graphing interface instead of - the server status page. -* [CHANGE] Append global labels only when communicating with external systems - instead of storing them locally. -* [CHANGE] Change all regexes in the configuration to do full-string matches - instead of substring matches. -* [CHANGE] Remove interpolation of vector values in queries. -* [CHANGE] For alert `SUMMARY`/`DESCRIPTION` template fields, cast the alert - value to `float64` to work with common templating functions. -* [CHANGE] In relabeling, don't include unmatched source label parts in the - replacement. -* [CHANGE] Change default full refresh interval for the file-based service - discovery from 30 seconds to 5 minutes. -* [CHANGE] Rename the DNS-SD meta label `__meta_dns_srv_name` to - `__meta_dns_name` to reflect support for other record types than `SRV`. -* [CHANGE] Release tarballs now contain the binaries in a nested directory. -* [CHANGE] Update InfluxDB write support to work with InfluxDB 0.9.x. -* [FEATURE] Support full "Go-style" escape sequences in strings and add raw - string literals. -* [FEATURE] Add EC2 service discovery support. -* [FEATURE] Allow configuring TLS options in scrape configurations. -* [FEATURE] Add instrumentation around configuration reloads. -* [FEATURE] Add `bool` modifier to comparison operators to enable boolean - (`0`/`1`) output instead of filtering. -* [FEATURE] In Zookeeper serverset discovery, provide `__meta_serverset_shard` - label with the serverset shard number. -* [FEATURE] Provide `__meta_consul_service_id` meta label in Consul service - discovery. -* [FEATURE] Allow scalar expressions in recording rules to enable use cases - such as building constant metrics. -* [FEATURE] Add `label_replace()` and `vector()` query language functions. 
-* [FEATURE] In Consul service discovery, fill in the `__meta_consul_dc` - datacenter label from the Consul agent when it's not set in the Consul SD - config. -* [FEATURE] Scrape all services upon empty services list in Consul service - discovery. -* [FEATURE] Add `labelmap` relabeling action to map a set of input labels to a - set of output labels using regular expressions. -* [FEATURE] Introduce `__tmp` as a relabeling label prefix that is guaranteed - to not be used by Prometheus internally. -* [FEATURE] Kubernetes-based service discovery. -* [FEATURE] Marathon-based service discovery. -* [FEATURE] Support multiple series names in console graphs JavaScript library. -* [FEATURE] Allow reloading configuration via web handler at `/-/reload`. -* [FEATURE] Updates to promtool to reflect new Prometheus configuration - features. -* [FEATURE] Add `proxy_url` parameter to scrape configurations to enable use of - proxy servers. -* [FEATURE] Add console templates for Prometheus itself. -* [FEATURE] Allow relabeling the protocol scheme of targets. -* [FEATURE] Add `predict_linear()` query language function. -* [FEATURE] Support for authentication using bearer tokens, client certs, and - CA certs. -* [FEATURE] Implement unary expressions for vector types (`-foo`, `+foo`). -* [FEATURE] Add console templates for the SNMP exporter. -* [FEATURE] Make it possible to relabel target scrape query parameters. -* [FEATURE] Add support for `A` and `AAAA` records in DNS service discovery. -* [ENHANCEMENT] Fix several flaky tests. -* [ENHANCEMENT] Switch to common routing package. -* [ENHANCEMENT] Use more resilient metric decoder. -* [ENHANCEMENT] Update vendored dependencies. -* [ENHANCEMENT] Add compression to more HTTP handlers. -* [ENHANCEMENT] Make -web.external-url flag help string more verbose. -* [ENHANCEMENT] Improve metrics around remote storage queues. -* [ENHANCEMENT] Use Go 1.5.1 instead of Go 1.4.2 in builds. 
-* [ENHANCEMENT] Update the architecture diagram in the `README.md`. -* [ENHANCEMENT] Time out sample appends in retrieval layer if the storage is - backlogging. -* [ENHANCEMENT] Make `hash_mod` relabeling action use MD5 instead of FNV to - enable better hash distribution. -* [ENHANCEMENT] Better tracking of targets between same service discovery - mechanisms in one scrape configuration. -* [ENHANCEMENT] Handle parser and query evaluation runtime panics more - gracefully. -* [ENHANCEMENT] Add IDs to H2 tags on status page to allow anchored linking. -* [BUGFIX] Fix watching multiple paths with Zookeeper serverset discovery. -* [BUGFIX] Fix high CPU usage on configuration reload. -* [BUGFIX] Fix disappearing `__params` on configuration reload. -* [BUGFIX] Make `labelmap` action available through configuration. -* [BUGFIX] Fix direct access of protobuf fields. -* [BUGFIX] Fix panic on Consul request error. -* [BUGFIX] Redirect of graph endpoint for prefixed setups. -* [BUGFIX] Fix series file deletion behavior when purging archived series. -* [BUGFIX] Fix error checking and logging around checkpointing. -* [BUGFIX] Fix map initialization in target manager. -* [BUGFIX] Fix draining of file watcher events in file-based service discovery. -* [BUGFIX] Add `POST` handler for `/debug` endpoints to fix CPU profiling. -* [BUGFIX] Fix several flaky tests. -* [BUGFIX] Fix busylooping in case a scrape configuration has no target - providers defined. -* [BUGFIX] Fix exit behavior of static target provider. -* [BUGFIX] Fix configuration reloading loop upon shutdown. -* [BUGFIX] Add missing check for nil expression in expression parser. -* [BUGFIX] Fix error handling bug in test code. -* [BUGFIX] Fix Consul port meta label. -* [BUGFIX] Fix lexer bug that treated non-Latin Unicode digits as digits. -* [CLEANUP] Remove obsolete federation example from console templates. -* [CLEANUP] Remove duplicated Bootstrap JS inclusion on graph page. -* [CLEANUP] Switch to common log package. 
-* [CLEANUP] Update build environment scripts and Makefiles to work better with - native Go build mechanisms and new Go 1.5 experimental vendoring support. -* [CLEANUP] Remove logged notice about 0.14.x configuration file format change. -* [CLEANUP] Move scrape-time metric label modification into SampleAppenders. -* [CLEANUP] Switch from `github.com/client_golang/model` to - `github.com/common/model` and related type cleanups. -* [CLEANUP] Switch from `github.com/client_golang/extraction` to - `github.com/common/expfmt` and related type cleanups. -* [CLEANUP] Exit Prometheus when the web server encounters a startup error. -* [CLEANUP] Remove non-functional alert-silencing links on alerting page. -* [CLEANUP] General cleanups to comments and code, derived from `golint`, - `go vet`, or otherwise. -* [CLEANUP] When entering crash recovery, tell users how to cleanly shut down - Prometheus. -* [CLEANUP] Remove internal support for multi-statement queries in query engine. -* [CLEANUP] Update AUTHORS.md. -* [CLEANUP] Don't warn/increment metric upon encountering equal timestamps for - the same series upon append. -* [CLEANUP] Resolve relative paths during configuration loading. - -## 0.15.1 / 2015-07-27 - -* [BUGFIX] Fix vector matching behavior when there is a mix of equality and - non-equality matchers in a vector selector and one matcher matches no series. -* [ENHANCEMENT] Allow overriding `GOARCH` and `GOOS` in Makefile.INCLUDE. -* [ENHANCEMENT] Update vendored dependencies. - -## 0.15.0 / 2015-07-21 - -BREAKING CHANGES: - -* Relative paths for rule files are now evaluated relative to the config file. -* External reachability flags (`-web.*`) consolidated. -* The default storage directory has been changed from `/tmp/metrics` - to `data` in the local directory. -* The `rule_checker` tool has been replaced by `promtool` with - different flags and more functionality. -* Empty labels are now removed upon ingestion into the - storage. 
Matching empty labels is now equivalent to matching unset - labels (`mymetric{label=""}` now matches series that don't have - `label` set at all). -* The special `__meta_consul_tags` label in Consul service discovery - now starts and ends with tag separators to enable easier regex - matching. -* The default scrape interval has been changed back from 10 seconds to - 1 minute. - -All changes: - -* [CHANGE] Change default storage directory to `data` in the current - working directory. -* [CHANGE] Consolidate external reachability flags (`-web.*`) into one. -* [CHANGE] Deprecate `keeping_extra` modifier keyword, rename it to - `keep_common`. -* [CHANGE] Improve label matching performance and treat unset labels - like empty labels in label matchers. -* [CHANGE] Remove `rule_checker` tool and add generic `promtool` CLI - tool which allows checking rules and configuration files. -* [CHANGE] Resolve rule files relative to config file. -* [CHANGE] Restore default ScrapeInterval of 1 minute instead of 10 seconds. -* [CHANGE] Surround `__meta_consul_tags` value with tag separators. -* [CHANGE] Update node disk console for new filesystem labels. -* [FEATURE] Add Consul's `ServiceAddress`, `Address`, and `ServicePort` as - meta labels to enable setting a custom scrape address if needed. -* [FEATURE] Add `hashmod` relabel action to allow for horizontal - sharding of Prometheus servers. -* [FEATURE] Add `honor_labels` scrape configuration option to not - overwrite any labels exposed by the target. -* [FEATURE] Add basic federation support on `/federate`. -* [FEATURE] Add optional `RUNBOOK` field to alert statements. -* [FEATURE] Add pre-relabel target labels to status page. -* [FEATURE] Add version information endpoint under `/version`. -* [FEATURE] Added initial stable API version 1 under `/api/v1`, - including ability to delete series and query more metadata. -* [FEATURE] Allow configuring query parameters when scraping metrics endpoints.
-* [FEATURE] Allow deleting time series via the new v1 API. -* [FEATURE] Allow individual ingested metrics to be relabeled. -* [FEATURE] Allow loading rule files from an entire directory. -* [FEATURE] Allow scalar expressions in range queries, improve error messages. -* [FEATURE] Support Zookeeper Serversets as a service discovery mechanism. -* [ENHANCEMENT] Add circleci yaml for Dockerfile test build. -* [ENHANCEMENT] Always show selected graph range, regardless of available data. -* [ENHANCEMENT] Change expression input field to multi-line textarea. -* [ENHANCEMENT] Enforce strict monotonicity of time stamps within a series. -* [ENHANCEMENT] Export build information as metric. -* [ENHANCEMENT] Improve UI of `/alerts` page. -* [ENHANCEMENT] Improve display of target labels on status page. -* [ENHANCEMENT] Improve initialization and routing functionality of web service. -* [ENHANCEMENT] Improve target URL handling and display. -* [ENHANCEMENT] New dockerfile using alpine-glibc base image and make. -* [ENHANCEMENT] Other minor fixes. -* [ENHANCEMENT] Preserve alert state across reloads. -* [ENHANCEMENT] Prettify flag help output even more. -* [ENHANCEMENT] README.md updates. -* [ENHANCEMENT] Raise error on unknown config parameters. -* [ENHANCEMENT] Refine v1 HTTP API output. -* [ENHANCEMENT] Show original configuration file contents on status - page instead of serialized YAML. -* [ENHANCEMENT] Start HUP signal handler earlier to not exit upon HUP - during startup. -* [ENHANCEMENT] Updated vendored dependencies. -* [BUGFIX] Do not panic in `StringToDuration()` on wrong duration unit. -* [BUGFIX] Exit on invalid rule files on startup. -* [BUGFIX] Fix a regression in the `.Path` console template variable. -* [BUGFIX] Fix chunk descriptor loading. -* [BUGFIX] Fix consoles "Prometheus" link to point to / -* [BUGFIX] Fix empty configuration file cases -* [BUGFIX] Fix float to int conversions in chunk encoding, which were - broken for some architectures. 
-* [BUGFIX] Fix overflow detection for serverset config. -* [BUGFIX] Fix race conditions in retrieval layer. -* [BUGFIX] Fix shutdown deadlock in Consul SD code. -* [BUGFIX] Fix the race condition targets in the Makefile. -* [BUGFIX] Fix value display error in web console. -* [BUGFIX] Hide authentication credentials in config `String()` output. -* [BUGFIX] Increment dirty counter metric in storage only if - `setDirty(true)` is called. -* [BUGFIX] Periodically refresh services in Consul to recover from - missing events. -* [BUGFIX] Prevent overwrite of default global config when loading a - configuration. -* [BUGFIX] Properly lex `\r` as whitespace in expression language. -* [BUGFIX] Validate label names in JSON target groups. -* [BUGFIX] Validate presence of regex field in relabeling configurations. -* [CLEANUP] Clean up initialization of remote storage queues. -* [CLEANUP] Fix `go vet` and `golint` violations. -* [CLEANUP] General cleanup of rules and query language code. -* [CLEANUP] Improve and simplify Dockerfile build steps. -* [CLEANUP] Improve and simplify build infrastructure, use go-bindata - for web assets. Allow building without git. -* [CLEANUP] Move all utility packages into common `util` subdirectory. -* [CLEANUP] Refactor main, flag handling, and web package. -* [CLEANUP] Remove unused methods from `Rule` interface. -* [CLEANUP] Simplify default config handling. -* [CLEANUP] Switch human-readable times on web UI to UTC. -* [CLEANUP] Use `templates.TemplateExpander` for all page templates. -* [CLEANUP] Use new v1 HTTP API for querying and graphing. - -## 0.14.0 / 2015-06-01 - -* [CHANGE] Configuration format changed and switched to YAML. - (See the provided [migration tool](https://github.com/prometheus/migrate/releases).) -* [ENHANCEMENT] Redesign of state-preserving target discovery. -* [ENHANCEMENT] Allow specifying scrape URL scheme and basic HTTP auth for non-static targets. 
-* [FEATURE] Allow attaching meaningful labels to targets via relabeling. -* [FEATURE] Configuration/rule reloading at runtime. -* [FEATURE] Target discovery via file watches. -* [FEATURE] Target discovery via Consul. -* [ENHANCEMENT] Simplified binary operation evaluation. -* [ENHANCEMENT] More stable component initialization. -* [ENHANCEMENT] Added internal expression testing language. -* [BUGFIX] Fix graph links with path prefix. -* [ENHANCEMENT] Allow building from source without git. -* [ENHANCEMENT] Improve storage iterator performance. -* [ENHANCEMENT] Change logging output format and flags. -* [BUGFIX] Fix memory alignment bug for 32bit systems. -* [ENHANCEMENT] Improve web redirection behavior. -* [ENHANCEMENT] Allow overriding default hostname for Prometheus URLs. -* [BUGFIX] Fix double slash in URL sent to alertmanager. -* [FEATURE] Add resets() query function to count counter resets. -* [FEATURE] Add changes() query function to count the number of times a gauge changed. -* [FEATURE] Add increase() query function to calculate a counter's increase. -* [ENHANCEMENT] Limit retrievable samples to the storage's retention window. - -## 0.13.4 / 2015-05-23 - -* [BUGFIX] Fix a race while checkpointing fingerprint mappings. - -## 0.13.3 / 2015-05-11 - -* [BUGFIX] Handle fingerprint collisions properly. -* [CHANGE] Comments in rules file must start with `#`. (The undocumented `//` - and `/*...*/` comment styles are no longer supported.) -* [ENHANCEMENT] Switch to custom expression language parser and evaluation - engine, which generates better error messages, fixes some parsing edge-cases, - and enables other future enhancements (like the ones below). -* [ENHANCEMENT] Limit maximum number of concurrent queries. -* [ENHANCEMENT] Terminate running queries during shutdown. - -## 0.13.2 / 2015-05-05 - -* [MAINTENANCE] Updated vendored dependencies to their newest versions. -* [MAINTENANCE] Include rule_checker and console templates in release tarball. 
-* [BUGFIX] Sort NaN as the lowest value. -* [ENHANCEMENT] Add square root, stddev and stdvar functions. -* [BUGFIX] Use scrape_timeout for scrape timeout, not scrape_interval. -* [ENHANCEMENT] Improve chunk and chunkDesc loading, increase performance when - reading from disk. -* [BUGFIX] Show correct error on wrong DNS response. - -## 0.13.1 / 2015-04-09 - -* [BUGFIX] Treat memory series with zero chunks correctly in series maintenance. -* [ENHANCEMENT] Improve readability of usage text even more. - -## 0.13.0 / 2015-04-08 - -* [ENHANCEMENT] Double-delta encoding for chunks, saving typically 40% of - space, both in RAM and on disk. -* [ENHANCEMENT] Redesign of chunk persistence queuing, increasing performance - on spinning disks significantly. -* [ENHANCEMENT] Redesign of sample ingestion, increasing ingestion performance. -* [FEATURE] Added ln, log2, log10 and exp functions to the query language. -* [FEATURE] Experimental write support to InfluxDB. -* [FEATURE] Allow custom timestamps in instant query API. -* [FEATURE] Configurable path prefix for URLs to support proxies. -* [ENHANCEMENT] Increase of rule_checker CLI usability. -* [CHANGE] Show special float values as gaps. -* [ENHANCEMENT] Made usage output more readable. -* [ENHANCEMENT] Increased resilience of the storage against data corruption. -* [ENHANCEMENT] Various improvements around chunk encoding. -* [ENHANCEMENT] Nicer formatting of target health table on /status. -* [CHANGE] Rename UNREACHABLE to UNHEALTHY, ALIVE to HEALTHY. -* [BUGFIX] Strip trailing slash in alertmanager URL. -* [BUGFIX] Avoid +InfYs and similar, just display +Inf. -* [BUGFIX] Fixed HTML-escaping at various places. -* [BUGFIX] Fixed special value handling in division and modulo of the query - language. -* [BUGFIX] Fix embed-static.sh. -* [CLEANUP] Added initial HTTP API tests. -* [CLEANUP] Misc. other code cleanups. -* [MAINTENANCE] Updated vendored dependencies to their newest versions. 
- -## 0.12.0 / 2015-03-04 - -* [CHANGE] Use client_golang v0.3.1. THIS CHANGES FINGERPRINTING AND INVALIDATES - ALL PERSISTED FINGERPRINTS. You have to wipe your storage to use this or - later versions. There is a version guard in place that will prevent you to - run Prometheus with the stored data of an older Prometheus. -* [BUGFIX] The change above fixes a weakness in the fingerprinting algorithm. -* [ENHANCEMENT] The change above makes fingerprinting faster and less allocation - intensive. -* [FEATURE] OR operator and vector matching options. See docs for details. -* [ENHANCEMENT] Scientific notation and special float values (Inf, NaN) now - supported by the expression language. -* [CHANGE] Dockerfile makes Prometheus use the Docker volume to store data - (rather than /tmp/metrics). -* [CHANGE] Makefile uses Go 1.4.2. - -## 0.11.1 / 2015-02-27 - -* [BUGFIX] Make series maintenance complete again. (Ever since 0.9.0rc4, - or commit 0851945, series would not be archived, chunk descriptors would - not be evicted, and stale head chunks would never be closed. This happened - due to accidental deletion of a line calling a (well tested :) function. -* [BUGFIX] Do not double count head chunks read from checkpoint on startup. - Also fix a related but less severe bug in counting chunk descriptors. -* [BUGFIX] Check last time in head chunk for head chunk timeout, not first. -* [CHANGE] Update vendoring due to vendoring changes in client_golang. -* [CLEANUP] Code cleanups. -* [ENHANCEMENT] Limit the number of 'dirty' series counted during checkpointing. - -## 0.11.0 / 2015-02-23 - -* [FEATURE] Introduce new metric type Histogram with server-side aggregation. -* [FEATURE] Add offset operator. -* [FEATURE] Add floor, ceil and round functions. -* [CHANGE] Change instance identifiers to be host:port. -* [CHANGE] Dependency management and vendoring changed/improved. -* [CHANGE] Flag name changes to create consistency between various Prometheus - binaries. 
-* [CHANGE] Show unlimited number of metrics in autocomplete. -* [CHANGE] Add query timeout. -* [CHANGE] Remove labels on persist error counter. -* [ENHANCEMENT] Various performance improvements for sample ingestion. -* [ENHANCEMENT] Various Makefile improvements. -* [ENHANCEMENT] Various console template improvements, including - proof-of-concept for federation via console templates. -* [ENHANCEMENT] Fix graph JS glitches and simplify graphing code. -* [ENHANCEMENT] Dramatically decrease resources for file embedding. -* [ENHANCEMENT] Crash recovery saves lost series data in 'orphaned' directory. -* [BUGFIX] Fix aggregation grouping key calculation. -* [BUGFIX] Fix Go download path for various architectures. -* [BUGFIX] Fixed the link of the Travis build status image. -* [BUGFIX] Fix Rickshaw/D3 version mismatch. -* [CLEANUP] Various code cleanups. - -## 0.10.0 / 2015-01-26 - -* [CHANGE] More efficient JSON result format in query API. This requires - up-to-date versions of PromDash and prometheus_cli, too. -* [ENHANCEMENT] Excluded non-minified Bootstrap assets and the Bootstrap maps - from embedding into the binary. Those files are only used for debugging, - and then you can use -web.use-local-assets. By including fewer files, the - RAM usage during compilation is much more manageable. -* [ENHANCEMENT] Help link points to <https://prometheus.github.io> now. -* [FEATURE] Consoles for haproxy and cloudwatch. -* [BUGFIX] Several fixes to graphs in consoles. -* [CLEANUP] Removed a file size check that did not check anything. - -## 0.9.0 / 2015-01-23 - -* [CHANGE] Reworked command line flags, now more consistent and taking into - account needs of the new storage backend (see below). -* [CHANGE] Metric names are dropped after certain transformations. -* [CHANGE] Changed partitioning of summary metrics exported by Prometheus. -* [CHANGE] Got rid of Gerrit as a review tool. -* [CHANGE] 'Tabular' view now the default (rather than 'Graph') to avoid - running very expensive queries accidentally.
-* [CHANGE] On-disk format for stored samples changed. For upgrading, you have - to nuke your old files completely. See "Complete rewrite of the storage - layer" below. -* [CHANGE] Removed 2nd argument from `delta`. -* [FEATURE] Added a `deriv` function. -* [FEATURE] Console templates. -* [FEATURE] Added `absent` function. -* [FEATURE] Allow omitting the metric name in queries. -* [BUGFIX] Removed all known race conditions. -* [BUGFIX] Metric mutations now handled correctly in all cases. -* [ENHANCEMENT] Proper double-start protection. -* [ENHANCEMENT] Complete rewrite of the storage layer. Benefits include: - * Better query performance. - * More samples in less RAM. - * Better memory management. - * Scales up to millions of time series and thousands of samples ingested - per second. - * Purging of obsolete samples much cleaner now, up to completely - "forgetting" obsolete time series. - * Proper instrumentation to diagnose the storage layer with... well... - Prometheus. - * Pure Go implementation, no need for cgo and shared C libraries anymore. - * Better concurrency. -* [ENHANCEMENT] Copy-on-write semantics in the AST layer. -* [ENHANCEMENT] Switched from Go 1.3 to Go 1.4. -* [ENHANCEMENT] Vendored external dependencies with godeps. -* [ENHANCEMENT] Numerous Web UI improvements, moved to Bootstrap3 and - Rickshaw 1.5.1. -* [ENHANCEMENT] Improved Docker integration. -* [ENHANCEMENT] Simplified the Makefile contraption. -* [CLEANUP] Put meta-data files into proper shape (LICENSE, README.md etc.) -* [CLEANUP] Removed all legitimate 'go vet' and 'golint' warnings. -* [CLEANUP] Removed dead code. - -## 0.8.0 / 2014-09-04 - -* [ENHANCEMENT] Stagger scrapes to spread out load. -* [BUGFIX] Correctly quote HTTP Accept header. - -## 0.7.0 / 2014-08-06 - -* [FEATURE] Added new functions: abs(), topk(), bottomk(), drop_common_labels(). -* [FEATURE] Let console templates get graph links from expressions. -* [FEATURE] Allow console templates to dynamically include other templates.
-* [FEATURE] Template consoles now have access to their URL. -* [BUGFIX] Fixed time() function to return evaluation time, not wallclock time. -* [BUGFIX] Fixed HTTP connection leak when targets returned a non-200 status. -* [BUGFIX] Fixed link to console templates in UI. -* [PERFORMANCE] Removed extra memory copies while scraping targets. -* [ENHANCEMENT] Switched from Go 1.2.1 to Go 1.3. -* [ENHANCEMENT] Made metrics exported by Prometheus itself more consistent. -* [ENHANCEMENT] Removed incremental backoffs for unhealthy targets. -* [ENHANCEMENT] Dockerfile also builds Prometheus support tools now. - -## 0.6.0 / 2014-06-30 - -* [FEATURE] Added console and alert templates support, along with various template functions. -* [PERFORMANCE] Much faster and more memory-efficient flushing to disk. -* [ENHANCEMENT] Query results are now only logged when debugging. -* [ENHANCEMENT] Upgraded to new Prometheus client library for exposing metrics. -* [BUGFIX] Samples are now kept in memory until fully flushed to disk. -* [BUGFIX] Non-200 target scrapes are now treated as an error. -* [BUGFIX] Added installation step for missing dependency to Dockerfile. -* [BUGFIX] Removed broken and unused "User Dashboard" link. - -## 0.5.0 / 2014-05-28 - -* [BUGFIX] Fixed next retrieval time display on status page. -* [BUGFIX] Updated some variable references in tools subdir. -* [FEATURE] Added support for scraping metrics via the new text format. -* [PERFORMANCE] Improved label matcher performance. -* [PERFORMANCE] Removed JSON indentation in query API, leading to smaller response sizes. -* [ENHANCEMENT] Added internal check to verify temporal order of streams. -* [ENHANCEMENT] Some internal refactorings. - -## 0.4.0 / 2014-04-17 - -* [FEATURE] Vectors and scalars may now be reversed in binary operations (`<scalar> <binop> <vector>`). -* [FEATURE] It's possible to shutdown Prometheus via a `/-/quit` web endpoint now. -* [BUGFIX] Fix for a deadlock race condition in the memory storage.
-* [BUGFIX] Mac OS X build fixed. -* [BUGFIX] Built from Go 1.2.1, which has internal fixes to race conditions in garbage collection handling. -* [ENHANCEMENT] Internal storage interface refactoring that allows building e.g. the `rule_checker` tool without LevelDB dynamic library dependencies. -* [ENHANCEMENT] Cleanups around shutdown handling. -* [PERFORMANCE] Preparations for better memory reuse during marshaling / unmarshaling. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9b1b286ccf..37ae0a4471 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,102 +1,33 @@ -# Contributing +# How to Contribute -Prometheus uses GitHub to manage reviews of pull requests. +We'd love to accept your patches and contributions to this project. -* If you are a new contributor see: [Steps to Contribute](#steps-to-contribute) +## Before you begin -* If you have a trivial fix or improvement, go ahead and create a pull request, - addressing (with `@...`) a suitable maintainer of this repository (see - [MAINTAINERS.md](MAINTAINERS.md)) in the description of the pull request. +### Sign our Contributor License Agreement -* If you plan to do something more involved, first discuss your ideas - on our [mailing list](https://groups.google.com/forum/?fromgroups#!forum/prometheus-developers). - This will avoid unnecessary work and surely give you and us a good deal - of inspiration. Also please see our [non-goals issue](https://github.com/prometheus/docs/issues/149) on areas that the Prometheus community doesn't plan to work on. +Contributions to this project must be accompanied by a +[Contributor License Agreement](https://cla.developers.google.com/about) (CLA). +You (or your employer) retain the copyright to your contribution; this simply +gives us permission to use and redistribute your contributions as part of the +project. 
-* Relevant coding style guidelines are the [Go Code Review - Comments](https://code.google.com/p/go-wiki/wiki/CodeReviewComments) - and the _Formatting and style_ section of Peter Bourgon's [Go: Best - Practices for Production - Environments](https://peter.bourgon.org/go-in-production/#formatting-and-style). +If you or your current employer have already signed the Google CLA (even if it +was for a different project), you probably don't need to do it again. -* Be sure to sign off on the [DCO](https://github.com/probot/dco#how-it-works). +Visit <https://cla.developers.google.com/> to see your current agreements or to +sign a new one. -## Steps to Contribute +### Review our Community Guidelines -Should you wish to work on an issue, please claim it first by commenting on the GitHub issue that you want to work on it. This is to prevent duplicated efforts from contributors on the same issue. +This project follows +[Google's Open Source Community Guidelines](https://opensource.google/conduct/). -Please check the [`low-hanging-fruit`](https://github.com/prometheus/prometheus/issues?q=is%3Aissue+is%3Aopen+label%3A%22low+hanging+fruit%22) label to find issues that are good for getting started. If you have questions about one of the issues, with or without the tag, please comment on them and one of the maintainers will clarify it. For a quicker response, contact us over [IRC](https://prometheus.io/community). +## Contribution process -You can [spin up a prebuilt dev environment](https://gitpod.io/#https://github.com/prometheus/prometheus) using Gitpod.io. +### Code Reviews -For complete instructions on how to compile see: [Building From Source](https://github.com/prometheus/prometheus#building-from-source) - -For quickly compiling and testing your changes do: - -```bash -# For building. -go build ./cmd/prometheus/ -./prometheus - -# For testing.
-make test # Make sure all the tests pass before you commit and push :) -``` - -To run a collection of Go linters through [`golangci-lint`](https://github.com/golangci/golangci-lint), do: -```bash -make lint -``` - -If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. See [this section of the golangci-lint documentation](https://golangci-lint.run/usage/false-positives/#nolint-directive) for more information. - -All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). - -## Pull Request Checklist - -* Branch from the main branch and, if needed, rebase to the current main branch before submitting your pull request. If it doesn't merge cleanly with main you may be asked to rebase your changes. - -* Commits should be as small as possible, while ensuring that each commit is correct independently (i.e., each commit should compile and pass tests). - -* If your patch is not getting reviewed or you need a specific person to review it, you can @-reply a reviewer asking for a review in the pull request or a comment, or you can ask for a review on the IRC channel [#prometheus-dev](https://web.libera.chat/?channels=#prometheus-dev) on irc.libera.chat (for the easiest start, [join via Element](https://app.element.io/#/room/#prometheus-dev:matrix.org)). - -* Add tests relevant to the fixed bug or new feature. - -## Dependency management - -The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. 
- -To add or update a new dependency, use the `go get` command: - -```bash -# Pick the latest tagged release. -go get example.com/some/module/pkg@latest - -# Pick a specific version. -go get example.com/some/module/pkg@vX.Y.Z -``` - -Tidy up the `go.mod` and `go.sum` files: - -```bash -# The GO111MODULE variable can be omitted when the code isn't located in GOPATH. -GO111MODULE=on go mod tidy -``` - -You have to commit the changes to `go.mod` and `go.sum` before submitting the pull request. - -## Working with the PromQL parser - -The PromQL parser grammar is located in `promql/parser/generated_parser.y` and it can be built using `make parser`. -The parser is built using [goyacc](https://pkg.go.dev/golang.org/x/tools/cmd/goyacc) - -If doing some sort of debugging, then it is possible to add some verbose output. After generating the parser, then you -can modify the `./promql/parser/generated_parser.y.go` manually. - -```golang -// As of writing this was somewhere around line 600. -var ( - yyDebug = 0 // This can be a number 0 -> 5. - yyErrorVerbose = false // This can be set to true. -) - -``` +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. 
diff --git a/Dockerfile b/Dockerfile index 31e863d8a0..941415cf7f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,24 +1,69 @@ -ARG ARCH="amd64" -ARG OS="linux" -FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest -LABEL maintainer="The Prometheus Authors " -LABEL org.opencontainers.image.source="https://github.com/prometheus/prometheus" - -ARG ARCH="amd64" -ARG OS="linux" -COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus -COPY .build/${OS}-${ARCH}/promtool /bin/promtool +ARG IMAGE_BUILD_NODEJS=launcher.gcr.io/google/nodejs +ARG IMAGE_BUILD_GO=google-go.pkg.dev/golang:1.24.5@sha256:579ae701e259b1bb4200e2f5713751c62401220277df267c6914e06df12f1a9b + +ARG IMAGE_BASE_DEBUG=gcr.io/distroless/static-debian12:debug +ARG IMAGE_BASE=gcr.io/distroless/static-debian12:nonroot + +FROM ${IMAGE_BUILD_GO} AS gobase + +# Compile the UI assets. +FROM ${IMAGE_BUILD_NODEJS} AS assets +# To build the UI we need a recent node version and the go toolchain. +RUN install_node v17.9.0 +COPY --from=gobase /usr/local/go /usr/local/ +ENV PATH="/usr/local/go/bin:${PATH}" +WORKDIR /app +COPY . ./ +RUN pwd +# Only build the UI but don't run ui-install as we vendor node_modules. +RUN make ui-build +RUN scripts/compress_assets.sh +RUN make npm_licenses + +# Build the actual Go binary. +FROM gobase AS buildbase +WORKDIR /app +COPY --from=assets /app ./ +ENV GOEXPERIMENT=noboringcrypto +ENV CGO_ENABLED=0 +ENV GOFIPS140=latest +RUN CGO_ENABLED=0 go build \ + -tags builtinassets -mod=vendor \ + -ldflags="-X github.com/prometheus/common/version.Version=$(cat VERSION) \ + -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \ + ./cmd/prometheus +RUN CGO_ENABLED=0 go build \ + -mod=vendor \ + -ldflags="-X github.com/prometheus/common/version.Version=$(cat VERSION) \ + -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \ + ./cmd/promtool + +# Configure distroless base image like the upstream Prometheus image. 
+# Since the directory and symlink setup needs shell access, we need yet another +# intermediate stage. +FROM ${IMAGE_BASE_DEBUG} AS appbase + COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml -COPY LICENSE /LICENSE -COPY NOTICE /NOTICE -COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2 +COPY console_libraries/ /usr/share/prometheus/console_libraries/ +COPY consoles/ /usr/share/prometheus/consoles/ +RUN ["/busybox/sh", "-c", "ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/"] +RUN ["/busybox/sh", "-c", "mkdir -p /prometheus"] + +FROM ${IMAGE_BASE} -WORKDIR /prometheus -RUN chown -R nobody:nobody /etc/prometheus /prometheus && chmod g+w /prometheus +COPY --from=buildbase /app/prometheus /bin/prometheus +COPY --from=appbase --chown=nobody:nobody /etc/prometheus /etc/prometheus +COPY --from=appbase --chown=nobody:nobody /prometheus /prometheus +COPY --from=appbase /usr/share/prometheus /usr/share/prometheus +COPY LICENSE /LICENSE +COPY NOTICE /NOTICE +COPY --from=assets /app/npm_licenses.tar.bz2 /npm_licenses.tar.bz2 USER nobody EXPOSE 9090 VOLUME [ "/prometheus" ] ENTRYPOINT [ "/bin/prometheus" ] CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ - "--storage.tsdb.path=/prometheus" ] + "--storage.tsdb.path=/prometheus", \ + "--web.console.libraries=/usr/share/prometheus/console_libraries", \ + "--web.console.templates=/usr/share/prometheus/consoles" ] diff --git a/MAINTAINERS.md b/MAINTAINERS.md deleted file mode 100644 index 8d10a8fbca..0000000000 --- a/MAINTAINERS.md +++ /dev/null @@ -1,42 +0,0 @@ -# Maintainers - -General maintainers: -* Bryan Boreham (bjboreham@gmail.com / @bboreham) -* Ayoub Mrini (ayoubmrini424@gmail.com / @machine424) -* Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie) - -Maintainers for specific parts of the codebase: -* `cmd` - * `promtool`: David Leadbeater ( / @dgl) -* `discovery` - * `azure`: Jan-Otto Kröpke ( / @jkroepke) - * `k8s`: Frederic Branczyk ( / 
@brancz) - * `stackit`: Jan-Otto Kröpke ( / @jkroepke) -* `documentation` - * `prometheus-mixin`: Matthias Loibl ( / @metalmatze) -* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), -George Krajcsovits ( / @krajorama) -* `storage` - * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank) - * `otlptranslator`: Arthur Silva Sens ( / @ArthurSens), Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) -* `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez) -* `web` - * `ui`: Julius Volz ( / @juliusv) - * `module`: Augustin Husson ( @nexucis) -* `Makefile` and related build configuration: Simon Pasquier ( / @simonpasquier), Ben Kochie ( / @SuperQ) - -For the sake of brevity, not all subtrees are explicitly listed. Due to the -size of this repository, the natural changes in focus of maintainers over time, -and nuances of where particular features live, this list will always be -incomplete and out of date. However the listed maintainer(s) should be able to -direct a PR/question to the right person. - -v3 release coordinators: -* Alex Greenbank ( / @alexgreenbank) -* Carrie Edwards ( / @carrieedwards) -* Fiona Liao ( / @fionaliao) -* Jan Fajerski ( / @jan--f) -* Jesús Vázquez ( / @jesusvazquez) -* Nico Pazos ( / @npazosmendez) -* Owen Williams ( / @ywwg) -* Tom Braack ( / @sh0rez) diff --git a/README.md b/README.md index 26262734c0..ec31405941 100644 --- a/README.md +++ b/README.md @@ -1,205 +1,10 @@ -

- Prometheus
Prometheus -

+## Google Cloud Managed Service for Prometheus (GMP) Fork -

Visit prometheus.io for the full documentation, -examples and guides.

+> NOTICE: This repository is a fork of [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) that includes support for GMP. +> +> We actively work on ensuring vanilla Prometheus can work with GMP; this fork will +> be significantly reduced. Notably, the custom GCM export will be removed. -
+For GMP specific documentation and to get started, go to [g.co/cloud/managedprometheus](https://g.co/cloud/managedprometheus). -[![CI](https://github.com/prometheus/prometheus/actions/workflows/ci.yml/badge.svg)](https://github.com/prometheus/prometheus/actions/workflows/ci.yml) -[![Docker Repository on Quay](https://quay.io/repository/prometheus/prometheus/status)][quay] -[![Docker Pulls](https://img.shields.io/docker/pulls/prom/prometheus.svg?maxAge=604800)][hub] -[![Go Report Card](https://goreportcard.com/badge/github.com/prometheus/prometheus)](https://goreportcard.com/report/github.com/prometheus/prometheus) -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486) -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus) -[![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/prometheus/badge)](https://clomonitor.io/projects/cncf/prometheus) -[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/prometheus/prometheus) -[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/prometheus.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:prometheus) - -
- -Prometheus, a [Cloud Native Computing Foundation](https://cncf.io/) project, is a systems and service monitoring system. It collects metrics -from configured targets at given intervals, evaluates rule expressions, -displays the results, and can trigger alerts when specified conditions are observed. - -The features that distinguish Prometheus from other metrics and monitoring systems are: - -* A **multi-dimensional** data model (time series defined by metric name and set of key/value dimensions) -* PromQL, a **powerful and flexible query language** to leverage this dimensionality -* No dependency on distributed storage; **single server nodes are autonomous** -* An HTTP **pull model** for time series collection -* **Pushing time series** is supported via an intermediary gateway for batch jobs -* Targets are discovered via **service discovery** or **static configuration** -* Multiple modes of **graphing and dashboarding support** -* Support for hierarchical and horizontal **federation** - -## Architecture overview - -![Architecture overview](documentation/images/architecture.svg) - -## Install - -There are various ways of installing Prometheus. - -### Precompiled binaries - -Precompiled binaries for released versions are available in the -[*download* section](https://prometheus.io/download/) -on [prometheus.io](https://prometheus.io). Using the latest production release binary -is the recommended way of installing Prometheus. -See the [Installing](https://prometheus.io/docs/introduction/install/) -chapter in the documentation for all the details. - -### Docker images - -Docker images are available on [Quay.io](https://quay.io/repository/prometheus/prometheus) or [Docker Hub](https://hub.docker.com/r/prom/prometheus/). - -You can launch a Prometheus container for trying it out with - -```bash -docker run --name prometheus -d -p 127.0.0.1:9090:9090 prom/prometheus -``` - -Prometheus will now be reachable at <http://localhost:9090/>.
- -### Building from source - -To build Prometheus from source code, You need: - -* Go [version 1.22 or greater](https://golang.org/doc/install). -* NodeJS [version 22 or greater](https://nodejs.org/). -* npm [version 8 or greater](https://www.npmjs.com/). - -Start by cloning the repository: - -```bash -git clone https://github.com/prometheus/prometheus.git -cd prometheus -``` - -You can use the `go` tool to build and install the `prometheus` -and `promtool` binaries into your `GOPATH`: - -```bash -GO111MODULE=on go install github.com/prometheus/prometheus/cmd/... -prometheus --config.file=your_config.yml -``` - -*However*, when using `go install` to build Prometheus, Prometheus will expect to be able to -read its web assets from local filesystem directories under `web/ui/static` and -`web/ui/templates`. In order for these assets to be found, you will have to run Prometheus -from the root of the cloned repository. Note also that these directories do not include the -React UI unless it has been built explicitly using `make assets` or `make build`. - -An example of the above configuration file can be found [here.](https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus.yml) - -You can also build using `make build`, which will compile in the web assets so that -Prometheus can be run from anywhere: - -```bash -make build -./prometheus --config.file=your_config.yml -``` - -The Makefile provides several targets: - -* *build*: build the `prometheus` and `promtool` binaries (includes building and compiling in web assets) -* *test*: run the tests -* *test-short*: run the short tests -* *format*: format the source code -* *vet*: check the source code for common errors -* *assets*: build the React UI - -### Service discovery plugins - -Prometheus is bundled with many service discovery plugins. -When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml) -file to disable some service discoveries. 
The file is a yaml-formatted list of go -import path that will be built into the Prometheus binary. - -After you have changed the file, you -need to run `make build` again. - -If you are using another method to compile Prometheus, `make plugins` will -generate the plugins file accordingly. - -If you add out-of-tree plugins, which we do not endorse at the moment, -additional steps might be needed to adjust the `go.mod` and `go.sum` files. As -always, be extra careful when loading third party code. - -### Building the Docker image - -You can build a docker image locally with the following commands: - -```bash -make promu -promu crossbuild -p linux/amd64 -make npm_licenses -make common-docker-amd64 -``` - -The `make docker` target is intended only for use in our CI system and will not -produce a fully working image when run locally. - -## Using Prometheus as a Go Library - -### Remote Write - -We are publishing our Remote Write protobuf independently at -[buf.build](https://buf.build/prometheus/prometheus/assets). - -You can use that as a library: - -```shell -go get buf.build/gen/go/prometheus/prometheus/protocolbuffers/go@latest -``` - -This is experimental. - -### Prometheus code base - -In order to comply with [go mod](https://go.dev/ref/mod#versions) rules, -Prometheus release number do not exactly match Go module releases. - -For the -Prometheus v3.y.z releases, we are publishing equivalent v0.3y.z tags. The y in v0.3y.z is always padded to two digits, with a leading zero if needed. - -Therefore, a user that would want to use Prometheus v3.0.0 as a library could do: - -```shell -go get github.com/prometheus/prometheus@v0.300.0 -``` - -For the -Prometheus v2.y.z releases, we published the equivalent v0.y.z tags. 
- -Therefore, a user that would want to use Prometheus v2.35.0 as a library could do: - -```shell -go get github.com/prometheus/prometheus@v0.35.0 -``` - -This solution makes it clear that we might break our internal Go APIs between -minor user-facing releases, as [breaking changes are allowed in major version -zero](https://semver.org/#spec-item-4). - -## React UI Development - -For more information on building, running, and developing on the React-based UI, see the React app's [README.md](web/ui/README.md). - -## More information - -* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v3.y.z will be displayed as v0.3y.z (the y in v0.3y.z is always padded to two digits, with a leading zero if needed), while v2.y.z will be displayed as v0.y.z. -* See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels. - -## Contributing - -Refer to [CONTRIBUTING.md](https://github.com/prometheus/prometheus/blob/main/CONTRIBUTING.md) - -## License - -Apache License 2.0, see [LICENSE](https://github.com/prometheus/prometheus/blob/main/LICENSE). - -[hub]: https://hub.docker.com/r/prom/prometheus/ -[quay]: https://quay.io/repository/prometheus/prometheus +Otherwise, refer to https://github.com/prometheus/prometheus documentation. diff --git a/RELEASE.md b/RELEASE.md deleted file mode 100644 index a7032bd95e..0000000000 --- a/RELEASE.md +++ /dev/null @@ -1,179 +0,0 @@ -# Releases - -This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release shepherd. Release shepherds are chosen on a voluntary basis. - -## Release schedule - -Release cadence of first pre-releases being cut is 6 weeks. -Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/release-2.55/RELEASE.md) for the v2 release series schedule. 
- -| release series | date of first pre-release (year-month-day) | release shepherd | -|----------------|--------------------------------------------|------------------------------------| -| v3.0 | 2024-11-14 | Jan Fajerski (GitHub: @jan--f) | -| v3.1 | 2024-12-17 | Bryan Boreham (GitHub: @bboreham) | -| v3.2 | 2025-01-28 | Jan Fajerski (GitHub: @jan--f) | -| v3.3 | 2025-03-11 | Ayoub Mrini (Github: @machine424) | -| v3.4 | 2025-04-29 | Jan-Otto Kröpke (Github: @jkroepke)| -| v3.5 LTS | 2025-06-03 | Bryan Boreham (GitHub: @bboreham) | -| v3.6 | 2025-07-15 | **volunteer welcome** | - -If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice. - -## Release shepherd responsibilities - -The release shepherd is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process formally starts with the initial pre-release, but some preparations should be done a few days in advance. - -* We aim to keep the main branch in a working state at all times. In principle, it should be possible to cut a release from main at any time. In practice, things might not work out as nicely. A few days before the pre-release is scheduled, the shepherd should check the state of main. Following their best judgement, the shepherd should try to expedite bug fixes that are still in progress but should make it into the release. On the other hand, the shepherd may hold back merging last-minute invasive and risky changes that are better suited for the next minor release. -* On the date listed in the table above, the release shepherd cuts the first pre-release (using the suffix `-rc.0`) and creates a new branch called `release-.` starting at the commit tagged for the pre-release. 
In general, a pre-release is considered a release candidate (that's what `rc` stands for) and should therefore not contain any known bugs that are planned to be fixed in the final release. -* With the pre-release, the release shepherd is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release. -* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.). - -See the next section for details on cutting an individual release. - -## How to cut an individual release - -These instructions are currently valid for the Prometheus server, i.e. the [prometheus/prometheus repository](https://github.com/prometheus/prometheus). Applicability to other Prometheus repositories depends on the current state of each repository. We aspire to unify the release procedures as much as possible. - -### Branch management and versioning strategy - -We use [Semantic Versioning](https://semver.org/). - -We maintain a separate branch for each minor release, named `release-<major>.<minor>`, e.g. `release-1.1`, `release-2.0`. - -Note that branch protection kicks in automatically for any branches whose name starts with `release-`. Never use names starting with `release-` for branches that are not release branches. - -The usual flow is to merge new features and changes into the main branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into main from the latest release branch. The main branch should always contain all commits from the latest release branch. As long as main hasn't deviated from the release branch, new commits can also go to main, followed by merging main back into the release branch. - -If a bug fix got accidentally merged into main after non-bug-fix changes in main, the bug-fix commits have to be cherry-picked into the release branch, which then has to be merged back into main.
Try to avoid that situation. - -Maintaining the release branches for older minor releases happens on a best effort basis. - -### 0. Updating dependencies and promoting/demoting experimental features - -A few days before a major or minor release, consider updating the dependencies. - -Note that we use [Dependabot](.github/dependabot.yml) to continuously update most things automatically. Therefore, most dependencies should be up to date. -Check the [dependencies GitHub label](https://github.com/prometheus/prometheus/labels/dependencies) to see if there are any pending updates. - -This bot currently does not manage `+incompatible` and `v0.0.0` in the version specifier for Go modules. - -Note that after a dependency update, you should look out for any weirdness that -might have happened. Such weirdnesses include but are not limited to: flaky -tests, differences in resource usage, panic. - -In case of doubt or issues that can't be solved in a reasonable amount of time, -you can skip the dependency update or only update select dependencies. In such a -case, you have to create an issue or pull request in the GitHub project for -later follow-up. - -This is also a good time to consider any experimental features and feature -flags for promotion to stable or for deprecation or ultimately removal. Do any -of these in pull requests, one per feature. - -> NOTE: As a validation step check if all security alerts are closed after this step: https://github.com/prometheus/prometheus/security/dependabot. Sometimes it's ok -> if not critical and e.g. fix is not released yet (or it does not relate to -> upgrading) or when we are unaffected. - -#### Manually updating Go dependencies - -This is usually only needed for `+incompatible` and `v0.0.0` non-semver updates. - -```bash -make update-all-go-deps -``` - -#### Manually updating React dependencies - -The React application recently moved to a monorepo system with multiple internal npm packages. 
Dependency upgrades are -quite sensitive for the time being. - -In case you want to update the UI dependencies, you can run the following command: - -```bash -make update-npm-deps -``` - -Once this step completes, please verify that no additional `node_modules` directory was created in any of the module subdirectories -(which could indicate conflicting dependency versions across modules). Then run `make ui-build` to verify that the build is still working. - -Note: Once in a while, the npm dependencies should also be updated to their latest release versions (major or minor) with `make upgrade-npm-deps`, -though this may be done at convenient times (e.g. by the UI maintainers) that are out-of-sync with Prometheus releases. - -### 1. Prepare your release - -At the start of a new major or minor release cycle create the corresponding release branch based on the main branch. For example if we're releasing `2.17.0` and the previous stable release is `2.16.0` we need to create a `release-2.17` branch. Note that all releases are handled in protected release branches, see the above `Branch management and versioning strategy` section. Release candidates and patch releases for any given major or minor release happen in the same `release-<major>.<minor>` branch. Do not create `release-<version>` for patch or release candidate releases. - -Changes for a patch release or release candidate should be merged into the previously mentioned release branch via pull request. - -Bump the version in the `VERSION` file and update `CHANGELOG.md`. Do this in a proper PR pointing to the release branch as this gives others the opportunity to chime in on the release in general and on the addition to the changelog in particular. For a release candidate, append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.). - -When updating the `CHANGELOG.md` look at all PRs included in the release since the last release and verify if they need a changelog entry.
- -Note that `CHANGELOG.md` should only document changes relevant to users of Prometheus, including external API changes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. People interested in these are asked to refer to the git history. - -For release candidates still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update. - -Entries in the `CHANGELOG.md` are meant to be in this order: - -* `[SECURITY]` - A bugfix that specifically fixes a security issue. -* `[CHANGE]` -* `[FEATURE]` -* `[ENHANCEMENT]` -* `[BUGFIX]` - -Then bump the UI module version: - -```bash -make ui-bump-version -``` - -### 2. Draft the new release - -Tag the new release via the following commands: - -```bash -tag="v$(< VERSION)" -git tag -s "${tag}" -m "${tag}" -git push origin "${tag}" -``` - -Alternatively, you can use this handy `.gitconfig` alias. - -```ini -[alias] - tag-release = "!f() { tag=v${1:-$(cat VERSION)} ; git tag -s ${tag} -m ${tag} && git push origin ${tag}; }; f" -``` - -Then release with `git tag-release`. - -Signing a tag with a GPG key is appreciated, but in case you can't add a GPG key to your GitHub account using the following [procedure](https://help.github.com/articles/generating-a-gpg-key/), you can replace the `-s` flag with the `-a` flag of the `git tag` command to only annotate the tag without signing. - -Once a tag is created, the release process through GitHub Actions will be triggered for this tag and GitHub Actions will draft the GitHub release using the `prombot` account. - -Finally, wait for the build step for the tag to finish. The point here is to wait for tarballs to be uploaded to the GitHub release and the container images to be pushed to the Docker Hub and Quay.io.
Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification. -**Note:** for a release candidate version ensure the _This is a pre-release_ box is checked when drafting the release in the GitHub UI. The CI job should take care of this but it's a good idea to double check before clicking _Publish release_. - -### 3. Tag the library release - -Go modules versioning requires strict use of semver. Because we do not commit to -avoiding code-level breaking changes for the libraries between minor releases of -the Prometheus server, we use major version zero releases for the libraries. - -Tagging the new library release works similarly to the normal release tagging, -but without the subsequent build and publish steps. Use the following commands: - -```bash -tag="v$(./scripts/get_module_version.sh)" -git tag -s "${tag}" -m "${tag}" -git push origin "${tag}" -``` - -### 4. Wrapping up - -For release candidate versions (`v2.16.0-rc.0`), run the benchmark for 3 days using the `/prombench vX.Y.Z` command, `vX.Y.Z` being the latest stable patch release's tag of the previous minor release series, such as `v2.15.2`. - -If the release has happened in the latest release branch, merge the changes into main. - -Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration. - -Finally, in case there is no release shepherd listed for the next release yet, find a volunteer.
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index ed7aa52c8a..0ccca82c8d 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -43,14 +43,19 @@ import ( "github.com/grafana/regexp" "github.com/mwitkow/go-conntrack" "github.com/oklog/run" + "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" + common_config "github.com/prometheus/common/config" // ? "github.com/prometheus/common/model" "github.com/prometheus/common/promslog" promslogflag "github.com/prometheus/common/promslog/flag" "github.com/prometheus/common/version" toolkit_web "github.com/prometheus/exporter-toolkit/web" + "github.com/prometheus/prometheus/google/export" + gcm_export "github.com/prometheus/prometheus/google/export/setup" + "github.com/prometheus/prometheus/google/secrets" "go.uber.org/atomic" "go.uber.org/automaxprocs/maxprocs" "k8s.io/klog" @@ -157,21 +162,21 @@ func init() { // serverOnlyFlag creates server-only kingpin flag. func serverOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagClause { return app.Flag(name, fmt.Sprintf("%s Use with server mode only.", help)). - PreAction(func(_ *kingpin.ParseContext) error { - // This will be invoked only if flag is actually provided by user. - serverOnlyFlags = append(serverOnlyFlags, "--"+name) - return nil - }) + PreAction(func(_ *kingpin.ParseContext) error { + // This will be invoked only if flag is actually provided by user. + serverOnlyFlags = append(serverOnlyFlags, "--"+name) + return nil + }) } // agentOnlyFlag creates agent-only kingpin flag. func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagClause { return app.Flag(name, fmt.Sprintf("%s Use with agent mode only.", help)). - PreAction(func(_ *kingpin.ParseContext) error { - // This will be invoked only if flag is actually provided by user. 
- agentOnlyFlags = append(agentOnlyFlags, "--"+name) - return nil - }) + PreAction(func(_ *kingpin.ParseContext) error { + // This will be invoked only if flag is actually provided by user. + agentOnlyFlags = append(agentOnlyFlags, "--"+name) + return nil + }) } type flagConfig struct { @@ -208,6 +213,7 @@ type flagConfig struct { // for ease of use. enablePerStepStats bool enableConcurrentRuleEval bool + enableKubeSecretProvider bool prometheusURL string corsRegexString string @@ -275,6 +281,9 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { case "promql-delayed-name-removal": c.promqlEnableDelayedNameRemoval = true logger.Info("Experimental PromQL delayed name removal enabled.") + case "google-kubernetes-secret-provider": + c.enableKubeSecretProvider = true + logger.Info("Experimental (Google) Kubernetes secret provider enabled.") case "": continue case "old-ui": @@ -566,6 +575,10 @@ func main() { a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode) promslogflag.AddFlags(a, &cfg.promslogConfig) + // Set defaults to empty to ensure this command is deterministic. + a.GetFlag("export.label.project-id").Default("") + a.GetFlag("export.label.cluster").Default("") + a.GetFlag("export.label.location").Default("") a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(_ *kingpin.ParseContext) error { if err := documentcli.GenerateMarkdown(a.Model(), os.Stdout); err != nil { @@ -576,7 +589,25 @@ func main() { return nil }).Bool() - _, err := a.Parse(os.Args[1:]) + // GMP fork flags. + var deleteDataOnStart bool + a.Flag("gmp.storage.delete-data-on-start", "[GMP fork experimental flag] If true, all the storage related data (e.g. blocks, lock file, WAL, head chunks) in the --storage.tsdb.path or --storage.agent.path (depending on the mode) will be deleted, right before opening the DB. As a result, all previously collected samples will be uncoverably dropped. 
Use it in setups where the availability is more important than the persistence between restarts, as replaying data can take time and resources. This flag is especially useful on Kubernetes with ephemeral storage (for consistency between pod vs container restart), remote write use cases that prioritize live data and when you want to auto-recover from the OOM crashloops without changing memory limits for Prometheus (see https://github.com/prometheus/prometheus/issues/13939)."). + Default("false").BoolVar(&deleteDataOnStart) + + opts := gcm_export.Opts{ + ExporterOpts: export.ExporterOpts{ + UserAgentProduct: fmt.Sprintf("prometheus/%s", version.Version), + }, + } + opts.SetupFlags(a) + + extraArgs, err := gcm_export.ExtraArgs() + if err != nil { + fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing commandline arguments: %w", err)) + a.Usage(os.Args[1:]) + os.Exit(2) + } + _, err = a.Parse(append(os.Args[1:], extraArgs...)) if err != nil { fmt.Fprintf(os.Stderr, "Error parsing command line arguments: %s\n", err) a.Usage(os.Args[1:]) @@ -616,6 +647,16 @@ func main() { localStoragePath = cfg.agentStoragePath } + // NOTE(bwplotka): This opt-in functionality exists in our fork, relevant + // discussion in the upstream is here: https://github.com/prometheus/prometheus/issues/13939 + if deleteDataOnStart { + logger.Info("The --gmp.storage.delete-data-on-start flag was set, deleting relevant storage files in the storage path", "path", localStoragePath) + if err := deleteStorageData(agentMode, localStoragePath); err != nil { + fmt.Fprintln(os.Stderr, fmt.Errorf("failed to delete storage data as requested: %w", err)) + os.Exit(1) + } + } + cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddresses[0]) if err != nil { fmt.Fprintln(os.Stderr, fmt.Errorf("parse external URL %q: %w", cfg.prometheusURL, err)) @@ -788,6 +829,7 @@ func main() { var ( ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxRule = context.Background() + ctxSecrets = 
context.Background() notifierManager = notifier.NewManager(&cfg.notifier, logger.With("component", "notifier")) @@ -838,6 +880,21 @@ func main() { os.Exit(1) } + var secretManager *secrets.Manager + if cfg.enableKubeSecretProvider { + manager := secrets.NewManager( + ctxSecrets, + prometheus.DefaultRegisterer, + secrets.ProviderOptions{ + Logger: log.With(logger, "component", "secret manager"), + }, + ) + secretManager = &manager + defer secretManager.Close(prometheus.DefaultRegisterer) + + cfg.scrape.HTTPClientOptions = append(cfg.scrape.HTTPClientOptions, common_config.WithSecretManager(secretManager)) + } + var ( tracingManager = tracing.NewManager(logger) @@ -982,6 +1039,20 @@ func main() { } return discoveryManagerScrape.ApplyConfig(c) }, + }, { + name: "secret", + reloader: func(cfg *config.Config) error { + if secretManager == nil { + if len(cfg.SecretConfigs) > 0 { + return errors.New("secret providers are disabled") + } + return nil + } + kConfig := secrets.WatchSPConfig{ + ClientConfig: cfg.ClientConfig, + } + return secretManager.ApplyConfig(&kConfig, cfg.SecretConfigs) + }, }, { name: "notify", reloader: notifierManager.ApplyConfig, @@ -1023,6 +1094,12 @@ func main() { }, { name: "tracing", reloader: tracingManager.ApplyConfig, + }, { + name: "gcm_export", + reloader: func(cfg *config.Config) error { + // Call in closure to not call Global() before it's initialized below. 
+ return gcm_export.Global().ApplyConfig(cfg) + }, }, } @@ -1088,6 +1165,29 @@ func main() { }, ) } + { + exporterLogger := log.With(logger, "component", "gcm_exporter") + ctx, cancel := context.WithCancel(context.Background()) + exporter, err := opts.NewExporter(ctx, exporterLogger, prometheus.DefaultRegisterer) + if err != nil { + logger.Error("Unable to init Google Cloud Monitoring exporter", "err", err) + os.Exit(2) + } + + if err := gcm_export.SetGlobal(exporter); err != nil { + logger.Error("Unable to set Google Cloud Monitoring exporter", "err", err) + os.Exit(2) + } + + g.Add( + func() error { + return gcm_export.Global().Run() + }, + func(err error) { + cancel() + }, + ) + } { // Scrape discovery manager. g.Add( @@ -1553,7 +1653,7 @@ func updateGoGC(conf *config.Config, logger *slog.Logger) { func startsOrEndsWithQuote(s string) bool { return strings.HasPrefix(s, "\"") || strings.HasPrefix(s, "'") || - strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'") + strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'") } // compileCORSRegexString compiles given string and adds anchors. @@ -1948,3 +2048,35 @@ func (p *rwProtoMsgFlagParser) Set(opt string) error { *p.msgs = append(*p.msgs, t) return nil } + +func deleteStorageData(agentMode bool, dataPath string) error { + if agentMode { + for _, f := range []string{"wal", "lock"} { + if err := os.RemoveAll(filepath.Join(dataPath, f)); err != nil { + return err + } + } + return nil + } + + files, err := os.ReadDir(dataPath) + if err != nil { + return fmt.Errorf("can't read dir %v: %w", dataPath, err) + } + for _, f := range files { + switch f.Name() { + case "wal", "lock", "chunks_head": + if err := os.RemoveAll(filepath.Join(dataPath, f.Name())); err != nil { + return err + } + continue + } + if _, err := ulid.Parse(f.Name()); err == nil { + // It's a TSDB block, remove. 
+ if err := os.RemoveAll(filepath.Join(dataPath, f.Name())); err != nil { + return err + } + } + } + return nil +} diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index e4262f1b3b..87d2ebf7c0 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -135,7 +135,7 @@ func TestFailedStartupExitCode(t *testing.T) { fakeInputFile := "fake-input-file" expectedExitStatus := 2 - prom := exec.Command(promPath, "-test.main", "--web.listen-address=0.0.0.0:0", "--config.file="+fakeInputFile) + prom := exec.Command(promPath, "-test.main", "--web.listen-address=0.0.0.0:0", "--config.file="+fakeInputFile, "--export.debug.disable-auth") err := prom.Run() require.Error(t, err) @@ -246,7 +246,7 @@ func TestWALSegmentSizeBounds(t *testing.T) { } { t.Run(tc.size, func(t *testing.T) { t.Parallel() - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"), "--export.debug.disable-auth") // Log stderr in case of failure. stderr, err := prom.StderrPipe() @@ -310,7 +310,7 @@ func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) { } { t.Run(tc.size, func(t *testing.T) { t.Parallel() - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"), "--export.debug.disable-auth") // Log stderr in case of failure. 
stderr, err := prom.StderrPipe() @@ -414,10 +414,75 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames return res } +func TestDeleteStorageDataOnStart(t *testing.T) { + for _, agentMode := range []bool{false, true} { + t.Run(fmt.Sprintf("%v", agentMode), func(t *testing.T) { + t.Run("empty", func(t *testing.T) { + dir := t.TempDir() + + require.NoError(t, deleteStorageData(agentMode, dir)) + requireEmptyDir(t, dir) + }) + t.Run("partial data", func(t *testing.T) { + dir := t.TempDir() + + if !agentMode { + require.NoError(t, os.Mkdir(filepath.Join(dir, "chunks_head"), os.ModePerm)) + } + require.NoError(t, os.Mkdir(filepath.Join(dir, "wal"), os.ModePerm)) + require.NoError(t, os.Mkdir(filepath.Join(dir, "wal", "checkpoint.00000003"), os.ModePerm)) + + require.NoError(t, deleteStorageData(agentMode, dir)) + requireEmptyDir(t, dir) + }) + t.Run("full data", func(t *testing.T) { + dir := t.TempDir() + + if !agentMode { + require.NoError(t, os.Mkdir(filepath.Join(dir, "chunks_head"), os.ModePerm)) + require.NoError(t, os.Mkdir(filepath.Join(dir, "01HTHFTV0ZK2KQ85DXQK9TGA7Z"), os.ModePerm)) + + } + require.NoError(t, os.Mkdir(filepath.Join(dir, "wal"), os.ModePerm)) + require.NoError(t, os.Mkdir(filepath.Join(dir, "wal", "checkpoint.00000003"), os.ModePerm)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "lock"), []byte{1}, os.ModePerm)) + + require.NoError(t, deleteStorageData(agentMode, dir)) + requireEmptyDir(t, dir) + }) + }) + } +} + +func requireEmptyDir(t *testing.T, dir string) { + t.Helper() + files, err := os.ReadDir(dir) + require.NoError(t, err) + require.Empty(t, files, "%v contains unexpected files", dir) +} + +func TestAgentDeleteDataOnStart(t *testing.T) { + prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig, "--export.debug.disable-auth") + require.NoError(t, prom.Start()) + + actualExitStatus := 0 + done := make(chan error, 1) + + 
go func() { done <- prom.Wait() }() + select { + case err := <-done: + t.Logf("prometheus agent should be still running: %v", err) + actualExitStatus = prom.ProcessState.ExitCode() + case <-time.After(startupTime): + prom.Process.Kill() + } + require.Equal(t, 0, actualExitStatus) +} + func TestAgentSuccessfulStartup(t *testing.T) { t.Parallel() - prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig, "--export.debug.disable-auth") require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -437,7 +502,7 @@ func TestAgentSuccessfulStartup(t *testing.T) { func TestAgentFailedStartupWithServerFlag(t *testing.T) { t.Parallel() - prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--export.debug.disable-auth") output := bytes.Buffer{} prom.Stderr = &output @@ -466,7 +531,7 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { t.Parallel() - prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--export.debug.disable-auth") require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -503,7 +568,7 @@ func TestModeSpecificFlags(t *testing.T) { for _, tc := range testcases { t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) { t.Parallel() - args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} + args := []string{"-test.main", tc.arg, t.TempDir(), 
"--web.listen-address=0.0.0.0:0", "--export.debug.disable-auth"} if tc.mode == "agent" { args = append(args, "--agent", "--config.file="+agentConfig) @@ -558,6 +623,8 @@ func TestModeSpecificFlags(t *testing.T) { } func TestDocumentation(t *testing.T) { + t.Skip("google: We don't maintain docs in our fork, so nothing to regenerate and test.") + if runtime.GOOS == "windows" { t.SkipNow() } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 94eec27e79..42cd65614a 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -38,7 +38,7 @@ func TestStartupInterrupt(t *testing.T) { port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) - prom := exec.Command(promPath, "-test.main", "--config.file="+promConfig, "--storage.tsdb.path="+t.TempDir(), "--web.listen-address=0.0.0.0"+port) + prom := exec.Command(promPath, "-test.main", "--config.file="+promConfig, "--storage.tsdb.path="+t.TempDir(), "--web.listen-address=0.0.0.0"+port, "--export.debug.disable-auth") err := prom.Start() require.NoError(t, err) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index 7c073b59d0..56da01ab0a 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -87,8 +87,14 @@ func (p *queryLogTest) setQueryLog(t *testing.T, queryLogFile string) { require.NoError(t, err) _, err = p.configFile.Seek(0, 0) require.NoError(t, err) + commonGlobal := ` + # GMP requires settings project_id and location labels. 
+ external_labels: + project_id: example-project + location: us-central-1 +` if queryLogFile != "" { - _, err = fmt.Fprintf(p.configFile, "global:\n query_log_file: %s\n", queryLogFile) + _, err = fmt.Fprintf(p.configFile, "global:\n query_log_file: %s\n%s", queryLogFile, commonGlobal) require.NoError(t, err) } _, err = p.configFile.Write([]byte(p.configuration())) @@ -298,6 +304,7 @@ func (p *queryLogTest) run(t *testing.T) { "--web.enable-lifecycle", fmt.Sprintf("--web.listen-address=%s:%d", p.host, p.port), "--storage.tsdb.path=" + dir, + "--export.debug.disable-auth", }, p.params()...) prom := exec.Command(promPath, params...) diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index f922d18c4e..2fd6f7e9a6 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -296,9 +296,9 @@ func TestCheckConfigSyntax(t *testing.T) { file: "config_with_tls_files.yml", syntaxOnly: false, err: "error checking client cert file \"testdata/nonexistent_cert_file.yml\": " + - "stat testdata/nonexistent_cert_file.yml: no such file or directory", + "stat testdata/nonexistent_cert_file.yml: no such file or directory", errWindows: "error checking client cert file \"testdata\\\\nonexistent_cert_file.yml\": " + - "CreateFile testdata\\nonexistent_cert_file.yml: The system cannot find the file specified.", + "CreateFile testdata\\nonexistent_cert_file.yml: The system cannot find the file specified.", }, { name: "check with syntax only succeeds with nonexistent credentials file", @@ -312,9 +312,9 @@ func TestCheckConfigSyntax(t *testing.T) { file: "authorization_credentials_file.bad.yml", syntaxOnly: false, err: "error checking authorization credentials or bearer token file \"/random/file/which/does/not/exist.yml\": " + - "stat /random/file/which/does/not/exist.yml: no such file or directory", + "stat /random/file/which/does/not/exist.yml: no such file or directory", errWindows: "error checking authorization credentials or bearer token file 
\"testdata\\\\random\\\\file\\\\which\\\\does\\\\not\\\\exist.yml\": " + - "CreateFile testdata\\random\\file\\which\\does\\not\\exist.yml: The system cannot find the path specified.", + "CreateFile testdata\\random\\file\\which\\does\\not\\exist.yml: The system cannot find the path specified.", }, } for _, test := range cases { @@ -463,6 +463,8 @@ func TestExitCodes(t *testing.T) { } func TestDocumentation(t *testing.T) { + t.Skip("google: We don't maintain docs in our fork, so nothing to regenerate and test.") + if runtime.GOOS == "windows" { t.SkipNow() } diff --git a/config/config.go b/config/config.go index a60cfcb6b9..7e0dd8ae90 100644 --- a/config/config.go +++ b/config/config.go @@ -33,6 +33,8 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/sigv4" "gopkg.in/yaml.v2" + gcm_exportconfig "github.com/prometheus/prometheus/google/export/config" + gcm_secrets "github.com/prometheus/prometheus/google/secrets" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/labels" @@ -277,10 +279,16 @@ type Config struct { StorageConfig StorageConfig `yaml:"storage,omitempty"` TracingConfig TracingConfig `yaml:"tracing,omitempty"` + // Secret management: + gcm_secrets.ClientConfig `yaml:"kubernetes_sp_config,omitempty"` + SecretConfigs []gcm_secrets.SecretConfig `yaml:"kubernetes_secrets,omitempty"` + RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"` RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"` OTLPConfig OTLPConfig `yaml:"otlp,omitempty"` + GoogleCloud gcm_exportconfig.GoogleCloudConfig `yaml:"google_cloud,omitempty"` + loaded bool // Certain methods require configuration to use Load validation. } @@ -645,15 +653,15 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // isZero returns true iff the global config is the zero value. 
func (c *GlobalConfig) isZero() bool { return c.ExternalLabels.IsEmpty() && - c.ScrapeInterval == 0 && - c.ScrapeTimeout == 0 && - c.EvaluationInterval == 0 && - c.RuleQueryOffset == 0 && - c.QueryLogFile == "" && - c.ScrapeFailureLogFile == "" && - c.ScrapeProtocols == nil && - !c.ConvertClassicHistogramsToNHCB && - !c.AlwaysScrapeClassicHistograms + c.ScrapeInterval == 0 && + c.ScrapeTimeout == 0 && + c.EvaluationInterval == 0 && + c.RuleQueryOffset == 0 && + c.QueryLogFile == "" && + c.ScrapeFailureLogFile == "" && + c.ScrapeProtocols == nil && + !c.ConvertClassicHistogramsToNHCB && + !c.AlwaysScrapeClassicHistograms } const DefaultGoGCPercentage = 75 @@ -1218,7 +1226,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er } httpClientConfigAuthEnabled := c.HTTPClientConfig.BasicAuth != nil || - c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil + c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil if httpClientConfigAuthEnabled && c.SigV4Config != nil { return errors.New("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured") diff --git a/docs/command-line/index.md b/docs/command-line/index.md deleted file mode 100644 index 53786fbb20..0000000000 --- a/docs/command-line/index.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Command Line -sort_rank: 9 ---- diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md deleted file mode 100644 index e90a7574ba..0000000000 --- a/docs/command-line/prometheus.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: prometheus ---- - -The Prometheus monitoring server - - -## Flags - -| Flag | Description | Default | -| --- | --- | --- | -| -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | -| --version | Show application version. | | -| --config.file | Prometheus configuration file path. 
| `prometheus.yml` | -| --config.auto-reload-interval | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` | -| --web.listen-address ... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` | -| --auto-gomaxprocs | Automatically set GOMAXPROCS to match Linux container CPU quota | `true` | -| --auto-gomemlimit | Automatically set GOMEMLIMIT to match Linux container or system memory limit | `true` | -| --auto-gomemlimit.ratio | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` | -| --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | -| --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. | `5m` | -| --web.max-connections | Maximum number of simultaneous connections across all listeners. | `512` | -| --web.max-notifications-subscribers | Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close. | `16` | -| --web.external-url | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | | -| --web.route-prefix | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | | -| --web.user-assets | Path to static asset directory, available at /user. | | -| --web.enable-lifecycle | Enable shutdown and reload via HTTP request. | `false` | -| --web.enable-admin-api | Enable API endpoints for admin control actions. 
| `false` | -| --web.enable-remote-write-receiver | Enable API endpoint accepting remote write requests. | `false` | -| --web.remote-write-receiver.accepted-protobuf-messages | List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: prometheus.WriteRequest, io.prometheus.write.v2.Request | `prometheus.WriteRequest` | -| --web.enable-otlp-receiver | Enable API endpoint accepting OTLP write requests. | `false` | -| --web.console.templates | Path to the console template directory, available at /consoles. | `consoles` | -| --web.console.libraries | Path to the console library directory. | `console_libraries` | -| --web.page-title | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` | -| --web.cors.origin | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1\|domain2)\.com' | `.*` | -| --storage.tsdb.path | Base path for metrics storage. Use with server mode only. | `data/` | -| --storage.tsdb.retention.time | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | | -| --storage.tsdb.retention.size | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | | -| --storage.tsdb.no-lockfile | Do not create lockfile in data directory. Use with server mode only. | `false` | -| --storage.tsdb.head-chunks-write-queue-size | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` | -| --storage.agent.path | Base path for metrics storage. Use with agent mode only. | `data-agent/` | -| --storage.agent.wal-compression | Compress the agent WAL. 
If false, the --storage.agent.wal-compression-type flag is ignored. Use with agent mode only. | `true` | -| --storage.agent.retention.min-time | Minimum age samples may be before being considered for deletion when the WAL is truncated Use with agent mode only. | | -| --storage.agent.retention.max-time | Maximum age samples may be before being forcibly deleted when the WAL is truncated Use with agent mode only. | | -| --storage.agent.no-lockfile | Do not create lockfile in data directory. Use with agent mode only. | `false` | -| --storage.remote.flush-deadline | How long to wait flushing sample on shutdown or config reload. | `1m` | -| --storage.remote.read-sample-limit | Maximum overall number of samples to return via the remote read interface, in a single query. 0 means no limit. This limit is ignored for streamed response types. Use with server mode only. | `5e7` | -| --storage.remote.read-concurrent-limit | Maximum number of concurrent remote read calls. 0 means no limit. Use with server mode only. | `10` | -| --storage.remote.read-max-bytes-in-frame | Maximum number of bytes in a single frame for streaming remote read response types before marshalling. Note that client might have limit on frame size as well. 1MB as recommended by protobuf by default. Use with server mode only. | `1048576` | -| --rules.alert.for-outage-tolerance | Max time to tolerate prometheus outage for restoring "for" state of alert. Use with server mode only. | `1h` | -| --rules.alert.for-grace-period | Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. Use with server mode only. | `10m` | -| --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` | -| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. 
When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` | -| --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications. Use with server mode only. | `10000` | -| --alertmanager.notification-batch-size | The maximum number of notifications per batch to send to the Alertmanager. Use with server mode only. | `256` | -| --alertmanager.drain-notification-queue-on-shutdown | Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down. Use with server mode only. | `true` | -| --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` | -| --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | -| --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | -| --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | -| --agent | Run Prometheus in 'Agent mode'. | | -| --log.level | Only log messages with the given severity or above. 
One of: [debug, info, warn, error] | `info` | -| --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | - - diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md deleted file mode 100644 index 92e0ac0030..0000000000 --- a/docs/command-line/promtool.md +++ /dev/null @@ -1,760 +0,0 @@ ---- -title: promtool ---- - -Tooling for the Prometheus monitoring system. - - -## Flags - -| Flag | Description | -| --- | --- | -| -h, --help | Show context-sensitive help (also try --help-long and --help-man). | -| --version | Show application version. | -| --experimental | Enable experimental commands. | -| --enable-feature ... | Comma separated feature names to enable. Valid options: promql-experimental-functions, promql-delayed-name-removal. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details | - - - - -## Commands - -| Command | Description | -| --- | --- | -| help | Show help. | -| check | Check the resources for validity. | -| query | Run query against a Prometheus server. | -| debug | Fetch debug information. | -| push | Push to a Prometheus server. | -| test | Unit testing. | -| tsdb | Run tsdb commands. | -| promql | PromQL formatting and editing. Requires the --experimental flag. | - - - - -### `promtool help` - -Show help. - - - -#### Arguments - -| Argument | Description | -| --- | --- | -| command | Show help on command. | - - - - -### `promtool check` - -Check the resources for validity. - - - -#### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --query.lookback-delta | The server's maximum query lookback duration. | `5m` | -| --extended | Print extended information related to the cardinality of the metrics. | | - - - - -##### `promtool check service-discovery` - -Perform service discovery for the given job name and report the results, including relabeling. 
- - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --timeout | The time to wait for discovery results. | `30s` | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| config-file | The prometheus config file. | Yes | -| job | The job to run service discovery for. | Yes | - - - - -##### `promtool check config` - -Check if the config files are valid or not. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --syntax-only | Only check the config file syntax, ignoring file and content validation referenced in the config | | -| --lint | Linting checks to apply to the rules/scrape configs specified in the config. Available options are: all, duplicate-rules, none, too-long-scrape-interval. Use --lint=none to disable linting | `duplicate-rules` | -| --lint-fatal | Make lint errors exit with exit code 3. | `false` | -| --ignore-unknown-fields | Ignore unknown fields in the rule groups read by the config files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` | -| --agent | Check config file for Prometheus in Agent mode. | | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| config-files | The config files to check. | Yes | - - - - -##### `promtool check web-config` - -Check if the web config files are valid or not. - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| web-config-files | The config files to check. | Yes | - - - - -##### `promtool check healthy` - -Check if the Prometheus server is healthy. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --http.config.file | HTTP client configuration file for promtool to connect to Prometheus. | | -| --url | The URL for the Prometheus server. 
| `http://localhost:9090` | - - - - -##### `promtool check ready` - -Check if the Prometheus server is ready. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --http.config.file | HTTP client configuration file for promtool to connect to Prometheus. | | -| --url | The URL for the Prometheus server. | `http://localhost:9090` | - - - - -##### `promtool check rules` - -Check if the rule files are valid or not. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --lint | Linting checks to apply. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` | -| --lint-fatal | Make lint errors exit with exit code 3. | `false` | -| --ignore-unknown-fields | Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` | - - - - -###### Arguments - -| Argument | Description | -| --- | --- | -| rule-files | The rule files to check, default is read from standard input. | - - - - -##### `promtool check metrics` - -Pass Prometheus metrics over stdin to lint them for consistency and correctness. - -examples: - -$ cat metrics.prom | promtool check metrics - -$ curl -s http://localhost:9090/metrics | promtool check metrics - - - -### `promtool query` - -Run query against a Prometheus server. - - - -#### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| -o, --format | Output format of the query. | `promql` | -| --http.config.file | HTTP client configuration file for promtool to connect to Prometheus. | | - - - - -##### `promtool query instant` - -Run instant query. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| --time | Query evaluation time (RFC3339 or Unix timestamp). 
| - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to query. | Yes | -| expr | PromQL query expression. | Yes | - - - - -##### `promtool query range` - -Run range query. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| --header | Extra headers to send to server. | -| --start | Query range start time (RFC3339 or Unix timestamp). | -| --end | Query range end time (RFC3339 or Unix timestamp). | -| --step | Query step size (duration). | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to query. | Yes | -| expr | PromQL query expression. | Yes | - - - - -##### `promtool query series` - -Run series query. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| --match ... | Series selector. Can be specified multiple times. | -| --start | Start time (RFC3339 or Unix timestamp). | -| --end | End time (RFC3339 or Unix timestamp). | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to query. | Yes | - - - - -##### `promtool query labels` - -Run labels query. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| --start | Start time (RFC3339 or Unix timestamp). | -| --end | End time (RFC3339 or Unix timestamp). | -| --match ... | Series selector. Can be specified multiple times. | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to query. | Yes | -| name | Label name to provide label values for. | Yes | - - - - -##### `promtool query analyze` - -Run queries against your Prometheus to analyze the usage pattern of certain metrics. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --server | Prometheus server to query. | | -| --type | Type of metric: histogram. | | -| --duration | Time frame to analyze. 
| `1h` | -| --time | Query time (RFC3339 or Unix timestamp), defaults to now. | | -| --match ... | Series selector. Can be specified multiple times. | | - - - - -### `promtool debug` - -Fetch debug information. - - - -##### `promtool debug pprof` - -Fetch profiling debug information. - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to get pprof files from. | Yes | - - - - -##### `promtool debug metrics` - -Fetch metrics debug information. - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to get metrics from. | Yes | - - - - -##### `promtool debug all` - -Fetch all debug information. - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| server | Prometheus server to get all debug information from. | Yes | - - - - -### `promtool push` - -Push to a Prometheus server. - - - -#### Flags - -| Flag | Description | -| --- | --- | -| --http.config.file | HTTP client configuration file for promtool to connect to Prometheus. | - - - - -##### `promtool push metrics` - -Push metrics to a prometheus remote write (for testing purpose only). - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --label | Label to attach to metrics. Can be specified multiple times. | `job=promtool` | -| --timeout | The time to wait for pushing metrics. | `30s` | -| --header | Prometheus remote write header. | | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| remote-write-url | Prometheus remote write url to push metrics. | Yes | -| metric-files | The metric files to push, default is read from standard input. | | - - - - -### `promtool test` - -Unit testing. - - - -#### Flags - -| Flag | Description | -| --- | --- | -| --junit | File path to store JUnit XML test results. | - - - - -##### `promtool test rules` - -Unit tests for rules. 
- - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | -| --debug | Enable unit test debugging. | `false` | -| --diff | [Experimental] Print colored differential output between expected & received output. | `false` | -| --ignore-unknown-fields | Ignore unknown fields in the test files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| test-rule-file | The unit test file. | Yes | - - - - -### `promtool tsdb` - -Run tsdb commands. - - - -##### `promtool tsdb bench` - -Run benchmarks. - - - -##### `promtool tsdb bench write` - -Run a write performance benchmark. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --out | Set the output path. | `benchout` | -| --metrics | Number of metrics to read. | `10000` | -| --scrapes | Number of scrapes to simulate. | `3000` | - - - - -###### Arguments - -| Argument | Description | Default | -| --- | --- | --- | -| file | Input file with samples data, default is (../../tsdb/testdata/20kseries.json). | `../../tsdb/testdata/20kseries.json` | - - - - -##### `promtool tsdb analyze` - -Analyze churn, label pair cardinality and compaction efficiency. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --limit | How many items to show in each list. | `20` | -| --extended | Run extended analysis. | | -| --match | Series selector to analyze. Only 1 set of matchers is supported now. | | - - - - -###### Arguments - -| Argument | Description | Default | -| --- | --- | --- | -| db path | Database path (default is data/). | `data/` | -| block id | Block to analyze (default is the last block). 
| | - - - - -##### `promtool tsdb list` - -List tsdb blocks. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| -r, --human-readable | Print human readable values. | - - - - -###### Arguments - -| Argument | Description | Default | -| --- | --- | --- | -| db path | Database path (default is data/). | `data/` | - - - - -##### `promtool tsdb dump` - -Dump samples from a TSDB. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --sandbox-dir-root | Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end. | | -| --min-time | Minimum timestamp to dump, in milliseconds since the Unix epoch. | `-9223372036854775808` | -| --max-time | Maximum timestamp to dump, in milliseconds since the Unix epoch. | `9223372036854775807` | -| --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | - - - - -###### Arguments - -| Argument | Description | Default | -| --- | --- | --- | -| db path | Database path (default is data/). | `data/` | - - - - -##### `promtool tsdb dump-openmetrics` - -[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --sandbox-dir-root | Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end. | | -| --min-time | Minimum timestamp to dump, in milliseconds since the Unix epoch. | `-9223372036854775808` | -| --max-time | Maximum timestamp to dump, in milliseconds since the Unix epoch. | `9223372036854775807` | -| --match ... | Series selector. Can be specified multiple times. 
| `{__name__=~'(?s:.*)'}` | - - - - -###### Arguments - -| Argument | Description | Default | -| --- | --- | --- | -| db path | Database path (default is data/). | `data/` | - - - - -##### `promtool tsdb create-blocks-from` - -[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| -r, --human-readable | Print human readable values. | -| -q, --quiet | Do not print created blocks. | - - - - -##### `promtool tsdb create-blocks-from openmetrics` - -Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details. - - - -###### Flags - -| Flag | Description | -| --- | --- | -| --label | Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value | - - - - -###### Arguments - -| Argument | Description | Default | Required | -| --- | --- | --- | --- | -| input file | OpenMetrics file to read samples from. | | Yes | -| output directory | Output directory for generated blocks. | `data/` | | - - - - -##### `promtool tsdb create-blocks-from rules` - -Create blocks of data for new recording rules. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| --http.config.file | HTTP client configuration file for promtool to connect to Prometheus. | | -| --url | The URL for the Prometheus API with the data where the rule will be backfilled from. | `http://localhost:9090` | -| --start | The time to start backfilling the new rule from. Must be a RFC3339 formatted date or Unix timestamp. Required. | | -| --end | If an end time is provided, all recording rules in the rule files provided will be backfilled to the end time. Default will backfill up to 3 hours ago. Must be a RFC3339 formatted date or Unix timestamp. | | -| --output-dir | Output directory for generated blocks. 
| `data/` | -| --eval-interval | How frequently to evaluate rules when backfilling if a value is not set in the recording rule files. | `60s` | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| rule-files | A list of one or more files containing recording rules to be backfilled. All recording rules listed in the files will be backfilled. Alerting rules are not evaluated. | Yes | - - - - -### `promtool promql` - -PromQL formatting and editing. Requires the `--experimental` flag. - - - -##### `promtool promql format` - -Format PromQL query to pretty printed form. - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| query | PromQL query. | Yes | - - - - -##### `promtool promql label-matchers` - -Edit label matchers contained within an existing PromQL query. - - - -##### `promtool promql label-matchers set` - -Set a label matcher in the query. - - - -###### Flags - -| Flag | Description | Default | -| --- | --- | --- | -| -t, --type | Type of the label matcher to set. | `=` | - - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| query | PromQL query. | Yes | -| name | Name of the label matcher to set. | Yes | -| value | Value of the label matcher to set. | Yes | - - - - -##### `promtool promql label-matchers delete` - -Delete a label from the query. - - - -###### Arguments - -| Argument | Description | Required | -| --- | --- | --- | -| query | PromQL query. | Yes | -| name | Name of the label to delete. | Yes | - - diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md deleted file mode 100644 index faffad56f2..0000000000 --- a/docs/configuration/alerting_rules.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Alerting rules -sort_rank: 3 ---- - -Alerting rules allow you to define alert conditions based on Prometheus -expression language expressions and to send notifications about firing alerts -to an external service. 
Whenever the alert expression results in one or more -vector elements at a given point in time, the alert counts as active for these -elements' label sets. - -## Defining alerting rules - -Alerting rules are configured in Prometheus in the same way as [recording -rules](recording_rules.md). - -An example rules file with an alert would be: - -```yaml -groups: -- name: example - labels: - team: myteam - rules: - - alert: HighRequestLatency - expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 - for: 10m - keep_firing_for: 5m - labels: - severity: page - annotations: - summary: High request latency -``` - -The optional `for` clause causes Prometheus to wait for a certain duration -between first encountering a new expression output vector element and counting -an alert as firing for this element. In this case, Prometheus will check that -the alert continues to be active during each evaluation for 10 minutes before -firing the alert. Elements that are active, but not firing yet, are in the pending state. -Alerting rules without the `for` clause will become active on the first evaluation. - -There is also an optional `keep_firing_for` clause that tells Prometheus to keep -this alert firing for the specified duration after the firing condition was last met. -This can be used to prevent situations such as flapping alerts, false resolutions -due to lack of data loss, etc. Alerting rules without the `keep_firing_for` clause -will deactivate on the first evaluation where the condition is not met (assuming -any optional `for` duration described above has been satisfied). - -The `labels` clause allows specifying a set of additional labels to be attached -to the alert. Any existing conflicting labels will be overwritten. The label -values can be templated. - -The `annotations` clause specifies a set of informational labels that can be used to store longer additional information such as alert descriptions or runbook links. The annotation values can be templated. 
- -### Templating - -Label and annotation values can be templated using [console -templates](https://prometheus.io/docs/visualization/consoles). The `$labels` -variable holds the label key/value pairs of an alert instance. The configured -external labels can be accessed via the `$externalLabels` variable. The -`$value` variable holds the evaluated value of an alert instance. - - # To insert a firing element's label values: - {{ $labels. }} - # To insert the numeric expression value of the firing element: - {{ $value }} - -Examples: - -```yaml -groups: -- name: example - rules: - - # Alert for any instance that is unreachable for >5 minutes. - - alert: InstanceDown - expr: up == 0 - for: 5m - labels: - severity: page - annotations: - summary: "Instance {{ $labels.instance }} down" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes." - - # Alert for any instance that has a median request latency >1s. - - alert: APIHighRequestLatency - expr: api_http_request_latencies_second{quantile="0.5"} > 1 - for: 10m - annotations: - summary: "High request latency on {{ $labels.instance }}" - description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)" -``` - -## Inspecting alerts during runtime - -To manually inspect which alerts are active (pending or firing), navigate to -the "Alerts" tab of your Prometheus instance. This will show you the exact -label sets for which each defined alert is currently active. - -For pending and firing alerts, Prometheus also stores synthetic time series of -the form `ALERTS{alertname="", alertstate="", }`. -The sample value is set to `1` as long as the alert is in the indicated active -(pending or firing) state, and the series is marked stale when this is no -longer the case. - -## Sending alert notifications - -Prometheus's alerting rules are good at figuring what is broken *right now*, but -they are not a fully-fledged notification solution. 
Another layer is needed to -add summarization, notification rate limiting, silencing and alert dependencies -on top of the simple alert definitions. In Prometheus's ecosystem, the -[Alertmanager](https://prometheus.io/docs/alerting/alertmanager/) takes on this -role. Thus, Prometheus may be configured to periodically send information about -alert states to an Alertmanager instance, which then takes care of dispatching -the right notifications. -Prometheus can be [configured](configuration.md) to automatically discover available -Alertmanager instances through its service discovery integrations. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md deleted file mode 100644 index afda28fb8c..0000000000 --- a/docs/configuration/configuration.md +++ /dev/null @@ -1,3184 +0,0 @@ ---- -title: Configuration -sort_rank: 1 ---- - -Prometheus is configured via command-line flags and a configuration file. While -the command-line flags configure immutable system parameters (such as storage -locations, amount of data to keep on disk and in memory, etc.), the -configuration file defines everything related to scraping [jobs and their -instances](https://prometheus.io/docs/concepts/jobs_instances/), as well as -which [rule files to load](recording_rules.md#configuring-rules). - -To view all available command-line flags, run `./prometheus -h`. - -Prometheus can reload its configuration at runtime. If the new configuration -is not well-formed, the changes will not be applied. -A configuration reload is triggered by sending a `SIGHUP` to the Prometheus process or -sending a HTTP POST request to the `/-/reload` endpoint (when the `--web.enable-lifecycle` flag is enabled). -This will also reload any configured rule files. - -## Configuration file - -To specify which configuration file to load, use the `--config.file` flag. - -The file is written in [YAML format](https://en.wikipedia.org/wiki/YAML), -defined by the scheme described below. 
-Brackets indicate that a parameter is optional. For non-list parameters the -value is set to the specified default. - -Generic placeholders are defined as follows: - -* `<boolean>`: a boolean that can take the values `true` or `false` -* `<duration>`: a duration matching the regular expression `((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)`, e.g. `1d`, `1h30m`, `5m`, `10s` -* `<filename>`: a valid path in the current working directory -* `<float>`: a floating-point number -* `<host>`: a valid string consisting of a hostname or IP followed by an optional port number -* `<int>`: an integer value -* `<labelname>`: a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*`. Any other unsupported character in the source label should be converted to an underscore. For example, the label `app.kubernetes.io/name` should be written as `app_kubernetes_io_name`. -* `<labelvalue>`: a string of unicode characters -* `<path>`: a valid URL path -* `<scheme>`: a string that can take the values `http` or `https` -* `<secret>`: a regular string that is a secret, such as a password -* `<string>`: a regular string -* `<size>`: a size in bytes, e.g. `512MB`. A unit is required. Supported units: B, KB, MB, GB, TB, PB, EB. -* `<tmpl_string>`: a string which is template-expanded before usage - -The other placeholders are specified separately. - -A valid example file can be found [here](/config/testdata/conf.good.yml). - -The global configuration specifies parameters that are valid in all other configuration -contexts. They also serve as defaults for other configuration sections. - -```yaml -global: - # How frequently to scrape targets by default. - [ scrape_interval: <duration> | default = 1m ] - - # How long until a scrape request times out. - # It cannot be greater than the scrape interval. - [ scrape_timeout: <duration> | default = 10s ] - - # The protocols to negotiate during a scrape with the client. - # Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, - # OpenMetricsText1.0.0, PrometheusText0.0.4.
- # The default value changes to [ PrometheusProto, OpenMetricsText1.0.0, OpenMetricsText0.0.1, PrometheusText0.0.4 ] - # when native_histogram feature flag is set. - [ scrape_protocols: [, ...] | default = [ OpenMetricsText1.0.0, OpenMetricsText0.0.1, PrometheusText0.0.4 ] ] - - # How frequently to evaluate rules. - [ evaluation_interval: | default = 1m ] - - # Offset the rule evaluation timestamp of this particular group by the - # specified duration into the past to ensure the underlying metrics have - # been received. Metric availability delays are more likely to occur when - # Prometheus is running as a remote write target, but can also occur when - # there's anomalies with scraping. - [ rule_query_offset: | default = 0s ] - - # The labels to add to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). - # Environment variable references `${var}` or `$var` are replaced according - # to the values of the current environment variables. - # References to undefined variables are replaced by the empty string. - # The `$` character can be escaped by using `$$`. - external_labels: - [ : ... ] - - # File to which PromQL queries are logged. - # Reloading the configuration will reopen the file. - [ query_log_file: ] - - # File to which scrape failures are logged. - # Reloading the configuration will reopen the file. - [ scrape_failure_log_file: ] - - # An uncompressed response body larger than this many bytes will cause the - # scrape to fail. 0 means no limit. Example: 100MB. - # This is an experimental feature, this behaviour could - # change or be removed in the future. - [ body_size_limit: | default = 0 ] - - # Per-scrape limit on the number of scraped samples that will be accepted. - # If more than this number of samples are present after metric relabeling - # the entire scrape will be treated as failed. 0 means no limit. 
- [ sample_limit: | default = 0 ] - - # Limit on the number of labels that will be accepted per sample. If more - # than this number of labels are present on any sample post metric-relabeling, - # the entire scrape will be treated as failed. 0 means no limit. - [ label_limit: | default = 0 ] - - # Limit on the length (in bytes) of each individual label name. If any label - # name in a scrape is longer than this number post metric-relabeling, the - # entire scrape will be treated as failed. Note that label names are UTF-8 - # encoded, and characters can take up to 4 bytes. 0 means no limit. - [ label_name_length_limit: | default = 0 ] - - # Limit on the length (in bytes) of each individual label value. If any label - # value in a scrape is longer than this number post metric-relabeling, the - # entire scrape will be treated as failed. Note that label values are UTF-8 - # encoded, and characters can take up to 4 bytes. 0 means no limit. - [ label_value_length_limit: | default = 0 ] - - # Limit per scrape config on number of unique targets that will be - # accepted. If more than this number of targets are present after target - # relabeling, Prometheus will mark the targets as failed without scraping them. - # 0 means no limit. This is an experimental feature, this behaviour could - # change in the future. - [ target_limit: | default = 0 ] - - # Limit per scrape config on the number of targets dropped by relabeling - # that will be kept in memory. 0 means no limit. - [ keep_dropped_targets: | default = 0 ] - - # Specifies the validation scheme for metric and label names. Either blank or - # "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons, - # and underscores. - [ metric_name_validation_scheme: | default "utf8" ] - - # Specifies whether to convert all scraped classic histograms into native - # histograms with custom buckets. 
- [ convert_classic_histograms_to_nhcb: | default = false] - - # Specifies whether to scrape a classic histogram, even if it is also exposed as a native - # histogram (has no effect without --enable-feature=native-histograms). - [ always_scrape_classic_histograms: | default = false ] - - -runtime: - # Configure the Go garbage collector GOGC parameter - # See: https://tip.golang.org/doc/gc-guide#GOGC - # Lowering this number increases CPU usage. - [ gogc: | default = 75 ] - -# Rule files specifies a list of globs. Rules and alerts are read from -# all matching files. -rule_files: - [ - ... ] - -# Scrape config files specifies a list of globs. Scrape configs are read from -# all matching files and appended to the list of scrape configs. -scrape_config_files: - [ - ... ] - -# A list of scrape configurations. -scrape_configs: - [ - ... ] - -# Alerting specifies settings related to the Alertmanager. -alerting: - alert_relabel_configs: - [ - ... ] - alertmanagers: - [ - ... ] - -# Settings related to the remote write feature. -remote_write: - [ - ... ] - -# Settings related to the OTLP receiver feature. -# See https://prometheus.io/docs/guides/opentelemetry/ for best practices. -otlp: - # Promote specific list of resource attributes to labels. - # It cannot be configured simultaneously with 'promote_all_resource_attributes: true'. - [ promote_resource_attributes: [, ...] | default = [ ] ] - # Promoting all resource attributes to labels, except for the ones configured with 'ignore_resource_attributes'. - # Be aware that changes in attributes received by the OTLP endpoint may result in time series churn and lead to high memory usage by the Prometheus server. - # It cannot be set to 'true' simultaneously with 'promote_resource_attributes'. - [ promote_all_resource_attributes: | default = false ] - # Which resource attributes to ignore, can only be set when 'promote_all_resource_attributes' is true. - [ ignore_resource_attributes: [, ...] 
| default = [] ] - # Configures translation of OTLP metrics when received through the OTLP metrics - # endpoint. Available values: - # - "UnderscoreEscapingWithSuffixes" refers to commonly agreed normalization used - # by OpenTelemetry in https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheus - # - "NoUTF8EscapingWithSuffixes" is a mode that relies on UTF-8 support in Prometheus. - # It preserves all special characters like dots, but still adds required metric name suffixes - # for units and _total, as UnderscoreEscapingWithSuffixes does. - # - (EXPERIMENTAL) "NoTranslation" is a mode that relies on UTF-8 support in Prometheus. - # It preserves all special characters like dots and won't append special suffixes for metric - # unit and type. - # - # WARNING: The "NoTranslation" setting has significant known risks and limitations (see https://prometheus.io/docs/practices/naming/ - # for details): - # * Impaired UX when using PromQL in plain YAML (e.g. alerts, rules, dashboard, autoscaling configuration). - # * Series collisions which in the best case may result in OOO errors, in the worst case a silently malformed - # time series. For instance, you may end up in the situation of ingesting `foo.bar` series with unit - # `seconds` and a separate series `foo.bar` with unit `milliseconds`. - [ translation_strategy: | default = "UnderscoreEscapingWithSuffixes" ] - # Enables adding "service.name", "service.namespace" and "service.instance.id" - # resource attributes to the "target_info" metric, on top of converting - # them into the "instance" and "job" labels. - [ keep_identifying_resource_attributes: | default = false ] - # Configures optional translation of OTLP explicit bucket histograms into native histograms with custom buckets. - [ convert_histograms_to_nhcb: | default = false ] - -# Settings related to the remote read feature. -remote_read: - [ - ... ] - -# Storage related settings that are runtime reloadable.
-storage: - [ tsdb: ] - [ exemplars: ] - -# Configures exporting traces. -tracing: - [ ] -``` - -### `` - -A `scrape_config` section specifies a set of targets and parameters describing how -to scrape them. In the general case, one scrape configuration specifies a single -job. In advanced configurations, this may change. - -Targets may be statically configured via the `static_configs` parameter or -dynamically discovered using one of the supported service-discovery mechanisms. - -Additionally, `relabel_configs` allow advanced modifications to any -target and its labels before scraping. - -```yaml -# The job name assigned to scraped metrics by default. -job_name: - -# How frequently to scrape targets from this job. -[ scrape_interval: | default = ] - -# Per-scrape timeout when scraping this job. -# It cannot be greater than the scrape interval. -[ scrape_timeout: | default = ] - -# The protocols to negotiate during a scrape with the client. -# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, -# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. -[ scrape_protocols: [, ...] | default = ] - -# Fallback protocol to use if a scrape returns blank, unparseable, or otherwise -# invalid Content-Type. -# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, -# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. -[ fallback_scrape_protocol: ] - -# Whether to scrape a classic histogram, even if it is also exposed as a native -# histogram (has no effect without --enable-feature=native-histograms). -[ always_scrape_classic_histograms: | -default = ] - -# The HTTP resource path on which to fetch metrics from targets. 
-[ metrics_path: | default = /metrics ] - -# honor_labels controls how Prometheus handles conflicts between labels that are -# already present in scraped data and labels that Prometheus would attach -# server-side ("job" and "instance" labels, manually configured target -# labels, and labels generated by service discovery implementations). -# -# If honor_labels is set to "true", label conflicts are resolved by keeping label -# values from the scraped data and ignoring the conflicting server-side labels. -# -# If honor_labels is set to "false", label conflicts are resolved by renaming -# conflicting labels in the scraped data to "exported_" (for -# example "exported_instance", "exported_job") and then attaching server-side -# labels. -# -# Setting honor_labels to "true" is useful for use cases such as federation and -# scraping the Pushgateway, where all labels specified in the target should be -# preserved. -# -# Note that any globally configured "external_labels" are unaffected by this -# setting. In communication with external systems, they are always applied only -# when a time series does not have a given label yet and are ignored otherwise. -[ honor_labels: | default = false ] - -# honor_timestamps controls whether Prometheus respects the timestamps present -# in scraped data. -# -# If honor_timestamps is set to "true", the timestamps of the metrics exposed -# by the target will be used. -# -# If honor_timestamps is set to "false", the timestamps of the metrics exposed -# by the target will be ignored. -[ honor_timestamps: | default = true ] - -# track_timestamps_staleness controls whether Prometheus tracks staleness of -# the metrics that have an explicit timestamps present in scraped data. -# -# If track_timestamps_staleness is set to "true", a staleness marker will be -# inserted in the TSDB when a metric is no longer present or the target -# is down. -[ track_timestamps_staleness: | default = false ] - -# Configures the protocol scheme used for requests. 
-[ scheme: | default = http ] - -# Optional HTTP URL parameters. -params: - [ : [, ...] ] - -# If enable_compression is set to "false", Prometheus will request uncompressed -# response from the scraped target. -[ enable_compression: | default = true ] - -# File to which scrape failures are logged. -# Reloading the configuration will reopen the file. -[ scrape_failure_log_file: ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] - -# List of Azure service discovery configurations. -azure_sd_configs: - [ - ... ] - -# List of Consul service discovery configurations. -consul_sd_configs: - [ - ... ] - -# List of DigitalOcean service discovery configurations. -digitalocean_sd_configs: - [ - ... ] - -# List of Docker service discovery configurations. -docker_sd_configs: - [ - ... ] - -# List of Docker Swarm service discovery configurations. -dockerswarm_sd_configs: - [ - ... ] - -# List of DNS service discovery configurations. -dns_sd_configs: - [ - ... ] - -# List of EC2 service discovery configurations. -ec2_sd_configs: - [ - ... ] - -# List of Eureka service discovery configurations. -eureka_sd_configs: - [ - ... ] - -# List of file service discovery configurations. -file_sd_configs: - [ - ... ] - -# List of GCE service discovery configurations. -gce_sd_configs: - [ - ... ] - -# List of Hetzner service discovery configurations. -hetzner_sd_configs: - [ - ... ] - -# List of HTTP service discovery configurations. -http_sd_configs: - [ - ... ] - - -# List of IONOS service discovery configurations. -ionos_sd_configs: - [ - ... ] - -# List of Kubernetes service discovery configurations. -kubernetes_sd_configs: - [ - ... ] - -# List of Kuma service discovery configurations. -kuma_sd_configs: - [ - ... ] - -# List of Lightsail service discovery configurations. -lightsail_sd_configs: - [ - ... ] - -# List of Linode service discovery configurations. 
-linode_sd_configs: - [ - ... ] - -# List of Marathon service discovery configurations. -marathon_sd_configs: - [ - ... ] - -# List of AirBnB's Nerve service discovery configurations. -nerve_sd_configs: - [ - ... ] - -# List of Nomad service discovery configurations. -nomad_sd_configs: - [ - ... ] - -# List of OpenStack service discovery configurations. -openstack_sd_configs: - [ - ... ] - -# List of OVHcloud service discovery configurations. -ovhcloud_sd_configs: - [ - ... ] - -# List of PuppetDB service discovery configurations. -puppetdb_sd_configs: - [ - ... ] - -# List of Scaleway service discovery configurations. -scaleway_sd_configs: - [ - ... ] - -# List of Zookeeper Serverset service discovery configurations. -serverset_sd_configs: - [ - ... ] - -# List of STACKIT service discovery configurations. -stackit_sd_configs: - [ - ... ] - -# List of Triton service discovery configurations. -triton_sd_configs: - [ - ... ] - -# List of Uyuni service discovery configurations. -uyuni_sd_configs: - [ - ... ] - -# List of labeled statically configured targets for this job. -static_configs: - [ - ... ] - -# List of target relabel configurations. -relabel_configs: - [ - ... ] - -# List of metric relabel configurations. -metric_relabel_configs: - [ - ... ] - -# An uncompressed response body larger than this many bytes will cause the -# scrape to fail. 0 means no limit. Example: 100MB. -# This is an experimental feature, this behaviour could -# change or be removed in the future. -[ body_size_limit: | default = 0 ] - -# Per-scrape limit on the number of scraped samples that will be accepted. -# If more than this number of samples are present after metric relabeling -# the entire scrape will be treated as failed. 0 means no limit. -[ sample_limit: | default = 0 ] - -# Limit on the number of labels that will be accepted per sample. If more -# than this number of labels are present on any sample post metric-relabeling, -# the entire scrape will be treated as failed. 
0 means no limit. -[ label_limit: | default = 0 ] - -# Limit on the length (in bytes) of each individual label name. If any label -# name in a scrape is longer than this number post metric-relabeling, the -# entire scrape will be treated as failed. Note that label names are UTF-8 -# encoded, and characters can take up to 4 bytes. 0 means no limit. -[ label_name_length_limit: | default = 0 ] - -# Limit on the length (in bytes) of each individual label value. If any label -# value in a scrape is longer than this number post metric-relabeling, the -# entire scrape will be treated as failed. Note that label values are UTF-8 -# encoded, and characters can take up to 4 bytes. 0 means no limit. -[ label_value_length_limit: | default = 0 ] - -# Limit per scrape config on number of unique targets that will be -# accepted. If more than this number of targets are present after target -# relabeling, Prometheus will mark the targets as failed without scraping them. -# 0 means no limit. This is an experimental feature, this behaviour could -# change in the future. -[ target_limit: | default = 0 ] - -# Limit per scrape config on the number of targets dropped by relabeling -# that will be kept in memory. 0 means no limit. -[ keep_dropped_targets: | default = 0 ] - -# Specifies the validation scheme for metric and label names. Either blank or -# "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons, and -# underscores. -[ metric_name_validation_scheme: | default "utf8" ] - -# Specifies the character escaping scheme that will be requested when scraping -# for metric and label names that do not conform to the legacy Prometheus -# character set. Available options are: -# * `allow-utf-8`: Full UTF-8 support, no escaping needed. -# * `underscores`: Escape all legacy-invalid characters to underscores. -# * `dots`: Escapes dots to `_dot_`, underscores to `__`, and all other -# legacy-invalid characters to underscores. 
-# * `values`: Prepend the name with `U__` and replace all invalid -# characters with their unicode value, surrounded by underscores. Single -# underscores are replaced with double underscores. -# e.g. "U__my_2e_dotted_2e_name". -# If this value is left blank, Prometheus will default to `allow-utf-8` if the -# validation scheme for the current scrape config is set to utf8, or -# `underscores` if the validation scheme is set to `legacy`. -[ metric_name_escaping_scheme: | default "allow-utf-8" ] - -# Limit on total number of positive and negative buckets allowed in a single -# native histogram. The resolution of a histogram with more buckets will be -# reduced until the number of buckets is within the limit. If the limit cannot -# be reached, the scrape will fail. -# 0 means no limit. -[ native_histogram_bucket_limit: | default = 0 ] - -# Lower limit for the growth factor of one bucket to the next in each native -# histogram. The resolution of a histogram with a lower growth factor will be -# reduced as much as possible until it is within the limit. 
-# To set an upper limit for the schema (equivalent to "scale" in OTel's -# exponential histograms), use the following factor limits: -# -# +----------------------------+----------------------------+ -# | growth factor | resulting schema AKA scale | -# +----------------------------+----------------------------+ -# | 65536 | -4 | -# +----------------------------+----------------------------+ -# | 256 | -3 | -# +----------------------------+----------------------------+ -# | 16 | -2 | -# +----------------------------+----------------------------+ -# | 4 | -1 | -# +----------------------------+----------------------------+ -# | 2 | 0 | -# +----------------------------+----------------------------+ -# | 1.4 | 1 | -# +----------------------------+----------------------------+ -# | 1.1 | 2 | -# +----------------------------+----------------------------+ -# | 1.09 | 3 | -# +----------------------------+----------------------------+ -# | 1.04 | 4 | -# +----------------------------+----------------------------+ -# | 1.02 | 5 | -# +----------------------------+----------------------------+ -# | 1.01 | 6 | -# +----------------------------+----------------------------+ -# | 1.005 | 7 | -# +----------------------------+----------------------------+ -# | 1.002 | 8 | -# +----------------------------+----------------------------+ -# -# 0 results in the smallest supported factor (which is currently ~1.0027 or -# schema 8, but might change in the future). -[ native_histogram_min_bucket_factor: | default = 0 ] - -# Specifies whether to convert classic histograms into native histograms with -# custom buckets (has no effect without --enable-feature=native-histograms). -[ convert_classic_histograms_to_nhcb: | default = -] -``` - -Where `` must be unique across all scrape configurations. - -### `` - -A `http_config` allows configuring HTTP requests. - -```yaml -# Sets the `Authorization` header on every request with the -# configured username and password. 
-# username and username_file are mutually exclusive. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ username_file: ] - [ password: ] - [ password_file: ] - -# Sets the `Authorization` header on every request with -# the configured credentials. -authorization: - # Sets the authentication type of the request. - [ type: | default: Bearer ] - # Sets the credentials of the request. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials of the request with the credentials read from the - # configured file. It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# Configures the request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, http_proxy, HTTPS_PROXY, https_proxy, NO_PROXY, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Header values. Hidden in the configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] -``` - -### `` - -A `tls_config` allows configuring TLS connections. - -```yaml -# CA certificate to validate API server certificate with.
At most one of ca and ca_file is allowed. -[ ca: ] -[ ca_file: ] - -# Certificate and key for client cert authentication to the server. -# At most one of cert and cert_file is allowed. -# At most one of key and key_file is allowed. -[ cert: ] -[ cert_file: ] -[ key: ] -[ key_file: ] - -# ServerName extension to indicate the name of the server. -# https://tools.ietf.org/html/rfc4366#section-3.1 -[ server_name: ] - -# Disable validation of the server certificate. -[ insecure_skip_verify: ] - -# Minimum acceptable TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS -# 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). -# If unset, Prometheus will use Go default minimum version, which is TLS 1.2. -# See MinVersion in https://pkg.go.dev/crypto/tls#Config. -[ min_version: ] -# Maximum acceptable TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS -# 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). -# If unset, Prometheus will use Go default maximum version, which is TLS 1.3. -# See MaxVersion in https://pkg.go.dev/crypto/tls#Config. -[ max_version: ] -``` - -### `` - -OAuth 2.0 authentication using the client credentials or password grant type. -Prometheus fetches an access token from the specified endpoint with -the given client access and secret keys. - -```yaml -client_id: -[ client_secret: ] - -# Read the client secret from a file. -# It is mutually exclusive with `client_secret`. -[ client_secret_file: ] - -# Scopes for the token request. -scopes: - [ - ... ] - -# The URL to fetch the token from. -token_url: - -# Optional parameters to append to the token URL. -# To set 'password' grant type, add it to params: -# endpoint_params: -# grant_type: 'password' -# username: 'username@example.com' -# password: 'strongpassword' -endpoint_params: - [ : ... ] - -# Configures the token request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. 
IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, http_proxy, HTTPS_PROXY, https_proxy, NO_PROXY, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Header values. Hidden in the configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] -``` - -### `` - -Azure SD configurations allow retrieving scrape targets from Azure VMs. - -The discovery requires at least the following permissions: - -* `Microsoft.Compute/virtualMachines/read`: Required for VM discovery -* `Microsoft.Network/networkInterfaces/read`: Required for VM discovery -* `Microsoft.Compute/virtualMachineScaleSets/virtualMachines/read`: Required for scale set (VMSS) discovery -* `Microsoft.Compute/virtualMachineScaleSets/virtualMachines/networkInterfaces/read`: Required for scale set (VMSS) discovery - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_azure_machine_id`: the machine ID -* `__meta_azure_machine_location`: the location the machine runs in -* `__meta_azure_machine_name`: the machine name -* `__meta_azure_machine_computer_name`: the machine computer name -* `__meta_azure_machine_os_type`: the machine operating system -* `__meta_azure_machine_private_ip`: the machine's private IP -* `__meta_azure_machine_public_ip`: the machine's public IP if it exists -* `__meta_azure_machine_resource_group`: the machine's resource group -* `__meta_azure_machine_tag_`: each tag value of the machine -* `__meta_azure_machine_scale_set`: the name of the scale set which the VM is part of (this value is only set if you are using a
[scale set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/)) -* `__meta_azure_machine_size`: the machine size -* `__meta_azure_subscription_id`: the subscription ID -* `__meta_azure_tenant_id`: the tenant ID - -See below for the configuration options for Azure discovery: - -```yaml -# The information to access the Azure API. -# The Azure environment. -[ environment: | default = AzurePublicCloud ] - -# The authentication method, either OAuth, ManagedIdentity or SDK. -# See https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview -# SDK authentication method uses environment variables by default. -# See https://learn.microsoft.com/en-us/azure/developer/go/azure-sdk-authentication -[ authentication_method: | default = OAuth] -# The subscription ID. Always required. -subscription_id: -# Optional tenant ID. Only required with authentication_method OAuth. -[ tenant_id: ] -# Optional client ID. Only required with authentication_method OAuth. -[ client_id: ] -# Optional client secret. Only required with authentication_method OAuth. -[ client_secret: ] - -# Optional resource group name. Limits discovery to this resource group. -[ resource_group: ] - -# Refresh interval to re-read the instance list. -[ refresh_interval: | default = 300s ] - -# The port to scrape metrics from. If using the public IP address, this must -# instead be specified in the relabeling rule. -[ port: | default = 80 ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Consul SD configurations allow retrieving scrape targets from [Consul's](https://www.consul.io) -Catalog API. 
- -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_consul_address`: the address of the target -* `__meta_consul_dc`: the datacenter name for the target -* `__meta_consul_health`: the health status of the service -* `__meta_consul_partition`: the admin partition name where the service is registered -* `__meta_consul_metadata_`: each node metadata key value of the target -* `__meta_consul_node`: the node name defined for the target -* `__meta_consul_service_address`: the service address of the target -* `__meta_consul_service_id`: the service ID of the target -* `__meta_consul_service_metadata_`: each service metadata key value of the target -* `__meta_consul_service_port`: the service port of the target -* `__meta_consul_service`: the name of the service the target belongs to -* `__meta_consul_tagged_address_`: each node tagged address key value of the target -* `__meta_consul_tags`: the list of tags of the target joined by the tag separator - -```yaml -# The information to access the Consul API. It is to be defined -# as the Consul documentation requires. -[ server: | default = "localhost:8500" ] -# Prefix for URIs for when consul is behind an API gateway (reverse proxy). -[ path_prefix: ] -[ token: ] -[ datacenter: ] -# Namespaces are only supported in Consul Enterprise. -[ namespace: ] -# Admin Partitions are only supported in Consul Enterprise. -[ partition: ] -[ scheme: | default = "http" ] -# The username and password fields are deprecated in favor of the basic_auth configuration. -[ username: ] -[ password: ] - -# A list of services for which targets are retrieved. If omitted, all services -# are scraped. -services: - [ - ] - -# A Consul Filter expression used to filter the catalog results -# See https://www.consul.io/api-docs/catalog#list-services to know more -# about the filter expressions that can be used. 
-[ filter: ] - -# The `tags` and `node_meta` fields are deprecated in Consul in favor of `filter`. -# An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list. -tags: - [ - ] - -# Node metadata key/value pairs to filter nodes for a given service. As of Consul 1.14, consider `filter` instead. -[ node_meta: - [ : ... ] ] - -# The string by which Consul tags are joined into the tag label. -[ tag_separator: | default = , ] - -# Allow stale Consul results (see https://www.consul.io/api/features/consistency.html). Will reduce load on Consul. -[ allow_stale: | default = true ] - -# The time after which the provided names are refreshed. -# On large setups it might be a good idea to increase this value because the catalog will change all the time. -[ refresh_interval: | default = 30s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -Note that the IP address and port used to scrape the targets are assembled as -`<__meta_consul_address>:<__meta_consul_service_port>`. However, in some -Consul setups, the relevant address is in `__meta_consul_service_address`. -In those cases, you can use the [relabel](#relabel_config) -feature to replace the special `__address__` label. - -The [relabeling phase](#relabel_config) is the preferred and more powerful -way to filter services or nodes for a service based on arbitrary labels. For -users with thousands of services it can be more efficient to use the Consul API -directly which has basic support for filtering nodes (currently by node -metadata and a single tag). - -### `` - -DigitalOcean SD configurations allow retrieving scrape targets from [DigitalOcean's](https://www.digitalocean.com/) -Droplets API. 
-This service discovery uses the public IPv4 address by default, but that can be -changed with relabeling, as demonstrated in [the Prometheus digitalocean-sd -configuration file](/documentation/examples/prometheus-digitalocean.yml). - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_digitalocean_droplet_id`: the id of the droplet -* `__meta_digitalocean_droplet_name`: the name of the droplet -* `__meta_digitalocean_image`: the slug of the droplet's image -* `__meta_digitalocean_image_name`: the display name of the droplet's image -* `__meta_digitalocean_private_ipv4`: the private IPv4 of the droplet -* `__meta_digitalocean_public_ipv4`: the public IPv4 of the droplet -* `__meta_digitalocean_public_ipv6`: the public IPv6 of the droplet -* `__meta_digitalocean_region`: the region of the droplet -* `__meta_digitalocean_size`: the size of the droplet -* `__meta_digitalocean_status`: the status of the droplet -* `__meta_digitalocean_features`: the comma-separated list of features of the droplet -* `__meta_digitalocean_tags`: the comma-separated list of tags of the droplet -* `__meta_digitalocean_vpc`: the id of the droplet's VPC - -```yaml -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The time after which the droplets are refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Docker SD configurations allow retrieving scrape targets from [Docker Engine](https://docs.docker.com/engine/) hosts. - -This SD discovers "containers" and will create a target for each network IP and port the container is configured to expose. 
- -Available meta labels: - -* `__meta_docker_container_id`: the id of the container -* `__meta_docker_container_name`: the name of the container -* `__meta_docker_container_network_mode`: the network mode of the container -* `__meta_docker_container_label_`: each label of the container, with any unsupported characters converted to an underscore -* `__meta_docker_network_id`: the ID of the network -* `__meta_docker_network_name`: the name of the network -* `__meta_docker_network_ingress`: whether the network is ingress -* `__meta_docker_network_internal`: whether the network is internal -* `__meta_docker_network_label_`: each label of the network, with any unsupported characters converted to an underscore -* `__meta_docker_network_scope`: the scope of the network -* `__meta_docker_network_ip`: the IP of the container in this network -* `__meta_docker_port_private`: the port on the container -* `__meta_docker_port_public`: the external port if a port-mapping exists -* `__meta_docker_port_public_ip`: the public IP if a port-mapping exists - -See below for the configuration options for Docker discovery: - -```yaml -# Address of the Docker daemon. -host: - -# The port to scrape metrics from, when `role` is nodes, and for discovered -# tasks and services that don't have published ports. -[ port: | default = 80 ] - -# The host to use if the container is in host networking mode. -[ host_networking_host: | default = "localhost" ] - -# Sort all non-nil networks in ascending order based on network name and -# get the first network if the container has multiple networks defined, -# thus avoiding collecting duplicate targets. -[ match_first_network: | default = true ] - -# Optional filters to limit the discovery process to a subset of available -# resources. -# The available filters are listed in the upstream documentation: -# https://docs.docker.com/engine/api/v1.40/#operation/ContainerList -[ filters: - [ - name: - values: , [...] 
] - -# The time after which the containers are refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -The [relabeling phase](#relabel_config) is the preferred and more powerful -way to filter containers. For users with thousands of containers it -can be more efficient to use the Docker API directly which has basic support for -filtering containers (using `filters`). - -See [this example Prometheus configuration file](/documentation/examples/prometheus-docker.yml) -for a detailed example of configuring Prometheus for Docker Engine. - -### `` - -Docker Swarm SD configurations allow retrieving scrape targets from [Docker Swarm](https://docs.docker.com/engine/swarm/) -engine. - -One of the following roles can be configured to discover targets: - -#### `services` - -The `services` role discovers all [Swarm services](https://docs.docker.com/engine/swarm/key-concepts/#services-and-tasks) -and exposes their ports as targets. For each published port of a service, a -single target is generated. If a service has no published ports, a target per -service is created using the `port` parameter defined in the SD configuration. 
- -Available meta labels: - -* `__meta_dockerswarm_service_id`: the id of the service -* `__meta_dockerswarm_service_name`: the name of the service -* `__meta_dockerswarm_service_mode`: the mode of the service -* `__meta_dockerswarm_service_endpoint_port_name`: the name of the endpoint port, if available -* `__meta_dockerswarm_service_endpoint_port_publish_mode`: the publish mode of the endpoint port -* `__meta_dockerswarm_service_label_`: each label of the service, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_service_task_container_hostname`: the container hostname of the target, if available -* `__meta_dockerswarm_service_task_container_image`: the container image of the target -* `__meta_dockerswarm_service_updating_status`: the status of the service, if available -* `__meta_dockerswarm_network_id`: the ID of the network -* `__meta_dockerswarm_network_name`: the name of the network -* `__meta_dockerswarm_network_ingress`: whether the network is ingress -* `__meta_dockerswarm_network_internal`: whether the network is internal -* `__meta_dockerswarm_network_label_`: each label of the network, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_network_scope`: the scope of the network - -#### `tasks` - -The `tasks` role discovers all [Swarm tasks](https://docs.docker.com/engine/swarm/key-concepts/#services-and-tasks) -and exposes their ports as targets. For each published port of a task, a single -target is generated. If a task has no published ports, a target per task is -created using the `port` parameter defined in the SD configuration. 
- -Available meta labels: - -* `__meta_dockerswarm_container_label_`: each label of the container, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_task_id`: the id of the task -* `__meta_dockerswarm_task_container_id`: the container id of the task -* `__meta_dockerswarm_task_desired_state`: the desired state of the task -* `__meta_dockerswarm_task_slot`: the slot of the task -* `__meta_dockerswarm_task_state`: the state of the task -* `__meta_dockerswarm_task_port_publish_mode`: the publish mode of the task port -* `__meta_dockerswarm_service_id`: the id of the service -* `__meta_dockerswarm_service_name`: the name of the service -* `__meta_dockerswarm_service_mode`: the mode of the service -* `__meta_dockerswarm_service_label_`: each label of the service, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_network_id`: the ID of the network -* `__meta_dockerswarm_network_name`: the name of the network -* `__meta_dockerswarm_network_ingress`: whether the network is ingress -* `__meta_dockerswarm_network_internal`: whether the network is internal -* `__meta_dockerswarm_network_label_`: each label of the network, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_network_label`: each label of the network, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_network_scope`: the scope of the network -* `__meta_dockerswarm_node_id`: the ID of the node -* `__meta_dockerswarm_node_hostname`: the hostname of the node -* `__meta_dockerswarm_node_address`: the address of the node -* `__meta_dockerswarm_node_availability`: the availability of the node -* `__meta_dockerswarm_node_label_`: each label of the node, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_node_platform_architecture`: the architecture of the node -* `__meta_dockerswarm_node_platform_os`: the operating system of the node -* 
`__meta_dockerswarm_node_role`: the role of the node -* `__meta_dockerswarm_node_status`: the status of the node - -The `__meta_dockerswarm_network_*` meta labels are not populated for ports which -are published with `mode=host`. - -#### `nodes` - -The `nodes` role is used to discover [Swarm nodes](https://docs.docker.com/engine/swarm/key-concepts/#nodes). - -Available meta labels: - -* `__meta_dockerswarm_node_address`: the address of the node -* `__meta_dockerswarm_node_availability`: the availability of the node -* `__meta_dockerswarm_node_engine_version`: the version of the node engine -* `__meta_dockerswarm_node_hostname`: the hostname of the node -* `__meta_dockerswarm_node_id`: the ID of the node -* `__meta_dockerswarm_node_label_`: each label of the node, with any unsupported characters converted to an underscore -* `__meta_dockerswarm_node_manager_address`: the address of the manager component of the node -* `__meta_dockerswarm_node_manager_leader`: the leadership status of the manager component of the node (true or false) -* `__meta_dockerswarm_node_manager_reachability`: the reachability of the manager component of the node -* `__meta_dockerswarm_node_platform_architecture`: the architecture of the node -* `__meta_dockerswarm_node_platform_os`: the operating system of the node -* `__meta_dockerswarm_node_role`: the role of the node -* `__meta_dockerswarm_node_status`: the status of the node - -See below for the configuration options for Docker Swarm discovery: - -```yaml -# Address of the Docker daemon. -host: - -# Role of the targets to retrieve. Must be `services`, `tasks`, or `nodes`. -role: - -# The port to scrape metrics from, when `role` is nodes, and for discovered -# tasks and services that don't have published ports. -[ port: | default = 80 ] - -# Optional filters to limit the discovery process to a subset of available -# resources. 
-# The available filters are listed in the upstream documentation: -# Services: https://docs.docker.com/engine/api/v1.40/#operation/ServiceList -# Tasks: https://docs.docker.com/engine/api/v1.40/#operation/TaskList -# Nodes: https://docs.docker.com/engine/api/v1.40/#operation/NodeList -[ filters: - [ - name: - values: , [...] ] - -# The time after which the service discovery data is refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -The [relabeling phase](#relabel_config) is the preferred and more powerful -way to filter tasks, services or nodes. For users with thousands of tasks it -can be more efficient to use the Swarm API directly which has basic support for -filtering nodes (using `filters`). - -See [this example Prometheus configuration file](/documentation/examples/prometheus-dockerswarm.yml) -for a detailed example of configuring Prometheus for Docker Swarm. - -### `` - -A DNS-based service discovery configuration allows specifying a set of DNS -domain names which are periodically queried to discover a list of targets. The -DNS servers to be contacted are read from `/etc/resolv.conf`. - -This service discovery method only supports basic DNS A, AAAA, MX, NS and SRV -record queries, but not the advanced DNS-SD approach specified in -[RFC6763](https://tools.ietf.org/html/rfc6763). - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_dns_name`: the record name that produced the discovered target. -* `__meta_dns_srv_record_target`: the target field of the SRV record -* `__meta_dns_srv_record_port`: the port field of the SRV record -* `__meta_dns_mx_record_target`: the target field of the MX record -* `__meta_dns_ns_record_target`: the target field of the NS record - -```yaml -# A list of DNS domain names to be queried. 
-names: - [ - ] - -# The type of DNS query to perform. One of SRV, A, AAAA, MX or NS. -[ type: | default = 'SRV' ] - -# The port number used if the query type is not SRV. -[ port: ] - -# The time after which the provided names are refreshed. -[ refresh_interval: | default = 30s ] -``` - -### `` - -EC2 SD configurations allow retrieving scrape targets from AWS EC2 -instances. The private IP address is used by default, but may be changed to -the public IP address with relabeling. - -The IAM credentials used must have the `ec2:DescribeInstances` permission to -discover scrape targets, and may optionally have the -`ec2:DescribeAvailabilityZones` permission if you want the availability zone ID -available as a label (see below). - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_ec2_ami`: the EC2 Amazon Machine Image -* `__meta_ec2_architecture`: the architecture of the instance -* `__meta_ec2_availability_zone`: the availability zone in which the instance is running -* `__meta_ec2_availability_zone_id`: the [availability zone ID](https://docs.aws.amazon.com/ram/latest/userguide/working-with-az-ids.html) in which the instance is running (requires `ec2:DescribeAvailabilityZones`) -* `__meta_ec2_instance_id`: the EC2 instance ID -* `__meta_ec2_instance_lifecycle`: the lifecycle of the EC2 instance, set only for 'spot' or 'scheduled' instances, absent otherwise -* `__meta_ec2_instance_state`: the state of the EC2 instance -* `__meta_ec2_instance_type`: the type of the EC2 instance -* `__meta_ec2_ipv6_addresses`: comma separated list of IPv6 addresses assigned to the instance's network interfaces, if present -* `__meta_ec2_owner_id`: the ID of the AWS account that owns the EC2 instance -* `__meta_ec2_platform`: the Operating System platform, set to 'windows' on Windows servers, absent otherwise -* `__meta_ec2_primary_ipv6_addresses`: comma separated list of the Primary IPv6 addresses of the instance, if present. 
The list is ordered based on the position of each corresponding network interface in the attachment order. -* `__meta_ec2_primary_subnet_id`: the subnet ID of the primary network interface, if available -* `__meta_ec2_private_dns_name`: the private DNS name of the instance, if available -* `__meta_ec2_private_ip`: the private IP address of the instance, if present -* `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available -* `__meta_ec2_public_ip`: the public IP address of the instance, if available -* `__meta_ec2_region`: the region of the instance -* `__meta_ec2_subnet_id`: comma separated list of subnets IDs in which the instance is running, if available -* `__meta_ec2_tag_`: each tag value of the instance -* `__meta_ec2_vpc_id`: the ID of the VPC in which the instance is running, if available - -See below for the configuration options for EC2 discovery: - -```yaml -# The information to access the EC2 API. - -# The AWS region. If blank, the region from the instance metadata is used. -[ region: ] - -# Custom endpoint to be used. -[ endpoint: ] - -# The AWS API keys. If blank, the environment variables `AWS_ACCESS_KEY_ID` -# and `AWS_SECRET_ACCESS_KEY` are used. -[ access_key: ] -[ secret_key: ] -# Named AWS profile used to connect to the API. -[ profile: ] - -# AWS Role ARN, an alternative to using AWS API keys. -[ role_arn: ] - -# Refresh interval to re-read the instance list. -[ refresh_interval: | default = 60s ] - -# The port to scrape metrics from. If using the public IP address, this must -# instead be specified in the relabeling rule. -[ port: | default = 80 ] - -# Filters can be used optionally to filter the instance list by other criteria. -# Available filter criteria can be found here: -# https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html -# Filter API documentation: https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Filter.html -filters: - [ - name: - values: , [...] 
] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -The [relabeling phase](#relabel_config) is the preferred and more powerful -way to filter targets based on arbitrary labels. For users with thousands of -instances it can be more efficient to use the EC2 API directly which has -support for filtering instances. - -### `` - -OpenStack SD configurations allow retrieving scrape targets from OpenStack Nova -instances. - -One of the following `` types can be configured to discover targets: - -#### `hypervisor` - -The `hypervisor` role discovers one target per Nova hypervisor node. The target -address defaults to the `host_ip` attribute of the hypervisor. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_openstack_hypervisor_host_ip`: the hypervisor node's IP address. -* `__meta_openstack_hypervisor_hostname`: the hypervisor node's name. -* `__meta_openstack_hypervisor_id`: the hypervisor node's ID. -* `__meta_openstack_hypervisor_state`: the hypervisor node's state. -* `__meta_openstack_hypervisor_status`: the hypervisor node's status. -* `__meta_openstack_hypervisor_type`: the hypervisor node's type. - -#### `instance` - -The `instance` role discovers one target per network interface of Nova -instance. The target address defaults to the private IP address of the network -interface. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_openstack_address_pool`: the pool of the private IP. -* `__meta_openstack_instance_flavor`: the flavor name of the OpenStack instance, or the flavor ID if the flavor name isn't available. -* `__meta_openstack_instance_id`: the OpenStack instance ID. -* `__meta_openstack_instance_image`: the ID of the image the OpenStack instance is using. -* `__meta_openstack_instance_name`: the OpenStack instance name. 
-* `__meta_openstack_instance_status`: the status of the OpenStack instance. -* `__meta_openstack_private_ip`: the private IP of the OpenStack instance. -* `__meta_openstack_project_id`: the project (tenant) owning this instance. -* `__meta_openstack_public_ip`: the public IP of the OpenStack instance. -* `__meta_openstack_tag_`: each metadata item of the instance, with any unsupported characters converted to an underscore. -* `__meta_openstack_user_id`: the user account owning the tenant. - -#### `loadbalancer` - -The `loadbalancer` role discovers one target per Octavia loadbalancer with a -`PROMETHEUS` listener. The target address defaults to the VIP address -of the load balancer. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_openstack_loadbalancer_availability_zone`: the availability zone of the OpenStack load balancer. -* `__meta_openstack_loadbalancer_floating_ip`: the floating IP of the OpenStack load balancer. -* `__meta_openstack_loadbalancer_id`: the OpenStack load balancer ID. -* `__meta_openstack_loadbalancer_name`: the OpenStack load balancer name. -* `__meta_openstack_loadbalancer_provider`: the Octavia provider of the OpenStack load balancer. -* `__meta_openstack_loadbalancer_operating_status`: the operating status of the OpenStack load balancer. -* `__meta_openstack_loadbalancer_provisioning_status`: the provisioning status of the OpenStack load balancer. -* `__meta_openstack_loadbalancer_tags`: comma separated list of tags of the OpenStack load balancer. -* `__meta_openstack_loadbalancer_vip`: the VIP of the OpenStack load balancer. -* `__meta_openstack_project_id`: the project (tenant) owning this load balancer. - -See below for the configuration options for OpenStack discovery: - -```yaml -# The information to access the OpenStack API. - -# The OpenStack role of entities that should be discovered. -role: - -# The OpenStack Region. 
-region: - -# identity_endpoint specifies the HTTP endpoint that is required to work with -# the Identity API of the appropriate version. While it's ultimately needed by -# all of the identity services, it will often be populated by a provider-level -# function. -[ identity_endpoint: ] - -# username is required if using Identity V2 API. Consult with your provider's -# control panel to discover your account's username. In Identity V3, either -# userid or a combination of username and domain_id or domain_name are needed. -[ username: ] -[ userid: ] - -# password for the Identity V2 and V3 APIs. Consult with your provider's -# control panel to discover your account's preferred method of authentication. -[ password: ] - -# At most one of domain_id and domain_name must be provided if using username -# with Identity V3. Otherwise, either are optional. -[ domain_name: ] -[ domain_id: ] - -# The project_id and project_name fields are optional for the Identity V2 API. -# Some providers allow you to specify a project_name instead of the project_id. -# Some require both. Your provider's authentication policies will determine -# how these fields influence authentication. -[ project_name: ] -[ project_id: ] - -# The application_credential_id or application_credential_name fields are -# required if using an application credential to authenticate. Some providers -# allow you to create an application credential to authenticate rather than a -# password. -[ application_credential_name: ] -[ application_credential_id: ] - -# The application_credential_secret field is required if using an application -# credential to authenticate. -[ application_credential_secret: ] - -# Whether the service discovery should list all instances for all projects. -# It is only relevant for the 'instance' role and usually requires admin permissions. -[ all_tenants: | default = false ] - -# Refresh interval to re-read the instance list. 
-[ refresh_interval: | default = 60s ] - -# The port to scrape metrics from. If using the public IP address, this must -# instead be specified in the relabeling rule. -[ port: | default = 80 ] - -# The availability of the endpoint to connect to. Must be one of public, admin or internal. -[ availability: | default = "public" ] - -# TLS configuration. -tls_config: - [ ] -``` - -### `` - -OVHcloud SD configurations allow retrieving scrape targets from OVHcloud's [dedicated servers](https://www.ovhcloud.com/en/bare-metal/) and [VPS](https://www.ovhcloud.com/en/vps/) using -their [API](https://api.ovh.com/). -Prometheus will periodically check the REST endpoint and create a target for every discovered server. -The role will try to use the public IPv4 address as default address, if there's none it will try to use the IPv6 one. This may be changed with relabeling. -For OVHcloud's [public cloud instances](https://www.ovhcloud.com/en/public-cloud/) you can use the [openstack_sd_config](#openstack_sd_config). 
- -#### VPS - -* `__meta_ovhcloud_vps_cluster`: the cluster of the server -* `__meta_ovhcloud_vps_datacenter`: the datacenter of the server -* `__meta_ovhcloud_vps_disk`: the disk of the server -* `__meta_ovhcloud_vps_display_name`: the display name of the server -* `__meta_ovhcloud_vps_ipv4`: the IPv4 of the server -* `__meta_ovhcloud_vps_ipv6`: the IPv6 of the server -* `__meta_ovhcloud_vps_keymap`: the KVM keyboard layout of the server -* `__meta_ovhcloud_vps_maximum_additional_ip`: the maximum additional IPs of the server -* `__meta_ovhcloud_vps_memory_limit`: the memory limit of the server -* `__meta_ovhcloud_vps_memory`: the memory of the server -* `__meta_ovhcloud_vps_monitoring_ip_blocks`: the monitoring IP blocks of the server -* `__meta_ovhcloud_vps_name`: the name of the server -* `__meta_ovhcloud_vps_netboot_mode`: the netboot mode of the server -* `__meta_ovhcloud_vps_offer_type`: the offer type of the server -* `__meta_ovhcloud_vps_offer`: the offer of the server -* `__meta_ovhcloud_vps_state`: the state of the server -* `__meta_ovhcloud_vps_vcore`: the number of virtual cores of the server -* `__meta_ovhcloud_vps_version`: the version of the server -* `__meta_ovhcloud_vps_zone`: the zone of the server - -#### Dedicated servers - -* `__meta_ovhcloud_dedicated_server_commercial_range`: the commercial range of the server -* `__meta_ovhcloud_dedicated_server_datacenter`: the datacenter of the server -* `__meta_ovhcloud_dedicated_server_ipv4`: the IPv4 of the server -* `__meta_ovhcloud_dedicated_server_ipv6`: the IPv6 of the server -* `__meta_ovhcloud_dedicated_server_link_speed`: the link speed of the server -* `__meta_ovhcloud_dedicated_server_name`: the name of the server -* `__meta_ovhcloud_dedicated_server_no_intervention`: whether datacenter intervention is disabled for the server -* `__meta_ovhcloud_dedicated_server_os`: the operating system of the server -* `__meta_ovhcloud_dedicated_server_rack`: the rack of the server -* 
`__meta_ovhcloud_dedicated_server_reverse`: the reverse DNS name of the server -* `__meta_ovhcloud_dedicated_server_server_id`: the ID of the server -* `__meta_ovhcloud_dedicated_server_state`: the state of the server -* `__meta_ovhcloud_dedicated_server_support_level`: the support level of the server - -See below for the configuration options for OVHcloud discovery: - -```yaml -# Access key to use. https://api.ovh.com -application_key: -application_secret: -consumer_key: -# Service of the targets to retrieve. Must be `vps` or `dedicated_server`. -service: -# API endpoint. https://github.com/ovh/go-ovh#supported-apis -[ endpoint: | default = "ovh-eu" ] -# Refresh interval to re-read the resources list. -[ refresh_interval: | default = 60s ] -``` - -### `` - -PuppetDB SD configurations allow retrieving scrape targets from -[PuppetDB](https://puppet.com/docs/puppetdb/latest/index.html) resources. - -This SD discovers resources and will create a target for each resource returned -by the API. - -The resource address is the `certname` of the resource and can be changed during -[relabeling](#relabel_config). 
- -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_puppetdb_query`: the Puppet Query Language (PQL) query -* `__meta_puppetdb_certname`: the name of the node associated with the resource -* `__meta_puppetdb_resource`: a SHA-1 hash of the resource’s type, title, and parameters, for identification -* `__meta_puppetdb_type`: the resource type -* `__meta_puppetdb_title`: the resource title -* `__meta_puppetdb_exported`: whether the resource is exported (`"true"` or `"false"`) -* `__meta_puppetdb_tags`: comma separated list of resource tags -* `__meta_puppetdb_file`: the manifest file in which the resource was declared -* `__meta_puppetdb_environment`: the environment of the node associated with the resource -* `__meta_puppetdb_parameter_`: the parameters of the resource - - -See below for the configuration options for PuppetDB discovery: - -```yaml -# The URL of the PuppetDB root query endpoint. -url: - -# Puppet Query Language (PQL) query. Only resources are supported. -# https://puppet.com/docs/puppetdb/latest/api/query/v4/pql.html -query: - -# Whether to include the parameters as meta labels. -# Due to the differences between parameter types and Prometheus labels, -# some parameters might not be rendered. The format of the parameters might -# also change in future releases. -# -# Note: Enabling this exposes parameters in the Prometheus UI and API. Make sure -# that you don't have secrets exposed as parameters if you enable this. -[ include_parameters: | default = false ] - -# Refresh interval to re-read the resources list. -[ refresh_interval: | default = 60s ] - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. 
-[ ] -``` - -See [this example Prometheus configuration file](/documentation/examples/prometheus-puppetdb.yml) -for a detailed example of configuring Prometheus with PuppetDB. - - -### `` - -File-based service discovery provides a more generic way to configure static targets -and serves as an interface to plug in custom service discovery mechanisms. - -It reads a set of files containing a list of zero or more -``s. Changes to all defined files are detected via disk watches -and applied immediately. - -While those individual files are watched for changes, -the parent directory is also watched implicitly. This is to handle [atomic -renaming](https://github.com/fsnotify/fsnotify/blob/c1467c02fba575afdb5f4201072ab8403bbf00f4/README.md?plain=1#L128) efficiently and to detect new files that match the configured globs. -This may cause issues if the parent directory contains a large number of other files, -as each of these files will be watched too, even though the events related -to them are not relevant. - -Files may be provided in YAML or JSON format. Only -changes resulting in well-formed target groups are applied. - -Files must contain a list of static configs, using these formats: - -**JSON** - -```json -[ - { - "targets": [ "", ... ], - "labels": { - "": "", ... - } - }, - ... -] -``` - -**YAML** - -```yaml -- targets: - [ - '' ] - labels: - [ : ... ] -``` - -As a fallback, the file contents are also re-read periodically at the specified -refresh interval. - -Each target has a meta label `__meta_filepath` during the -[relabeling phase](#relabel_config). Its value is set to the -filepath from which the target was extracted. - -There is a list of -[integrations](https://prometheus.io/docs/operating/integrations/#file-service-discovery) with this -discovery mechanism. - -```yaml -# Patterns for files from which target groups are extracted. -files: - [ - ... ] - -# Refresh interval to re-read the files. 
-[ refresh_interval: | default = 5m ] -``` - -Where `` may be a path ending in `.json`, `.yml` or `.yaml`. The last path segment -may contain a single `*` that matches any character sequence, e.g. `my/path/tg_*.json`. - -### `` - -[GCE](https://cloud.google.com/compute/) SD configurations allow retrieving scrape targets from GCP GCE instances. -The private IP address is used by default, but may be changed to the public IP -address with relabeling. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_gce_instance_id`: the numeric id of the instance -* `__meta_gce_instance_name`: the name of the instance -* `__meta_gce_label_`: each GCE label of the instance, with any unsupported characters converted to an underscore -* `__meta_gce_machine_type`: full or partial URL of the machine type of the instance -* `__meta_gce_metadata_`: each metadata item of the instance -* `__meta_gce_network`: the network URL of the instance -* `__meta_gce_private_ip`: the private IP address of the instance -* `__meta_gce_interface_ipv4_`: IPv4 address of each named interface -* `__meta_gce_project`: the GCP project in which the instance is running -* `__meta_gce_public_ip`: the public IP address of the instance, if present -* `__meta_gce_subnetwork`: the subnetwork URL of the instance -* `__meta_gce_tags`: comma separated list of instance tags -* `__meta_gce_zone`: the GCE zone URL in which the instance is running - -See below for the configuration options for GCE discovery: - -```yaml -# The information to access the GCE API. - -# The GCP Project -project: - -# The zone of the scrape targets. If you need multiple zones use multiple -# gce_sd_configs. 
-zone: - -# Filter can be used optionally to filter the instance list by other criteria -# Syntax of this filter string is described here in the filter query parameter section: -# https://cloud.google.com/compute/docs/reference/latest/instances/list -[ filter: ] - -# Refresh interval to re-read the instance list -[ refresh_interval: | default = 60s ] - -# The port to scrape metrics from. If using the public IP address, this must -# instead be specified in the relabeling rule. -[ port: | default = 80 ] - -# The tag separator is used to separate the tags on concatenation -[ tag_separator: | default = , ] -``` - -Credentials are discovered by the Google Cloud SDK default client by looking -in the following places, preferring the first location found: - -1. a JSON file specified by the `GOOGLE_APPLICATION_CREDENTIALS` environment variable -2. a JSON file in the well-known path `$HOME/.config/gcloud/application_default_credentials.json` -3. fetched from the GCE metadata server - -If Prometheus is running within GCE, the service account associated with the -instance it is running on should have at least read-only permissions to the -compute resources. If running outside of GCE make sure to create an appropriate -service account and place the credential file in one of the expected locations. - -### `` - -Hetzner SD configurations allow retrieving scrape targets from -[Hetzner](https://www.hetzner.com/) [Cloud](https://www.hetzner.cloud/) API and -[Robot](https://docs.hetzner.com/robot/) API. -This service discovery uses the public IPv4 address by default, but that can be -changed with relabeling, as demonstrated in [the Prometheus hetzner-sd -configuration file](/documentation/examples/prometheus-hetzner.yml). 
- -The following meta labels are available on all targets during [relabeling](#relabel_config): - -* `__meta_hetzner_server_id`: the ID of the server -* `__meta_hetzner_server_name`: the name of the server -* `__meta_hetzner_server_status`: the status of the server -* `__meta_hetzner_public_ipv4`: the public ipv4 address of the server -* `__meta_hetzner_public_ipv6_network`: the public ipv6 network (/64) of the server -* `__meta_hetzner_datacenter`: the datacenter of the server - -The labels below are only available for targets with `role` set to `hcloud`: - -* `__meta_hetzner_hcloud_image_name`: the image name of the server -* `__meta_hetzner_hcloud_image_description`: the description of the server image -* `__meta_hetzner_hcloud_image_os_flavor`: the OS flavor of the server image -* `__meta_hetzner_hcloud_image_os_version`: the OS version of the server image -* `__meta_hetzner_hcloud_datacenter_location`: the location of the server -* `__meta_hetzner_hcloud_datacenter_location_network_zone`: the network zone of the server -* `__meta_hetzner_hcloud_server_type`: the type of the server -* `__meta_hetzner_hcloud_cpu_cores`: the CPU cores count of the server -* `__meta_hetzner_hcloud_cpu_type`: the CPU type of the server (shared or dedicated) -* `__meta_hetzner_hcloud_memory_size_gb`: the amount of memory of the server (in GB) -* `__meta_hetzner_hcloud_disk_size_gb`: the disk size of the server (in GB) -* `__meta_hetzner_hcloud_private_ipv4_`: the private ipv4 address of the server within a given network -* `__meta_hetzner_hcloud_label_`: each label of the server, with any unsupported characters converted to an underscore -* `__meta_hetzner_hcloud_labelpresent_`: `true` for each label of the server, with any unsupported characters converted to an underscore - -The labels below are only available for targets with `role` set to `robot`: - -* `__meta_hetzner_robot_product`: the product of the server -* `__meta_hetzner_robot_cancelled`: the server cancellation status - 
-```yaml -# The Hetzner role of entities that should be discovered. -# One of robot or hcloud. -role: - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The time after which the servers are refreshed. -[ refresh_interval: | default = 60s ] - -# Label selector used to filter the servers when fetching them from the API. See https://docs.hetzner.cloud/#label-selector for more details. -# Only used when role is hcloud. -[ label_selector: ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -HTTP-based service discovery provides a more generic way to configure static targets -and serves as an interface to plug in custom service discovery mechanisms. - -It fetches targets from an HTTP endpoint containing a list of zero or more -``s. The target must reply with an HTTP 200 response. -The HTTP header `Content-Type` must be `application/json`, and the body must be -valid JSON. - -Example response body: - -```json -[ - { - "targets": [ "", ... ], - "labels": { - "": "", ... - } - }, - ... -] -``` - -The endpoint is queried periodically at the specified refresh interval. -The `prometheus_sd_http_failures_total` counter metric tracks the number of -refresh failures. - -Each target has a meta label `__meta_url` during the -[relabeling phase](#relabel_config). Its value is set to the -URL from which the target was extracted. - -```yaml -# URL from which the targets are fetched. -url: - -# Refresh interval to re-query the endpoint. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -IONOS SD configurations allow retrieving scrape targets from -[IONOS Cloud](https://cloud.ionos.com/) API.
This service discovery uses the -first NIC's IP address by default, but that can be changed with relabeling. The -following meta labels are available on all targets during -[relabeling](#relabel_config): - -* `__meta_ionos_server_availability_zone`: the availability zone of the server -* `__meta_ionos_server_boot_cdrom_id`: the ID of the CD-ROM the server is booted - from -* `__meta_ionos_server_boot_image_id`: the ID of the boot image or snapshot the - server is booted from -* `__meta_ionos_server_boot_volume_id`: the ID of the boot volume -* `__meta_ionos_server_cpu_family`: the CPU family of the server -* `__meta_ionos_server_id`: the ID of the server -* `__meta_ionos_server_ip`: comma separated list of all IPs assigned to the - server -* `__meta_ionos_server_lifecycle`: the lifecycle state of the server resource -* `__meta_ionos_server_name`: the name of the server -* `__meta_ionos_server_nic_ip_`: comma separated list of IPs, grouped - by the name of each NIC attached to the server -* `__meta_ionos_server_servers_id`: the ID of the servers the server belongs to -* `__meta_ionos_server_state`: the execution state of the server -* `__meta_ionos_server_type`: the type of the server - -```yaml -# The unique ID of the data center. -datacenter_id: - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The time after which the servers are refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Kubernetes SD configurations allow retrieving scrape targets from -[Kubernetes'](https://kubernetes.io/) REST API and always staying synchronized with -the cluster state. - -One of the following `role` types can be configured to discover targets: - -#### `node` - -The `node` role discovers one target per cluster node with the address defaulting -to the Kubelet's HTTP port.
-The target address defaults to the first existing address of the Kubernetes -node object in the address type order of `NodeInternalIP`, `NodeExternalIP`, -`NodeLegacyHostIP`, and `NodeHostName`. - -Available meta labels: - -* `__meta_kubernetes_node_name`: The name of the node object. -* `__meta_kubernetes_node_provider_id`: The cloud provider's name for the node object. -* `__meta_kubernetes_node_label_`: Each label from the node object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_node_labelpresent_`: `true` for each label from the node object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_node_annotation_`: Each annotation from the node object. -* `__meta_kubernetes_node_annotationpresent_`: `true` for each annotation from the node object. -* `__meta_kubernetes_node_address_`: The first address for each node address type, if it exists. - -In addition, the `instance` label for the node will be set to the node name -as retrieved from the API server. - -#### `service` - -The `service` role discovers a target for each service port for each service. -This is generally useful for blackbox monitoring of a service. -The address will be set to the Kubernetes DNS name of the service and respective -service port. - -Available meta labels: - -* `__meta_kubernetes_namespace`: The namespace of the service object. -* `__meta_kubernetes_service_annotation_`: Each annotation from the service object. -* `__meta_kubernetes_service_annotationpresent_`: "true" for each annotation of the service object. -* `__meta_kubernetes_service_cluster_ip`: The cluster IP address of the service. (Does not apply to services of type ExternalName) -* `__meta_kubernetes_service_loadbalancer_ip`: The IP address of the loadbalancer. (Applies to services of type LoadBalancer) -* `__meta_kubernetes_service_external_name`: The DNS name of the service. 
(Applies to services of type ExternalName) -* `__meta_kubernetes_service_label_`: Each label from the service object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_service_labelpresent_`: `true` for each label of the service object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_service_name`: The name of the service object. -* `__meta_kubernetes_service_port_name`: Name of the service port for the target. -* `__meta_kubernetes_service_port_number`: Number of the service port for the target. -* `__meta_kubernetes_service_port_protocol`: Protocol of the service port for the target. -* `__meta_kubernetes_service_type`: The type of the service. - -#### `pod` - -The `pod` role discovers all pods and exposes their containers as targets. For each declared -port of a container, a single target is generated. If a container has no specified ports, -a port-free target per container is created for manually adding a port via relabeling. - -Available meta labels: - -* `__meta_kubernetes_namespace`: The namespace of the pod object. -* `__meta_kubernetes_pod_name`: The name of the pod object. -* `__meta_kubernetes_pod_ip`: The pod IP of the pod object. -* `__meta_kubernetes_pod_label_`: Each label from the pod object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_pod_labelpresent_`: `true` for each label from the pod object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_pod_annotation_`: Each annotation from the pod object. -* `__meta_kubernetes_pod_annotationpresent_`: `true` for each annotation from the pod object. -* `__meta_kubernetes_pod_container_init`: `true` if the container is an [InitContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) -* `__meta_kubernetes_pod_container_name`: Name of the container the target address points to. 
-* `__meta_kubernetes_pod_container_id`: ID of the container the target address points to. The ID is in the form `://`. -* `__meta_kubernetes_pod_container_image`: The image the container is using. -* `__meta_kubernetes_pod_container_port_name`: Name of the container port. -* `__meta_kubernetes_pod_container_port_number`: Number of the container port. -* `__meta_kubernetes_pod_container_port_protocol`: Protocol of the container port. -* `__meta_kubernetes_pod_ready`: Set to `true` or `false` for the pod's ready state. -* `__meta_kubernetes_pod_phase`: Set to `Pending`, `Running`, `Succeeded`, `Failed` or `Unknown` - in the [lifecycle](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase). -* `__meta_kubernetes_pod_node_name`: The name of the node the pod is scheduled onto. -* `__meta_kubernetes_pod_host_ip`: The current host IP of the pod object. -* `__meta_kubernetes_pod_uid`: The UID of the pod object. -* `__meta_kubernetes_pod_controller_kind`: Object kind of the pod controller. -* `__meta_kubernetes_pod_controller_name`: Name of the pod controller. - -#### `endpoints` - -The `endpoints` role discovers targets from listed endpoints of a service. For each endpoint -address one target is discovered per port. If the endpoint is backed by a pod, all -additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well. - -Note that the Endpoints API is [deprecated in Kubernetes v1.33+](https://kubernetes.io/blog/2025/04/24/endpoints-deprecation/), -it is recommended to use EndpointSlices instead and switch to the `endpointslice` role below. - -Available meta labels: - -* `__meta_kubernetes_namespace`: The namespace of the endpoints object. -* `__meta_kubernetes_endpoints_name`: The names of the endpoints object. -* `__meta_kubernetes_endpoints_label_`: Each label from the endpoints object, with any unsupported characters converted to an underscore. 
-* `__meta_kubernetes_endpoints_labelpresent_`: `true` for each label from the endpoints object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_endpoints_annotation_`: Each annotation from the endpoints object. -* `__meta_kubernetes_endpoints_annotationpresent_`: `true` for each annotation from the endpoints object. -* For all targets discovered directly from the endpoints list (those not additionally inferred - from underlying pods), the following labels are attached: - * `__meta_kubernetes_endpoint_hostname`: Hostname of the endpoint. - * `__meta_kubernetes_endpoint_node_name`: Name of the node hosting the endpoint. - * `__meta_kubernetes_endpoint_ready`: Set to `true` or `false` for the endpoint's ready state. - * `__meta_kubernetes_endpoint_port_name`: Name of the endpoint port. - * `__meta_kubernetes_endpoint_port_protocol`: Protocol of the endpoint port. - * `__meta_kubernetes_endpoint_address_target_kind`: Kind of the endpoint address target. - * `__meta_kubernetes_endpoint_address_target_name`: Name of the endpoint address target. -* If the endpoints belong to a service, all labels of the `role: service` discovery are attached. -* For all targets backed by a pod, all labels of the `role: pod` discovery are attached. - -#### `endpointslice` - -The `endpointslice` role discovers targets from existing endpointslices. For each endpoint -address referenced in the endpointslice object one target is discovered. If the endpoint is backed by a pod, all -additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well. - -The role requires the `discovery.k8s.io/v1` API version (available since Kubernetes v1.21). - -Available meta labels: - -* `__meta_kubernetes_namespace`: The namespace of the endpoints object. -* `__meta_kubernetes_endpointslice_name`: The name of the endpointslice object.
-* `__meta_kubernetes_endpointslice_label_`: Each label from the endpointslice object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_endpointslice_labelpresent_`: `true` for each label from the endpointslice object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_endpointslice_annotation_`: Each annotation from the endpointslice object. -* `__meta_kubernetes_endpointslice_annotationpresent_`: `true` for each annotation from the endpointslice object. -* For all targets discovered directly from the endpointslice list (those not additionally inferred - from underlying pods), the following labels are attached: - * `__meta_kubernetes_endpointslice_address_target_kind`: Kind of the referenced object. - * `__meta_kubernetes_endpointslice_address_target_name`: Name of referenced object. - * `__meta_kubernetes_endpointslice_address_type`: The ip protocol family of the address of the target. - * `__meta_kubernetes_endpointslice_endpoint_conditions_ready`: Set to `true` or `false` for the referenced endpoint's ready state. - * `__meta_kubernetes_endpointslice_endpoint_conditions_serving`: Set to `true` or `false` for the referenced endpoint's serving state. - * `__meta_kubernetes_endpointslice_endpoint_conditions_terminating`: Set to `true` or `false` for the referenced endpoint's terminating state. - * `__meta_kubernetes_endpointslice_endpoint_topology_kubernetes_io_hostname`: Name of the node hosting the referenced endpoint. - * `__meta_kubernetes_endpointslice_endpoint_topology_present_kubernetes_io_hostname`: Flag that shows if the referenced object has a kubernetes.io/hostname annotation. - * `__meta_kubernetes_endpointslice_endpoint_hostname`: Hostname of the referenced endpoint. - * `__meta_kubernetes_endpointslice_endpoint_node_name`: Name of the Node hosting the referenced endpoint. - * `__meta_kubernetes_endpointslice_endpoint_zone`: Zone the referenced endpoint exists in. 
- * `__meta_kubernetes_endpointslice_port`: Port of the referenced endpoint. - * `__meta_kubernetes_endpointslice_port_name`: Named port of the referenced endpoint. - * `__meta_kubernetes_endpointslice_port_protocol`: Protocol of the referenced endpoint. -* If the endpoints belong to a service, all labels of the `role: service` discovery are attached. -* For all targets backed by a pod, all labels of the `role: pod` discovery are attached. - -#### `ingress` - -The `ingress` role discovers a target for each path of each ingress. -This is generally useful for blackbox monitoring of an ingress. -The address will be set to the host specified in the ingress spec. - -The role requires the `networking.k8s.io/v1` API version (available since Kubernetes v1.19). - -Available meta labels: - -* `__meta_kubernetes_namespace`: The namespace of the ingress object. -* `__meta_kubernetes_ingress_name`: The name of the ingress object. -* `__meta_kubernetes_ingress_label_`: Each label from the ingress object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_ingress_labelpresent_`: `true` for each label from the ingress object, with any unsupported characters converted to an underscore. -* `__meta_kubernetes_ingress_annotation_`: Each annotation from the ingress object. -* `__meta_kubernetes_ingress_annotationpresent_`: `true` for each annotation from the ingress object. -* `__meta_kubernetes_ingress_class_name`: Class name from ingress spec, if present. -* `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, `https` if TLS - config is set. Defaults to `http`. -* `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`. - -See below for the configuration options for Kubernetes discovery: - -```yaml -# The information to access the Kubernetes API. - -# The API server addresses. 
If left empty, Prometheus is assumed to run inside -# of the cluster and will discover API servers automatically and use the pod's -# CA certificate and bearer token file at /var/run/secrets/kubernetes.io/serviceaccount/. -[ api_server: ] - -# The Kubernetes role of entities that should be discovered. -# One of endpoints, endpointslice, service, pod, node, or ingress. -role: - -# Optional path to a kubeconfig file. -# Note that api_server and kubeconfig_file are mutually exclusive. -[ kubeconfig_file: ] - -# Optional namespace discovery. If omitted, all namespaces are used. -namespaces: - own_namespace: - names: - [ - ] - -# Optional label and field selectors to limit the discovery process to a subset of available resources. -# See https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ -# and https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ to learn more about the possible -# filters that can be used. The endpoints role supports pod, service and endpoints selectors. -# The pod role supports node selectors when configured with `attach_metadata: {node: true}`. -# Other roles only support selectors matching the role itself (e.g. node role can only contain node selectors). - -# Note: When deciding whether to use a field/label selector, make sure that this -# is the best approach - it will prevent Prometheus from reusing single list/watch -# for all scrape configs. This might result in a bigger load on the Kubernetes API, -# because for each selector combination there will be an additional LIST/WATCH. On the other hand, -# if you just want to monitor a small subset of pods in a large cluster it's recommended to use selectors. -# Whether selectors should be used depends on the particular situation. -[ selectors: - [ - role: - [ label: ] - [ field: ] ]] - -# Optional metadata to attach to discovered targets. If omitted, no additional metadata is attached.
-attach_metadata: -# Attaches node metadata to discovered targets. Valid for roles: pod, endpoints, endpointslice. -# When set to true, Prometheus must have permissions to get Nodes. - [ node: | default = false ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml) -for a detailed example of configuring Prometheus for Kubernetes. - -You may wish to check out the 3rd party [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator), -which automates the Prometheus setup on top of Kubernetes. - -### `` - -Kuma SD configurations allow retrieving scrape targets from the [Kuma](https://kuma.io) control plane. - -This SD discovers "monitoring assignments" based on Kuma [Dataplane Proxies](https://kuma.io/docs/latest/production/dp-config/dpp/#data-plane-proxy), -via the MADS v1 (Monitoring Assignment Discovery Service) xDS API, and will create a target for each proxy -inside a Prometheus-enabled mesh. - -The following meta labels are available for each target: - -* `__meta_kuma_mesh`: the name of the proxy's Mesh -* `__meta_kuma_dataplane`: the name of the proxy -* `__meta_kuma_service`: the name of the proxy's associated Service -* `__meta_kuma_label_`: each tag of the proxy - -See below for the configuration options for Kuma MonitoringAssignment discovery: - -```yaml -# Address of the Kuma Control Plane's MADS xDS server. -server: - -# Client id is used by Kuma Control Plane to compute Monitoring Assignment for specific Prometheus backend. -# This is useful when migrating between multiple Prometheus backends, or having separate backend for each Mesh. -# When not specified, system hostname/fqdn will be used if available, if not `prometheus` will be used. -[ client_id: ] - -# The time to wait between polling update requests.
-[ refresh_interval: | default = 30s ] - -# The time after which the monitoring assignments are refreshed. -[ fetch_timeout: | default = 2m ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -The [relabeling phase](#relabel_config) is the preferred and more powerful way -to filter proxies and user-defined tags. - -### `` - -Lightsail SD configurations allow retrieving scrape targets from [AWS Lightsail](https://aws.amazon.com/lightsail/) -instances. The private IP address is used by default, but may be changed to -the public IP address with relabeling. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_lightsail_availability_zone`: the availability zone in which the instance is running -* `__meta_lightsail_blueprint_id`: the Lightsail blueprint ID -* `__meta_lightsail_bundle_id`: the Lightsail bundle ID -* `__meta_lightsail_instance_name`: the name of the Lightsail instance -* `__meta_lightsail_instance_state`: the state of the Lightsail instance -* `__meta_lightsail_instance_support_code`: the support code of the Lightsail instance -* `__meta_lightsail_ipv6_addresses`: comma separated list of IPv6 addresses assigned to the instance's network interfaces, if present -* `__meta_lightsail_private_ip`: the private IP address of the instance -* `__meta_lightsail_public_ip`: the public IP address of the instance, if available -* `__meta_lightsail_region`: the region of the instance -* `__meta_lightsail_tag_`: each tag value of the instance - -See below for the configuration options for Lightsail discovery: - -```yaml -# The information to access the Lightsail API. - -# The AWS region. If blank, the region from the instance metadata is used. -[ region: ] - -# Custom endpoint to be used. -[ endpoint: ] - -# The AWS API keys. 
If blank, the environment variables `AWS_ACCESS_KEY_ID` -# and `AWS_SECRET_ACCESS_KEY` are used. -[ access_key: ] -[ secret_key: ] -# Named AWS profile used to connect to the API. -[ profile: ] - -# AWS Role ARN, an alternative to using AWS API keys. -[ role_arn: ] - -# Refresh interval to re-read the instance list. -[ refresh_interval: | default = 60s ] - -# The port to scrape metrics from. If using the public IP address, this must -# instead be specified in the relabeling rule. -[ port: | default = 80 ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Linode SD configurations allow retrieving scrape targets from [Linode's](https://www.linode.com/) -Linode APIv4. -This service discovery uses the public IPv4 address by default, but that can be -changed with relabeling, as demonstrated in [the Prometheus linode-sd -configuration file](/documentation/examples/prometheus-linode.yml). - -Linode APIv4 Token must be created with scopes: `linodes:read_only`, `ips:read_only`, and `events:read_only`.
- -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_linode_instance_id`: the id of the linode instance -* `__meta_linode_instance_label`: the label of the linode instance -* `__meta_linode_image`: the slug of the linode instance's image -* `__meta_linode_private_ipv4`: the private IPv4 of the linode instance -* `__meta_linode_public_ipv4`: the public IPv4 of the linode instance -* `__meta_linode_public_ipv6`: the public IPv6 of the linode instance -* `__meta_linode_private_ipv4_rdns`: the reverse DNS for the first private IPv4 of the linode instance -* `__meta_linode_public_ipv4_rdns`: the reverse DNS for the first public IPv4 of the linode instance -* `__meta_linode_public_ipv6_rdns`: the reverse DNS for the first public IPv6 of the linode instance -* `__meta_linode_region`: the region of the linode instance -* `__meta_linode_type`: the type of the linode instance -* `__meta_linode_status`: the status of the linode instance -* `__meta_linode_tags`: a list of tags of the linode instance joined by the tag separator -* `__meta_linode_group`: the display group a linode instance is a member of -* `__meta_linode_gpus`: the number of GPUs of the linode instance -* `__meta_linode_hypervisor`: the virtualization software powering the linode instance -* `__meta_linode_backups`: the backup service status of the linode instance -* `__meta_linode_specs_disk_bytes`: the amount of storage space the linode instance has access to -* `__meta_linode_specs_memory_bytes`: the amount of RAM the linode instance has access to -* `__meta_linode_specs_vcpus`: the number of VCPUS this linode has access to -* `__meta_linode_specs_transfer_bytes`: the amount of network transfer the linode instance is allotted each month -* `__meta_linode_extra_ips`: a list of all extra IPv4 addresses assigned to the linode instance joined by the tag separator -* `__meta_linode_ipv6_ranges`: a list of IPv6 ranges with mask assigned to the linode instance
joined by the tag separator - -```yaml - -# Optional region to filter on. -[ region: ] - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The string by which Linode Instance tags are joined into the tag label. -[ tag_separator: | default = , ] - -# The time after which the linode instances are refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Marathon SD configurations allow retrieving scrape targets using the -[Marathon](https://mesosphere.github.io/marathon/) REST API. Prometheus -will periodically check the REST endpoint for currently running tasks and -create a target group for every app that has at least one healthy task. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_marathon_app`: the name of the app (with slashes replaced by dashes) -* `__meta_marathon_image`: the name of the Docker image used (if available) -* `__meta_marathon_task`: the ID of the Mesos task -* `__meta_marathon_app_label_`: any Marathon labels attached to the app, with any unsupported characters converted to an underscore -* `__meta_marathon_port_definition_label_`: the port definition labels, with any unsupported characters converted to an underscore -* `__meta_marathon_port_mapping_label_`: the port mapping labels, with any unsupported characters converted to an underscore -* `__meta_marathon_port_index`: the port index number (e.g. `1` for `PORT1`) - -See below for the configuration options for Marathon discovery: - -```yaml -# List of URLs to be used to contact Marathon servers. -# You need to provide at least one server URL. 
-servers: - - - -# Polling interval -[ refresh_interval: | default = 30s ] - -# Optional authentication information for token-based authentication -# https://docs.mesosphere.com/1.11/security/ent/iam-api/#passing-an-authentication-token -# It is mutually exclusive with `auth_token_file` and other authentication mechanisms. -[ auth_token: ] - -# Optional authentication information for token-based authentication -# https://docs.mesosphere.com/1.11/security/ent/iam-api/#passing-an-authentication-token -# It is mutually exclusive with `auth_token` and other authentication mechanisms. -[ auth_token_file: ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -By default every app listed in Marathon will be scraped by Prometheus. If not all -of your services provide Prometheus metrics, you can use a Marathon label and -Prometheus relabeling to control which instances will actually be scraped. -See [the Prometheus marathon-sd configuration file](/documentation/examples/prometheus-marathon.yml) -for a practical example on how to set up your Marathon app and your Prometheus -configuration. - -By default, all apps will show up as a single job in Prometheus (the one specified -in the configuration file), which can also be changed using relabeling. - -### `` - -Nerve SD configurations allow retrieving scrape targets from [AirBnB's Nerve] -(https://github.com/airbnb/nerve) which are stored in -[Zookeeper](https://zookeeper.apache.org/). - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_nerve_path`: the full path to the endpoint node in Zookeeper -* `__meta_nerve_endpoint_host`: the host of the endpoint -* `__meta_nerve_endpoint_port`: the port of the endpoint -* `__meta_nerve_endpoint_name`: the name of the endpoint - -```yaml -# The Zookeeper servers. 
-servers: - - -# Paths can point to a single service, or the root of a tree of services. -paths: - - -[ timeout: | default = 10s ] -``` -### `` - -Nomad SD configurations allow retrieving scrape targets from [Nomad's](https://www.nomadproject.io/) -Service API. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_nomad_address`: the service address of the target -* `__meta_nomad_dc`: the datacenter name for the target -* `__meta_nomad_namespace`: the namespace of the target -* `__meta_nomad_node_id`: the node name defined for the target -* `__meta_nomad_service`: the name of the service the target belongs to -* `__meta_nomad_service_address`: the service address of the target -* `__meta_nomad_service_id`: the service ID of the target -* `__meta_nomad_service_port`: the service port of the target -* `__meta_nomad_tags`: the list of tags of the target joined by the tag separator - -```yaml -# The information to access the Nomad API. It is to be defined -# as the Nomad documentation requires. -[ allow_stale: | default = true ] -[ namespace: | default = default ] -[ refresh_interval: | default = 60s ] -[ region: | default = global ] -# The URL to connect to the API. -[ server: ] -[ tag_separator: | default = ,] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Serverset SD configurations allow retrieving scrape targets from [Serversets] -(https://github.com/twitter/finagle/tree/develop/finagle-serversets) which are -stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are commonly -used by [Finagle](https://twitter.github.io/finagle/) and -[Aurora](https://aurora.apache.org/). 
- -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_serverset_path`: the full path to the serverset member node in Zookeeper -* `__meta_serverset_endpoint_host`: the host of the default endpoint -* `__meta_serverset_endpoint_port`: the port of the default endpoint -* `__meta_serverset_endpoint_host_`: the host of the given endpoint -* `__meta_serverset_endpoint_port_`: the port of the given endpoint -* `__meta_serverset_shard`: the shard number of the member -* `__meta_serverset_status`: the status of the member - -```yaml -# The Zookeeper servers. -servers: - - -# Paths can point to a single serverset, or the root of a tree of serversets. -paths: - - -[ timeout: | default = 10s ] -``` - -Serverset data must be in the JSON format, the Thrift format is not currently supported. - -### `` - -[STACKIT](https://www.stackit.de/de/) SD configurations allow retrieving -scrape targets from various APIs. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_stackit_availability_zone`: The availability zone of the server. -* `__meta_stackit_label_`: Each server label, with unsupported characters replaced by underscores. -* `__meta_stackit_labelpresent_`: "true" for each label of the server, with unsupported characters replaced by underscores. -* `__meta_stackit_private_ipv4_`: the private ipv4 address of the server within a given network -* `__meta_stackit_public_ipv4`: the public ipv4 address of the server -* `__meta_stackit_id`: The ID of the target. -* `__meta_stackit_type`: The type or brand of the target. -* `__meta_stackit_name`: The server name. -* `__meta_stackit_status`: The current status of the server. -* `__meta_stackit_power_status`: The power status of the server. - -See below for the configuration options for STACKIT discovery: - -```yaml -# The STACKIT project -project: - -# STACKIT region to use. No automatic discovery of the region is done. 
-[ region : | default = "eu01" ] - -# Custom API endpoint to be used. Format scheme://host:port -[ endpoint : ] - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# Raw private key string used for authenticating a service account -[ private_key: ] - -# Path to a file containing the raw private key string -[ private_key_path: ] - -# Full JSON-formatted service account key used for authentication -[ service_account_key: ] - -# Path to a file containing the JSON-formatted service account key -[ service_account_key_path: ] - -# Path to a file containing STACKIT credentials. -[ credentials_file_path: ] - -# The time after which the servers are refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -A Service Account Token can be set through `http_config`. - -```yaml -stackit_sd_config: -- authorization: - credentials: -``` - -### `` - -[Triton](https://github.com/joyent/triton) SD configurations allow retrieving -scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md) -discovery endpoints. - -One of the following `` types can be configured to discover targets: - -#### `container` - -The `container` role discovers one target per "virtual machine" owned by the `account`. -These are SmartOS zones or lx/KVM/bhyve branded zones. 
- -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_triton_groups`: the list of groups belonging to the target joined by a comma separator -* `__meta_triton_machine_alias`: the alias of the target container -* `__meta_triton_machine_brand`: the brand of the target container -* `__meta_triton_machine_id`: the UUID of the target container -* `__meta_triton_machine_image`: the target container's image type -* `__meta_triton_server_id`: the server UUID the target container is running on - -#### `cn` - -The `cn` role discovers one target per compute node (also known as "server" or "global zone") making up the Triton infrastructure. -The `account` must be a Triton operator and is currently required to own at least one `container`. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_triton_machine_alias`: the hostname of the target (requires triton-cmon 1.7.0 or newer) -* `__meta_triton_machine_id`: the UUID of the target - -See below for the configuration options for Triton discovery: - -```yaml -# The information to access the Triton discovery API. - -# The account to use for discovering new targets. -account: - -# The type of targets to discover, can be set to: -# * "container" to discover virtual machines (SmartOS zones, lx/KVM/bhyve branded zones) running on Triton -# * "cn" to discover compute nodes (servers/global zones) making up the Triton infrastructure -[ role : | default = "container" ] - -# The DNS suffix which should be applied to target. -dns_suffix: - -# The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is -# often the same value as dns_suffix. -endpoint: - -# A list of groups for which targets are retrieved, only supported when `role` == `container`. -# If omitted all containers owned by the requesting account are scraped. -groups: - [ - ... ] - -# The port to use for discovery and metric scraping. 
-[ port: | default = 9163 ] - -# The interval which should be used for refreshing targets. -[ refresh_interval: | default = 60s ] - -# The Triton discovery API version. -[ version: | default = 1 ] - -# TLS configuration. -tls_config: - [ ] -``` - -### `` - -Eureka SD configurations allow retrieving scrape targets using the -[Eureka](https://github.com/Netflix/eureka) REST API. Prometheus -will periodically check the REST endpoint and -create a target for every app instance. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_eureka_app_name`: the name of the app -* `__meta_eureka_app_instance_id`: the ID of the app instance -* `__meta_eureka_app_instance_hostname`: the hostname of the instance -* `__meta_eureka_app_instance_homepage_url`: the homepage url of the app instance -* `__meta_eureka_app_instance_statuspage_url`: the status page url of the app instance -* `__meta_eureka_app_instance_healthcheck_url`: the health check url of the app instance -* `__meta_eureka_app_instance_ip_addr`: the IP address of the app instance -* `__meta_eureka_app_instance_vip_address`: the VIP address of the app instance -* `__meta_eureka_app_instance_secure_vip_address`: the secure VIP address of the app instance -* `__meta_eureka_app_instance_status`: the status of the app instance -* `__meta_eureka_app_instance_port`: the port of the app instance -* `__meta_eureka_app_instance_port_enabled`: the port enabled of the app instance -* `__meta_eureka_app_instance_secure_port`: the secure port address of the app instance -* `__meta_eureka_app_instance_secure_port_enabled`: the secure port of the app instance -* `__meta_eureka_app_instance_country_id`: the country ID of the app instance -* `__meta_eureka_app_instance_metadata_`: app instance metadata -* `__meta_eureka_app_instance_datacenterinfo_name`: the datacenter name of the app instance -* `__meta_eureka_app_instance_datacenterinfo_`: the datacenter metadata - -See below for the 
configuration options for Eureka discovery: - -```yaml -# The URL to connect to the Eureka server. -server: - -# Refresh interval to re-read the app instance list. -[ refresh_interval: | default = 30s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -See [the Prometheus eureka-sd configuration file](/documentation/examples/prometheus-eureka.yml) -for a practical example on how to set up your Eureka app and your Prometheus -configuration. - -### `` - -Scaleway SD configurations allow retrieving scrape targets from [Scaleway instances](https://www.scaleway.com/en/virtual-instances/) and [baremetal services](https://www.scaleway.com/en/bare-metal-servers/). - -The following meta labels are available on targets during [relabeling](#relabel_config): - -#### Instance role - - -* `__meta_scaleway_instance_boot_type`: the boot type of the server -* `__meta_scaleway_instance_hostname`: the hostname of the server -* `__meta_scaleway_instance_id`: the ID of the server -* `__meta_scaleway_instance_image_arch`: the arch of the server image -* `__meta_scaleway_instance_image_id`: the ID of the server image -* `__meta_scaleway_instance_image_name`: the name of the server image -* `__meta_scaleway_instance_location_cluster_id`: the cluster ID of the server location -* `__meta_scaleway_instance_location_hypervisor_id`: the hypervisor ID of the server location -* `__meta_scaleway_instance_location_node_id`: the node ID of the server location -* `__meta_scaleway_instance_name`: name of the server -* `__meta_scaleway_instance_organization_id`: the organization of the server -* `__meta_scaleway_instance_private_ipv4`: the private IPv4 address of the server -* `__meta_scaleway_instance_project_id`: project id of the server -* `__meta_scaleway_instance_public_ipv4`: the public IPv4 address of the server -* `__meta_scaleway_instance_public_ipv6`: the public IPv6 
address of the server -* `__meta_scaleway_instance_public_ipv4_addresses`: the public IPv4 addresses of the server -* `__meta_scaleway_instance_public_ipv6_addresses`: the public IPv6 addresses of the server -* `__meta_scaleway_instance_region`: the region of the server -* `__meta_scaleway_instance_security_group_id`: the ID of the security group of the server -* `__meta_scaleway_instance_security_group_name`: the name of the security group of the server -* `__meta_scaleway_instance_status`: status of the server -* `__meta_scaleway_instance_tags`: the list of tags of the server joined by the tag separator -* `__meta_scaleway_instance_type`: commercial type of the server -* `__meta_scaleway_instance_zone`: the zone of the server (ex: `fr-par-1`, complete list [here](https://developers.scaleway.com/en/products/instance/api/#introduction)) - -This role uses the first address it finds in the following order: private IPv4, public IPv4, public IPv6. This can be -changed with relabeling, as demonstrated in [the Prometheus scaleway-sd -configuration file](/documentation/examples/prometheus-scaleway.yml). -Should an instance have no address before relabeling, it will not be added to the target list and you will not be able to relabel it. 
- -#### Baremetal role - -* `__meta_scaleway_baremetal_id`: the ID of the server -* `__meta_scaleway_baremetal_public_ipv4`: the public IPv4 address of the server -* `__meta_scaleway_baremetal_public_ipv6`: the public IPv6 address of the server -* `__meta_scaleway_baremetal_name`: the name of the server -* `__meta_scaleway_baremetal_os_name`: the name of the operating system of the server -* `__meta_scaleway_baremetal_os_version`: the version of the operating system of the server -* `__meta_scaleway_baremetal_project_id`: the project ID of the server -* `__meta_scaleway_baremetal_status`: the status of the server -* `__meta_scaleway_baremetal_tags`: the list of tags of the server joined by the tag separator -* `__meta_scaleway_baremetal_type`: the commercial type of the server -* `__meta_scaleway_baremetal_zone`: the zone of the server (ex: `fr-par-1`, complete list [here](https://developers.scaleway.com/en/products/instance/api/#introduction)) - -This role uses the public IPv4 address by default. This can be -changed with relabeling, as demonstrated in [the Prometheus scaleway-sd -configuration file](/documentation/examples/prometheus-scaleway.yml). - -See below for the configuration options for Scaleway discovery: - -```yaml -# Access key to use. https://console.scaleway.com/project/credentials -access_key: - -# Secret key to use when listing targets. https://console.scaleway.com/project/credentials -# It is mutually exclusive with `secret_key_file`. -[ secret_key: ] - -# Sets the secret key with the credentials read from the configured file. -# It is mutually exclusive with `secret_key`. -[ secret_key_file: ] - -# Project ID of the targets. -project_id: - -# Role of the targets to retrieve. Must be `instance` or `baremetal`. -role: - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# API URL to use when doing the server listing requests. -[ api_url: | default = "https://api.scaleway.com" ] - -# Zone is the availability zone of your targets (e.g. 
fr-par-1). -[ zone: | default = fr-par-1 ] - -# NameFilter specify a name filter (works as a LIKE) to apply on the server listing request. -[ name_filter: ] - -# TagsFilter specify a tag filter (a server needs to have all defined tags to be listed) to apply on the server listing request. -tags_filter: -[ - ] - -# Refresh interval to re-read the targets list. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -### `` - -Uyuni SD configurations allow retrieving scrape targets from managed systems -via [Uyuni](https://www.uyuni-project.org/) API. - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_uyuni_endpoint_name`: the name of the application endpoint -* `__meta_uyuni_exporter`: the exporter exposing metrics for the target -* `__meta_uyuni_groups`: the system groups of the target -* `__meta_uyuni_metrics_path`: metrics path for the target -* `__meta_uyuni_minion_hostname`: hostname of the Uyuni client -* `__meta_uyuni_primary_fqdn`: primary FQDN of the Uyuni client -* `__meta_uyuni_proxy_module`: the module name if _Exporter Exporter_ proxy is - configured for the target -* `__meta_uyuni_scheme`: the protocol scheme used for requests -* `__meta_uyuni_system_id`: the system ID of the client - -See below for the configuration options for Uyuni discovery: - -```yaml -# The URL to connect to the Uyuni server. -server: - -# Credentials are used to authenticate the requests to Uyuni API. -username: -password: - -# The entitlement string to filter eligible systems. -[ entitlement: | default = monitoring_entitled ] - -# The string by which Uyuni group names are joined into the groups label. -[ separator: | default = , ] - -# Refresh interval to re-read the managed targets list. 
-[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -See [the Prometheus uyuni-sd configuration file](/documentation/examples/prometheus-uyuni.yml) -for a practical example on how to set up Uyuni Prometheus configuration. - -### `` - -Vultr SD configurations allow retrieving scrape targets from [Vultr](https://www.vultr.com/). - -This service discovery uses the main IPv4 address by default, which can be -changed with relabeling, as demonstrated in [the Prometheus vultr-sd -configuration file](/documentation/examples/prometheus-vultr.yml). - -The following meta labels are available on targets during [relabeling](#relabel_config): - -* `__meta_vultr_instance_id` : A unique ID for the vultr Instance. -* `__meta_vultr_instance_label` : The user-supplied label for this instance. -* `__meta_vultr_instance_os` : The Operating System name. -* `__meta_vultr_instance_os_id` : The Operating System id used by this instance. -* `__meta_vultr_instance_region` : The Region id where the Instance is located. -* `__meta_vultr_instance_plan` : A unique ID for the Plan. -* `__meta_vultr_instance_main_ip` : The main IPv4 address. -* `__meta_vultr_instance_internal_ip` : The private IP address. -* `__meta_vultr_instance_main_ipv6` : The main IPv6 address. -* `__meta_vultr_instance_features` : List of features that are available to the instance. -* `__meta_vultr_instance_tags` : List of tags associated with the instance. -* `__meta_vultr_instance_hostname` : The hostname for this instance. -* `__meta_vultr_instance_server_status` : The server health status. -* `__meta_vultr_instance_vcpu_count` : Number of vCPUs. -* `__meta_vultr_instance_ram_mb` : The amount of RAM in MB. -* `__meta_vultr_instance_disk_gb` : The size of the disk in GB. -* `__meta_vultr_instance_allowed_bandwidth_gb` : Monthly bandwidth quota in GB. 
- -```yaml -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The time after which the instances are refreshed. -[ refresh_interval: | default = 60s ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - - -### `` - -A `static_config` allows specifying a list of targets and a common label set -for them. It is the canonical way to specify static targets in a scrape -configuration. - -```yaml -# The targets specified by the static config. -targets: - [ - '' ] - -# Labels assigned to all metrics scraped from the targets. -labels: - [ : ... ] -``` - -### `` - -Relabeling is a powerful tool to dynamically rewrite the label set of a target before -it gets scraped. Multiple relabeling steps can be configured per scrape configuration. -They are applied to the label set of each target in order of their appearance -in the configuration file. - -Initially, aside from the configured per-target labels, a target's `job` -label is set to the `job_name` value of the respective scrape configuration. -The `__address__` label is set to the `:` address of the target. -After relabeling, the `instance` label is set to the value of `__address__` by default if -it was not set during relabeling. - -The `__scheme__` and `__metrics_path__` labels -are set to the scheme and metrics path of the target respectively, as specified in `scrape_config`. - -The `__param_` -label is set to the value of the first passed URL parameter called ``, as defined in `scrape_config`. - -The `__scrape_interval__` and `__scrape_timeout__` labels are set to the target's -interval and timeout, as specified in `scrape_config`. - -Additional labels prefixed with `__meta_` may be available during the -relabeling phase. They are set by the service discovery mechanism that provided -the target and vary between mechanisms. 
- -Labels starting with `__` will be removed from the label set after target -relabeling is completed. - -If a relabeling step needs to store a label value only temporarily (as the -input to a subsequent relabeling step), use the `__tmp` label name prefix. This -prefix is guaranteed to never be used by Prometheus itself. - -```yaml -# The source_labels tells the rule what labels to fetch from the series. Any -# labels which do not exist get a blank value (""). Their content is concatenated -# using the configured separator and matched against the configured regular expression -# for the replace, keep, and drop actions. -[ source_labels: '[' <labelname> [, ...] ']' ] - -# Separator placed between concatenated source label values. -[ separator: <string> | default = ; ] - -# Label to which the resulting value is written in a replace action. -# It is mandatory for replace actions. Regex capture groups are available. -[ target_label: <labelname> ] - -# Regular expression against which the extracted value is matched. -[ regex: <regex> | default = (.*) ] - -# Modulus to take of the hash of the source label values. -[ modulus: <int> ] - -# Replacement value against which a regex replace is performed if the -# regular expression matches. Regex capture groups are available. -[ replacement: <string> | default = $1 ] - -# Action to perform based on regex matching. -[ action: <relabel_action> | default = replace ] -``` - -`<regex>` is any valid -[RE2 regular expression](https://github.com/google/re2/wiki/Syntax). It is -required for the `replace`, `keep`, `drop`, `labelmap`, `labeldrop` and `labelkeep` actions. The regex is -anchored on both ends. To un-anchor the regex, use `.*<regex>.*`. - -`<relabel_action>` determines the relabeling action to take: - -* `replace`: Match `regex` against the concatenated `source_labels`. Then, set - `target_label` to `replacement`, with match group references - (`${1}`, `${2}`, ...) in `replacement` substituted by their value. If `regex` - does not match, no replacement takes place. 
-* `lowercase`: Maps the concatenated `source_labels` to their lower case. -* `uppercase`: Maps the concatenated `source_labels` to their upper case. -* `keep`: Drop targets for which `regex` does not match the concatenated `source_labels`. -* `drop`: Drop targets for which `regex` matches the concatenated `source_labels`. -* `keepequal`: Drop targets for which the concatenated `source_labels` do not match `target_label`. -* `dropequal`: Drop targets for which the concatenated `source_labels` do match `target_label`. -* `hashmod`: Set `target_label` to the `modulus` of a hash of the concatenated `source_labels`. -* `labelmap`: Match `regex` against all source label names, not just those specified in `source_labels`. Then - copy the values of the matching labels to label names given by `replacement` with match - group references (`${1}`, `${2}`, ...) in `replacement` substituted by their value. -* `labeldrop`: Match `regex` against all label names. Any label that matches will be - removed from the set of labels. -* `labelkeep`: Match `regex` against all label names. Any label that does not match will be - removed from the set of labels. - -Care must be taken with `labeldrop` and `labelkeep` to ensure that metrics are -still uniquely labeled once the labels are removed. - -### `` - -Metric relabeling is applied to samples as the last step before ingestion. It -has the same configuration format and actions as target relabeling. Metric -relabeling does not apply to automatically generated timeseries such as `up`. - -One use for this is to exclude time series that are too expensive to ingest. - -### `` - -Alert relabeling is applied to alerts before they are sent to the Alertmanager. -It has the same configuration format and actions as target relabeling. Alert -relabeling is applied after external labels. - -One use for this is ensuring a HA pair of Prometheus servers with different -external labels send identical alerts. 
- -### `` - -An `alertmanager_config` section specifies Alertmanager instances the Prometheus -server sends alerts to. It also provides parameters to configure how to -communicate with these Alertmanagers. - -Alertmanagers may be statically configured via the `static_configs` parameter or -dynamically discovered using one of the supported service-discovery mechanisms. - -Additionally, `relabel_configs` allow selecting Alertmanagers from discovered -entities and provide advanced modifications to the used API path, which is exposed -through the `__alerts_path__` label. - -```yaml -# Per-target Alertmanager timeout when pushing alerts. -[ timeout: | default = 10s ] - -# The api version of Alertmanager. -[ api_version: | default = v2 ] - -# Prefix for the HTTP path alerts are pushed to. -[ path_prefix: | default = / ] - -# Configures the protocol scheme used for requests. -[ scheme: | default = http ] - -# Optionally configures AWS's Signature Version 4 signing process to sign requests. -# Cannot be set at the same time as basic_auth, authorization, oauth2, azuread or google_iam. -# To use the default credentials from the AWS SDK, use `sigv4: {}`. -sigv4: - # The AWS region. If blank, the region from the default credentials chain - # is used. - [ region: ] - - # The AWS API keys. If blank, the environment variables `AWS_ACCESS_KEY_ID` - # and `AWS_SECRET_ACCESS_KEY` are used. - [ access_key: ] - [ secret_key: ] - - # Named AWS profile used to authenticate. - [ profile: ] - - # AWS Role ARN, an alternative to using AWS API keys. - [ role_arn: ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] - -# List of Azure service discovery configurations. -azure_sd_configs: - [ - ... ] - -# List of Consul service discovery configurations. -consul_sd_configs: - [ - ... ] - -# List of DNS service discovery configurations. -dns_sd_configs: - [ - ... 
] - -# List of EC2 service discovery configurations. -ec2_sd_configs: - [ - ... ] - -# List of Eureka service discovery configurations. -eureka_sd_configs: - [ - ... ] - -# List of file service discovery configurations. -file_sd_configs: - [ - ... ] - -# List of DigitalOcean service discovery configurations. -digitalocean_sd_configs: - [ - ... ] - -# List of Docker service discovery configurations. -docker_sd_configs: - [ - ... ] - -# List of Docker Swarm service discovery configurations. -dockerswarm_sd_configs: - [ - ... ] - -# List of GCE service discovery configurations. -gce_sd_configs: - [ - ... ] - -# List of Hetzner service discovery configurations. -hetzner_sd_configs: - [ - ... ] - -# List of HTTP service discovery configurations. -http_sd_configs: - [ - ... ] - - # List of IONOS service discovery configurations. -ionos_sd_configs: - [ - ... ] - -# List of Kubernetes service discovery configurations. -kubernetes_sd_configs: - [ - ... ] - -# List of Lightsail service discovery configurations. -lightsail_sd_configs: - [ - ... ] - -# List of Linode service discovery configurations. -linode_sd_configs: - [ - ... ] - -# List of Marathon service discovery configurations. -marathon_sd_configs: - [ - ... ] - -# List of AirBnB's Nerve service discovery configurations. -nerve_sd_configs: - [ - ... ] - -# List of Nomad service discovery configurations. -nomad_sd_configs: - [ - ... ] - -# List of OpenStack service discovery configurations. -openstack_sd_configs: - [ - ... ] - -# List of OVHcloud service discovery configurations. -ovhcloud_sd_configs: - [ - ... ] - -# List of PuppetDB service discovery configurations. -puppetdb_sd_configs: - [ - ... ] - -# List of Scaleway service discovery configurations. -scaleway_sd_configs: - [ - ... ] - -# List of Zookeeper Serverset service discovery configurations. -serverset_sd_configs: - [ - ... ] - -# List of STACKIT service discovery configurations. -stackit_sd_configs: - [ - ... 
] - -# List of Triton service discovery configurations. -triton_sd_configs: - [ - ... ] - -# List of Uyuni service discovery configurations. -uyuni_sd_configs: - [ - ... ] - -# List of Vultr service discovery configurations. -vultr_sd_configs: - [ - ... ] - -# List of labeled statically configured Alertmanagers. -static_configs: - [ - ... ] - -# List of Alertmanager relabel configurations. -relabel_configs: - [ - ... ] - -# List of alert relabel configurations. -alert_relabel_configs: - [ - ... ] -``` - -### `` - -`write_relabel_configs` is relabeling applied to samples before sending them -to the remote endpoint. Write relabeling is applied after external labels. This -could be used to limit which samples are sent. - -There is a [small demo](/documentation/examples/remote_storage) of how to use -this functionality. - -```yaml -# The URL of the endpoint to send samples to. -url: - -# protobuf message to use when writing to the remote write endpoint. -# -# * The `prometheus.WriteRequest` represents the message introduced in Remote Write 1.0, which -# will be deprecated eventually. -# * The `io.prometheus.write.v2.Request` was introduced in Remote Write 2.0 and replaces the former, -# by improving efficiency and sending metadata, created timestamp and native histograms by default. -# -# Before changing this value, consult with your remote storage provider (or test) what message it supports. -# Read more on https://prometheus.io/docs/specs/remote_write_spec_2_0/#io-prometheus-write-v2-request -[ protobuf_message: | default = prometheus.WriteRequest ] - -# Timeout for requests to the remote write endpoint. -[ remote_timeout: | default = 30s ] - -# Custom HTTP headers to be sent along with each remote write request. -# Be aware that headers that are set by Prometheus itself can't be overwritten. -headers: - [ : ... ] - -# List of remote write relabel configurations. -write_relabel_configs: - [ - ... 
] - -# Name of the remote write config, which if specified must be unique among remote write configs. -# The name will be used in metrics and logging in place of a generated value to help users distinguish between -# remote write configs. -[ name: ] - -# Enables sending of exemplars over remote write. Note that exemplar storage itself must be enabled for exemplars to be scraped in the first place. -[ send_exemplars: | default = false ] - -# Enables sending of native histograms, also known as sparse histograms, over remote write. -# For the `io.prometheus.write.v2.Request` message, this option is noop (always true). -[ send_native_histograms: | default = false ] - -# When enabled, remote-write will resolve the URL host name via DNS, choose one of the IP addresses at random, and connect to it. -# When disabled, remote-write relies on Go's standard behavior, which is to try to connect to each address in turn. -# The connection timeout applies to the whole operation, i.e. in the latter case it is spread over all attempts. -# This is an experimental feature, and its behavior might still change, or even get removed. -[ round_robin_dns: | default = false ] - -# Optionally configures AWS's Signature Version 4 signing process to -# sign requests. Cannot be set at the same time as basic_auth, authorization, oauth2, or azuread. -# To use the default credentials from the AWS SDK, use `sigv4: {}`. -sigv4: - # The AWS region. If blank, the region from the default credentials chain - # is used. - [ region: ] - - # The AWS API keys. If blank, the environment variables `AWS_ACCESS_KEY_ID` - # and `AWS_SECRET_ACCESS_KEY` are used. - [ access_key: ] - [ secret_key: ] - - # Named AWS profile used to authenticate. - [ profile: ] - - # AWS Role ARN, an alternative to using AWS API keys. - [ role_arn: ] - -# Optional AzureAD configuration. -# Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or google_iam. -azuread: - # The Azure Cloud. 
Options are 'AzurePublic', 'AzureChina', or 'AzureGovernment'. - [ cloud: | default = AzurePublic ] - - # Azure Managed Identity. Leave 'client_id' blank to use the default managed identity. - [ managed_identity: - [ client_id: ] ] - - # Azure OAuth. - [ oauth: - [ client_id: ] - [ client_secret: ] - [ tenant_id: ] ] - - # Azure SDK auth. - # See https://learn.microsoft.com/en-us/azure/developer/go/azure-sdk-authentication - [ sdk: - [ tenant_id: ] ] - -# WARNING: Remote write is NOT SUPPORTED by Google Cloud. This configuration is reserved for future use. -# Optional Google Cloud Monitoring configuration. -# Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or azuread. -# To use the default credentials from the Google Cloud SDK, use `google_iam: {}`. -google_iam: - # Service account key with monitoring write permissions. - credentials_file: - -# Configures the queue used to write to remote storage. -queue_config: - # Number of samples to buffer per shard before we block reading of more - # samples from the WAL. It is recommended to have enough capacity in each - # shard to buffer several requests to keep throughput up while processing - # occasional slow remote requests. - [ capacity: | default = 10000 ] - # Maximum number of shards, i.e. amount of concurrency. - [ max_shards: | default = 50 ] - # Minimum number of shards, i.e. amount of concurrency. - [ min_shards: | default = 1 ] - # Maximum number of samples per send. - [ max_samples_per_send: | default = 2000] - # Maximum time a sample will wait for a send. The sample might wait less - # if the buffer is full. Further time might pass due to potential retries. - [ batch_send_deadline: | default = 5s ] - # Initial retry delay. Gets doubled for every retry. - [ min_backoff: | default = 30ms ] - # Maximum retry delay. - [ max_backoff: | default = 5s ] - # Retry upon receiving a 429 status code from the remote-write storage. - # This is experimental and might change in the future. 
- [ retry_on_http_429: | default = false ] - # If set, any sample that is older than sample_age_limit - # will not be sent to the remote storage. The default value is 0s, - # which means that all samples are sent. - [ sample_age_limit: | default = 0s ] - -# Configures the sending of series metadata to remote storage -# if the `prometheus.WriteRequest` message was chosen. When -# `io.prometheus.write.v2.Request` is used, metadata is always sent. -# -# Metadata configuration is subject to change at any point -# or be removed in future releases. -metadata_config: - # Whether metric metadata is sent to remote storage or not. - [ send: | default = true ] - # How frequently metric metadata is sent to remote storage. - [ send_interval: | default = 1m ] - # Maximum number of samples per send. - [ max_samples_per_send: | default = 500] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -# enable_http2 defaults to false for remote-write. -[ ] -``` - -There is a list of -[integrations](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) -with this feature. - -### `` - -```yaml -# The URL of the endpoint to query from. -url: - -# Name of the remote read config, which if specified must be unique among remote read configs. -# The name will be used in metrics and logging in place of a generated value to help users distinguish between -# remote read configs. -[ name: ] - -# An optional list of equality matchers which have to be -# present in a selector to query the remote read endpoint. -required_matchers: - [ : ... ] - -# Timeout for requests to the remote read endpoint. -[ remote_timeout: | default = 1m ] - -# Custom HTTP headers to be sent along with each remote read request. -# Be aware that headers that are set by Prometheus itself can't be overwritten. -headers: - [ : ... 
] - -# Whether reads should be made for queries for time ranges that -# the local storage should have complete data for. -[ read_recent: | default = false ] - -# Whether to use the external labels as selectors for the remote read endpoint. -[ filter_external_labels: | default = true ] - -# HTTP client settings, including authentication methods (such as basic auth and -# authorization), proxy configurations, TLS options, custom HTTP headers, etc. -[ ] -``` - -There is a list of -[integrations](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) -with this feature. - -### `` - -`tsdb` lets you configure the runtime-reloadable configuration settings of the TSDB. - -```yaml -# Configures how old an out-of-order/out-of-bounds sample can be w.r.t. the TSDB max time. -# An out-of-order/out-of-bounds sample is ingested into the TSDB as long as the timestamp -# of the sample is >= TSDB.MaxTime-out_of_order_time_window. -# -# When out_of_order_time_window is >0, the errors out-of-order and out-of-bounds are -# combined into a single error called 'too-old'; a sample is either (a) ingestible -# into the TSDB, i.e. it is an in-order sample or an out-of-order/out-of-bounds sample -# that is within the out-of-order window, or (b) too-old, i.e. not in-order -# and before the out-of-order window. -# -# When out_of_order_time_window is greater than 0, it also affects experimental agent. It allows -# the agent's WAL to accept out-of-order samples that fall within the specified time window relative -# to the timestamp of the last appended sample for the same series. -[ out_of_order_time_window: | default = 0s ] -``` - -### `` - -Note that exemplar storage is still considered experimental and must be enabled via `--enable-feature=exemplar-storage`. - -```yaml -# Configures the maximum size of the circular buffer used to store exemplars for all series. Resizable during runtime. 
-[ max_exemplars: <int> | default = 100000 ] -``` - -### `<tracing_config>` - -`tracing_config` configures exporting traces from Prometheus to a tracing backend via the OTLP protocol. Tracing is currently an **experimental** feature and could change in the future. - -```yaml -# Client used to export the traces. Options are 'http' or 'grpc'. -[ client_type: <string> | default = grpc ] - -# Endpoint to send the traces to. Should be provided in format <host>:<port>. -[ endpoint: <string> ] - -# Sets the probability a given trace will be sampled. Must be a float from 0 through 1. -[ sampling_fraction: <float> | default = 0 ] - -# If disabled, the client will use a secure connection. -[ insecure: <boolean> | default = false ] - -# Key-value pairs to be used as headers associated with gRPC or HTTP requests. -headers: - [ <string>: <string> ... ] - -# Compression key for supported compression types. Supported compression: gzip. -[ compression: <string> ] - -# Maximum time the exporter will wait for each batch export. -[ timeout: <duration> | default = 10s ] - -# TLS configuration. -tls_config: - [ <tls_config> ] -``` diff --git a/docs/configuration/https.md b/docs/configuration/https.md deleted file mode 100644 index 9a089ca922..0000000000 --- a/docs/configuration/https.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: HTTPS and authentication -sort_rank: 7 ---- - -Prometheus supports basic authentication and TLS. -This is **experimental** and might change in the future. - -To specify which web configuration file to load, use the `--web.config.file` flag. - -The file is written in [YAML format](https://en.wikipedia.org/wiki/YAML), -defined by the schema described below. -Brackets indicate that a parameter is optional. For non-list parameters the -value is set to the specified default. - -The file is read upon every HTTP request, so that any change to the -configuration or the certificates is picked up immediately.
- -Generic placeholders are defined as follows: - -* `<boolean>`: a boolean that can take the values `true` or `false` -* `<filename>`: a valid path in the current working directory -* `<secret>`: a regular string that is a secret, such as a password -* `<string>`: a regular string - -A valid example file can be found [here](/documentation/examples/web-config.yml). - -```yaml -tls_server_config: - # Certificate and key files for server to use to authenticate to client. - cert_file: <filename> - key_file: <filename> - - # Server policy for client authentication. Maps to ClientAuth Policies. - # For more detail on clientAuth options: - # https://golang.org/pkg/crypto/tls/#ClientAuthType - # - # NOTE: If you want to enable client authentication, you need to use - # RequireAndVerifyClientCert. Other values are insecure. - [ client_auth_type: <string> | default = "NoClientCert" ] - - # CA certificate for client certificate authentication to the server. - [ client_ca_file: <filename> ] - - # Verify that the client certificate has a Subject Alternative Name (SAN) - # which is an exact match to an entry in this list, else terminate the - # connection. SAN match can be one or multiple of the following: DNS, - # IP, e-mail, or URI address from https://pkg.go.dev/crypto/x509#Certificate. - [ client_allowed_sans: - [ - <string> ] ] - - # Minimum TLS version that is acceptable. - [ min_version: <string> | default = "TLS12" ] - - # Maximum TLS version that is acceptable. - [ max_version: <string> | default = "TLS13" ] - - # List of supported cipher suites for TLS versions up to TLS 1.2. If empty, - # Go default cipher suites are used. Available cipher suites are documented - # in the Go documentation: - # https://golang.org/pkg/crypto/tls/#pkg-constants - # - # Note that only the ciphers returned by the following function are supported: - # https://pkg.go.dev/crypto/tls#CipherSuites - [ cipher_suites: - [ - <string> ] ] - - # prefer_server_cipher_suites controls whether the server selects the - # client's most preferred ciphersuite, or the server's most preferred - # ciphersuite.
If true then the server's preference, as expressed in - # the order of elements in cipher_suites, is used. - [ prefer_server_cipher_suites: | default = true ] - - # Elliptic curves that will be used in an ECDHE handshake, in preference - # order. Available curves are documented in the go documentation: - # https://golang.org/pkg/crypto/tls/#CurveID - [ curve_preferences: - [ - ] ] - -http_server_config: - # Enable HTTP/2 support. Note that HTTP/2 is only supported with TLS. - # This can not be changed on the fly. - [ http2: | default = true ] - # List of headers that can be added to HTTP responses. - [ headers: - # Set the Content-Security-Policy header to HTTP responses. - # Unset if blank. - [ Content-Security-Policy: ] - # Set the X-Frame-Options header to HTTP responses. - # Unset if blank. Accepted values are deny and sameorigin. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options - [ X-Frame-Options: ] - # Set the X-Content-Type-Options header to HTTP responses. - # Unset if blank. Accepted value is nosniff. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options - [ X-Content-Type-Options: ] - # Set the X-XSS-Protection header to all responses. - # Unset if blank. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection - [ X-XSS-Protection: ] - # Set the Strict-Transport-Security header to HTTP responses. - # Unset if blank. - # Please make sure that you use this with care as this header might force - # browsers to load Prometheus and the other applications hosted on the same - # domain and subdomains over HTTPS. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security - [ Strict-Transport-Security: ] ] - -# Usernames and hashed passwords that have full access to the web -# server via basic authentication. If empty, no basic authentication is -# required. Passwords are hashed with bcrypt. -basic_auth_users: - [ : ... 
] -``` - diff --git a/docs/configuration/index.md b/docs/configuration/index.md deleted file mode 100644 index 5cfaf2a556..0000000000 --- a/docs/configuration/index.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Configuration -sort_rank: 3 ---- diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md deleted file mode 100644 index 6d668a4da3..0000000000 --- a/docs/configuration/recording_rules.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: Defining recording rules -nav_title: Recording rules -sort_rank: 2 ---- - -## Configuring rules - -Prometheus supports two types of rules which may be configured and then -evaluated at regular intervals: recording rules and [alerting -rules](alerting_rules.md). To include rules in Prometheus, create a file -containing the necessary rule statements and have Prometheus load the file via -the `rule_files` field in the [Prometheus configuration](configuration.md). -Rule files use YAML. - -The rule files can be reloaded at runtime by sending `SIGHUP` to the Prometheus -process. The changes are only applied if all rule files are well-formatted. - -## Syntax-checking rules - -To quickly check whether a rule file is syntactically correct without starting -a Prometheus server, you can use Prometheus's `promtool` command-line utility -tool: - -```bash -promtool check rules /path/to/example.rules.yml -``` - -The `promtool` binary is part of the `prometheus` archive offered on the -project's [download page](https://prometheus.io/download/). - -When the file is syntactically valid, the checker prints a textual -representation of the parsed rules to standard output and then exits with -a `0` return status. - -If there are any syntax errors or invalid input arguments, it prints an error -message to standard error and exits with a `1` return status. 
- -## Recording rules - -Recording rules allow you to precompute frequently needed or computationally -expensive expressions and save their result as a new set of time series. -Querying the precomputed result will then often be much faster than executing -the original expression every time it is needed. This is especially useful for -dashboards, which need to query the same expression repeatedly every time they -refresh. - -Recording and alerting rules exist in a rule group. Rules within a group are -run sequentially at a regular interval, with the same evaluation time. -The names of recording rules must be -[valid metric names](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). -The names of alerting rules must be -[valid label values](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). - -The syntax of a rule file is: - -```yaml -groups: - [ - <rule_group> ] -``` - -A simple example rules file would be: - -```yaml -groups: - - name: example - rules: - - record: code:prometheus_http_requests_total:sum - expr: sum by (code) (prometheus_http_requests_total) -``` - -### `<rule_group>` - -```yaml -# The name of the group. Must be unique within a file. -name: <string> - -# How often rules in the group are evaluated. -[ interval: <duration> | default = global.evaluation_interval ] - -# Limit the number of alerts an alerting rule and series a recording -# rule can produce. 0 is no limit. -[ limit: <int> | default = 0 ] - -# Offset the rule evaluation timestamp of this particular group by the specified duration into the past. -[ query_offset: <duration> | default = global.rule_query_offset ] - -# Labels to add or overwrite before storing the result for its rules. -# Labels defined in <rule> will override the key if it has a collision. -labels: - [ <labelname>: <labelvalue> ] - -rules: - [ - <rule> ... ] -``` - -### `<rule>` - -The syntax for recording rules is: - -```yaml -# The name of the time series to output to. Must be a valid metric name. -record: <string> - -# The PromQL expression to evaluate.
Every evaluation cycle this is -# evaluated at the current time, and the result recorded as a new set of -# time series with the metric name as given by 'record'. -expr: <string> - -# Labels to add or overwrite before storing the result. -labels: - [ <labelname>: <labelvalue> ] -``` - -The syntax for alerting rules is: - -```yaml -# The name of the alert. Must be a valid label value. -alert: <string> - -# The PromQL expression to evaluate. Every evaluation cycle this is -# evaluated at the current time, and all resultant time series become -# pending/firing alerts. -expr: <string> - -# Alerts are considered firing once they have been returned for this long. -# Alerts which have not yet fired for long enough are considered pending. -[ for: <duration> | default = 0s ] - -# How long an alert will continue firing after the condition that triggered it -# has cleared. -[ keep_firing_for: <duration> | default = 0s ] - -# Labels to add or overwrite for each alert. -labels: - [ <labelname>: <tmpl_string> ] - -# Annotations to add to each alert. -annotations: - [ <labelname>: <tmpl_string> ] -``` - -See also the -[best practices for naming metrics created by recording rules](https://prometheus.io/docs/practices/rules/#recording-rules). - -## Limiting alerts and series - -A limit for alerts produced by alerting rules and series produced by recording rules -can be configured per-group. When the limit is exceeded, _all_ series produced -by the rule are discarded, and if it's an alerting rule, _all_ alerts for -the rule, active, pending, or inactive, are cleared as well. The event will be -recorded as an error in the evaluation, and as such no stale markers are -written. - -## Rule query offset -A rule query offset is useful to ensure the underlying metrics have been received and stored in Prometheus. Metric availability delays are more likely to occur when Prometheus is running as a remote write target due to the nature of distributed systems, but can also occur when there are anomalies with scraping and/or short evaluation intervals.
- -## Failed rule evaluations due to slow evaluation - -If a rule group hasn't finished evaluating before its next evaluation is supposed to start (as defined by the `evaluation_interval`), the next evaluation will be skipped. Subsequent evaluations of the rule group will continue to be skipped until the initial evaluation either completes or times out. When this happens, there will be a gap in the metric produced by the recording rule. The `rule_group_iterations_missed_total` metric will be incremented for each missed iteration of the rule group. diff --git a/docs/configuration/template_examples.md b/docs/configuration/template_examples.md deleted file mode 100644 index bd076b256e..0000000000 --- a/docs/configuration/template_examples.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: Template examples -sort_rank: 4 ---- - -Prometheus supports templating in the annotations and labels of alerts, -as well as in served console pages. Templates have the ability to run -queries against the local database, iterate over data, use conditionals, -format data, etc. The Prometheus templating language is based on the [Go -templating](https://golang.org/pkg/text/template/) system. - -## Simple alert field templates - -```yaml -alert: InstanceDown -expr: up == 0 -for: 5m -labels: - severity: page -annotations: - summary: "Instance {{$labels.instance}} down" - description: "{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 minutes." -``` - -Alert field templates will be executed during every rule iteration for each -alert that fires, so keep any queries and templates lightweight. If you have a -need for more complicated templates for alerts, it is recommended to link to a -console instead. - -## Simple iteration - -This displays a list of instances, and whether they are up: - -``` -{{ range query "up" }} - {{ .Labels.instance }} {{ .Value }} -{{ end }} -``` - -The special `.` variable contains the value of the current sample for each loop iteration. 
- -## Display one value - -``` -{{ with query "some_metric{instance='someinstance'}" }} - {{ . | first | value | humanize }} -{{ end }} -``` - -Go and Go's templating language are both strongly typed, so one must check that -samples were returned to avoid an execution error. For example this could -happen if a scrape or rule evaluation has not run yet, or a host was down. - -The included `prom_query_drilldown` template handles this, allows for -formatting of results, and linking to the [expression browser](https://prometheus.io/docs/visualization/browser/). - -## Using console URL parameters - -``` -{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" .Params.instance | query }} - {{ . | first | value | humanize1024 }}B -{{ end }} -``` - -If accessed as `console.html?instance=hostname`, `.Params.instance` will evaluate to `hostname`. - -## Advanced iteration - -```html -<table> -{{ range printf "node_network_receive_bytes{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device"}} - <tr><th colspan=2>{{ .Labels.device }}</th></tr> - <tr> - <td>Received</td> - <td>{{ with printf "rate(node_network_receive_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{end}}</td> - </tr> - <tr> - <td>Transmitted</td> - <td>{{ with printf "rate(node_network_transmit_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{end}}</td> - </tr>{{ end }} -</table> -``` - -Here we iterate over all network devices and display the network traffic for each. - -As the `range` action does not specify a variable, `.Params.instance` is not -available inside the loop as `.` is now the loop variable. - -## Defining reusable templates - -Prometheus supports defining templates that can be reused. This is particularly -powerful when combined with -[console library](template_reference.md#console-templates) support, allowing -sharing of templates across consoles. - -``` -{{/* Define the template */}} -{{define "myTemplate"}} - do something -{{end}} - -{{/* Use the template */}} -{{template "myTemplate"}} -``` - -Templates are limited to one argument. The `args` function can be used to wrap multiple arguments. - -``` -{{define "myMultiArgTemplate"}} - First argument: {{.arg0}} - Second argument: {{.arg1}} -{{end}} -{{template "myMultiArgTemplate" (args 1 2)}} -``` diff --git a/docs/configuration/template_reference.md b/docs/configuration/template_reference.md deleted file mode 100644 index 57f2606b13..0000000000 --- a/docs/configuration/template_reference.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Template reference -sort_rank: 5 ---- - -Prometheus supports templating in the annotations and labels of alerts, -as well as in served console pages. Templates have the ability to run -queries against the local database, iterate over data, use conditionals, -format data, etc. The Prometheus templating language is based on the [Go -templating](https://golang.org/pkg/text/template/) system. - -## Data Structures - -The primary data structure for dealing with time series data is the sample, defined as: - -```go -type sample struct { - Labels map[string]string - Value interface{} -} -``` - -The metric name of the sample is encoded in a special `__name__` label in the `Labels` map. - -`[]sample` means a list of samples. - -`interface{}` in Go is similar to a void pointer in C.
- -## Functions - -In addition to the [default -functions](https://golang.org/pkg/text/template/#hdr-Functions) provided by Go -templating, Prometheus provides functions for easier processing of query -results in templates. - -If functions are used in a pipeline, the pipeline value is passed as the last argument. - -### Queries - -| Name | Arguments | Returns | Notes | -| ------------- | ------------- | -------- | -------- | -| query | query string | []sample | Queries the database, does not support returning range vectors. | -| first | []sample | sample | Equivalent to `index a 0` | -| label | label, sample | string | Equivalent to `index sample.Labels label` | -| value | sample | interface{} | Equivalent to `sample.Value` | -| sortByLabel | label, []samples | []sample | Sorts the samples by the given label. Is stable. | - -`first`, `label` and `value` are intended to make query results easily usable in pipelines. - -### Numbers - -| Name | Arguments | Returns | Notes | -|---------------------| -----------------| --------| --------- | -| humanize | number or string | string | Converts a number to a more readable format, using [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix). -| humanize1024 | number or string | string | Like `humanize`, but uses 1024 as the base rather than 1000. | -| humanizeDuration | number or string | string | Converts a duration in seconds to a more readable format. | -| humanizePercentage | number or string | string | Converts a ratio value to a fraction of 100. | -| humanizeTimestamp | number or string | string | Converts a Unix timestamp in seconds to a more readable format. | -| toTime | number or string | *time.Time | Converts a Unix timestamp in seconds to a time.Time. | - -Humanizing functions are intended to produce reasonable output for consumption -by humans, and are not guaranteed to return the same results between Prometheus -versions. 
- -### Strings - -| Name | Arguments | Returns | Notes | -| ------------- | ------------- | ------- | ----------- | -| title | string | string | [cases.Title](https://pkg.go.dev/golang.org/x/text/cases#Title), capitalises first character of each word.| -| toUpper | string | string | [strings.ToUpper](https://golang.org/pkg/strings/#ToUpper), converts all characters to upper case.| -| toLower | string | string | [strings.ToLower](https://golang.org/pkg/strings/#ToLower), converts all characters to lower case.| -| stripPort | string | string | [net.SplitHostPort](https://pkg.go.dev/net#SplitHostPort), splits string into host and port, then returns only host.| -| match | pattern, text | boolean | [regexp.MatchString](https://golang.org/pkg/regexp/#MatchString) Tests for an unanchored regexp match. | -| reReplaceAll | pattern, replacement, text | string | [Regexp.ReplaceAllString](https://golang.org/pkg/regexp/#Regexp.ReplaceAllString) Regexp substitution, unanchored. | -| graphLink | expr | string | Returns path to graph view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. | -| tableLink | expr | string | Returns path to tabular ("Table") view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. | -| parseDuration | string | float | Parses a duration string such as "1h" into the number of seconds it represents. | -| stripDomain | string | string | Removes the domain part of an FQDN. Leaves port untouched. | - -### Others - -| Name | Arguments | Returns | Notes | -| ------------- | ------------- | ------- | ----------- | -| args | []interface{} | map[string]interface{} | This converts a list of objects to a map with keys arg0, arg1 etc. This is intended to allow multiple arguments to be passed to templates. | -| tmpl | string, []interface{} | nothing | Like the built-in `template`, but allows non-literals as the template name.
Note that the result is assumed to be safe, and will not be auto-escaped. Only available in consoles. | -| safeHtml | string | string | Marks string as HTML not requiring auto-escaping. | -| externalURL | _none_ | string | The external URL under which Prometheus is externally reachable. | -| pathPrefix | _none_ | string | The external URL [path](https://pkg.go.dev/net/url#URL) for use in console templates. | - -## Template type differences - -Each type of template provides different information that can be used to -parameterize templates, and has a few other differences. - -### Alert field templates - -`.Value`, `.Labels`, `.ExternalLabels`, and `.ExternalURL` contain the alert value, the alert -labels, the globally configured external labels, and the external URL (configured with `--web.external-url`) respectively. They are -also exposed as the `$value`, `$labels`, `$externalLabels`, and `$externalURL` variables for -convenience. - -### Console templates - -Consoles are exposed on `/consoles/`, and sourced from the directory pointed to -by the `--web.console.templates` flag. - -Console templates are rendered with -[html/template](https://golang.org/pkg/html/template/), which provides -auto-escaping. To bypass the auto-escaping use the `safe*` functions. - -URL parameters are available as a map in `.Params`. To access multiple URL -parameters by the same name, `.RawParams` is a map of the list values for each -parameter. The URL path is available in `.Path`, excluding the `/consoles/` -prefix. The globally configured external labels are available as -`.ExternalLabels`. There are also convenience variables for all four: -`$rawParams`, `$params`, `$path`, and `$externalLabels`. - -Consoles also have access to all the templates defined with `{{define -"templateName"}}...{{end}}` found in `*.lib` files in the directory pointed to -by the `--web.console.libraries` flag. As this is a shared namespace, take care -to avoid clashes with other users.
Template names beginning with `prom`, -`_prom`, and `__` are reserved for use by Prometheus, as are the functions -listed above. diff --git a/docs/configuration/unit_testing_rules.md b/docs/configuration/unit_testing_rules.md deleted file mode 100644 index 4e2c62e1cf..0000000000 --- a/docs/configuration/unit_testing_rules.md +++ /dev/null @@ -1,277 +0,0 @@ ---- -title: Unit testing for rules -sort_rank: 6 ---- - -You can use `promtool` to test your rules. - -```shell -# For a single test file. -./promtool test rules test.yml - -# If you have multiple test files, say test1.yml,test2.yml,test2.yml -./promtool test rules test1.yml test2.yml test3.yml -``` - -## Test file format - -```yaml -# This is a list of rule files to consider for testing. Globs are supported. -rule_files: - [ - ] - -[ evaluation_interval: | default = 1m ] - -# Setting fuzzy_compare true will very slightly weaken floating point comparisons. -# This will (effectively) ignore differences in the last bit of the mantissa. -[ fuzzy_compare: | default = false ] - -# The order in which group names are listed below will be the order of evaluation of -# rule groups (at a given evaluation time). The order is guaranteed only for the groups mentioned below. -# All the groups need not be mentioned below. -group_eval_order: - [ - ] - -# All the tests are listed here. -tests: - [ - ] -``` - -### `` - -``` yaml -# Series data -[ interval: | default = evaluation_interval ] -input_series: - [ - ] - -# Name of the test group -[ name: ] - -# Unit tests for the above data. - -# Unit tests for alerting rules. We consider the alerting rules from the input file. -alert_rule_test: - [ - ] - -# Unit tests for PromQL expressions. -promql_expr_test: - [ - ] - -# External labels accessible to the alert template. -external_labels: - [ : ... ] - -# External URL accessible to the alert template. -# Usually set using --web.external-url. - [ external_url: ] -``` - -### `` - -```yaml -# This follows the usual series notation '{