From 39db78d8e5f2a1e8cdb603ad0e53e7825e3e2911 Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:22:41 +0100 Subject: [PATCH 01/24] Include base ROOT image with Madgraph installed (#298) --- .../cluster/flux/jhub/jhub-configmap-profiles.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml index 2a243f3c..f4915920 100644 --- a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml @@ -22,6 +22,10 @@ data: description: "ROOT v6.26.10 as well as a ROOT C++ and a python-3.8 kernel." kubespawner_override: image: ghcr.io/vre-hub/vre-singleuser-root:sha-c94d95a + - display_name: "ROOT Higgs 2024 environment" + description: "ROOT v6.26.02." + kubespawner_override: + image: ghcr.io/vre-hub/vre-singleuser-root-base:sha-034c77b - display_name: "VIRGO - WDF environment" description: "Contains the full WDF v2.2.1 environment - Python 3.9 kernel." kubespawner_override: @@ -49,4 +53,4 @@ data: - display_name: "Zenodo extension DEV environment" description: "For testing purposes" kubespawner_override: - image: ghcr.io/vre-hub/vre-singleuser-zen_ext-dev:latest \ No newline at end of file + image: ghcr.io/vre-hub/vre-singleuser-zen_ext-dev:latest From 76448a08b44d55291dd011c0ae16cefab14e1f0f Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:56:39 +0100 Subject: [PATCH 02/24] Update version of image --- infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml index f4915920..254feff3 100644 --- a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml @@ -25,7 +25,7 @@ data: - display_name: "ROOT Higgs 2024 environment" description: "ROOT v6.26.02." kubespawner_override: - image: ghcr.io/vre-hub/vre-singleuser-root-base:sha-034c77b + image: ghcr.io/vre-hub/vre-singleuser-root-base:sha-c86ae9c - display_name: "VIRGO - WDF environment" description: "Contains the full WDF v2.2.1 environment - Python 3.9 kernel." kubespawner_override: From 3eb45343a272b6d47641bea9c5eef35de6b5521c Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Thu, 5 Dec 2024 19:18:23 +0100 Subject: [PATCH 03/24] Temporary set latest tag I promise I'll fix this --- infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml index 254feff3..b5ac0a82 100644 --- a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml @@ -25,7 +25,7 @@ data: - display_name: "ROOT Higgs 2024 environment" description: "ROOT v6.26.02." kubespawner_override: - image: ghcr.io/vre-hub/vre-singleuser-root-base:sha-c86ae9c + image: ghcr.io/vre-hub/vre-singleuser-root-base:latest - display_name: "VIRGO - WDF environment" description: "Contains the full WDF v2.2.1 environment - Python 3.9 kernel." kubespawner_override: From dea6a29367664f554f0ae45d535175cec3917849 Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:11:56 +0100 Subject: [PATCH 04/24] Conditional poststarthook creation (#299) * Conditional poststarthook creation * adding env var check --- .../cluster/flux/jhub/jhub-release.yaml | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-release.yaml b/infrastructure/cluster/flux/jhub/jhub-release.yaml index 057df9fb..ca807ec0 100644 --- a/infrastructure/cluster/flux/jhub/jhub-release.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-release.yaml @@ -129,23 +129,28 @@ spec: command: - "sh" - "-c" - - > - mkdir -p /certs /tmp; - echo -n $RUCIO_ACCESS_TOKEN > /tmp/rucio_oauth.token; - echo -n "oauth2:${EOS_ACCESS_TOKEN}:iam-escape.cloud.cnaf.infn.it/userinfo" > /tmp/eos_oauth.token; - chmod 0600 /tmp/eos_oauth.token; - mkdir -p /opt/rucio/etc; - echo "[client]" >> /opt/rucio/etc/rucio.cfg; - echo "rucio_host = https://vre-rucio.cern.ch" >> /opt/rucio/etc/rucio.cfg; - echo "auth_host = https://vre-rucio-auth.cern.ch" >> /opt/rucio/etc/rucio.cfg; - echo "ca_cert = /certs/rucio_ca.pem" >> /opt/rucio/etc/rucio.cfg; - echo "account = $JUPYTERHUB_USER" >> /opt/rucio/etc/rucio.cfg; - echo "auth_type = oidc" >> /opt/rucio/etc/rucio.cfg; - echo "oidc_audience = rucio" >> /opt/rucio/etc/rucio.cfg; - echo "oidc_polling = true" >> /opt/rucio/etc/rucio.cfg; - echo "oidc_issuer = escape" >> /opt/rucio/etc/rucio.cfg; - echo "oidc_scope = openid profile offline_access" >> /opt/rucio/etc/rucio.cfg; - echo "auth_token_file_path = /tmp/rucio_oauth.token" >> /opt/rucio/etc/rucio.cfg; + - | + if [ "${SKIP_POSTSTART_HOOK}" = "true" ]; then + echo "hello world"; + else + mkdir -p /certs /tmp; + echo -n $RUCIO_ACCESS_TOKEN > /tmp/rucio_oauth.token; + echo -n "oauth2:${EOS_ACCESS_TOKEN}:iam-escape.cloud.cnaf.infn.it/userinfo" > /tmp/eos_oauth.token; + chmod 0600 /tmp/eos_oauth.token; + mkdir -p /opt/rucio/etc; + echo "[client]" >> /opt/rucio/etc/rucio.cfg; + echo "rucio_host = https://vre-rucio.cern.ch" >> /opt/rucio/etc/rucio.cfg; + echo "auth_host = https://vre-rucio-auth.cern.ch" >> /opt/rucio/etc/rucio.cfg; + echo "ca_cert = /certs/rucio_ca.pem" >> /opt/rucio/etc/rucio.cfg; + echo "account = $JUPYTERHUB_USER" >> /opt/rucio/etc/rucio.cfg; + echo "auth_type = oidc" >> /opt/rucio/etc/rucio.cfg; + echo "oidc_audience = rucio" >> /opt/rucio/etc/rucio.cfg; + echo "oidc_polling = true" >> /opt/rucio/etc/rucio.cfg; + echo "oidc_issuer = escape" >> /opt/rucio/etc/rucio.cfg; + echo "oidc_scope = openid profile offline_access" >> /opt/rucio/etc/rucio.cfg; + echo "auth_token_file_path = /tmp/rucio_oauth.token" >> /opt/rucio/etc/rucio.cfg; + fi; + networkPolicy: enabled: false storage: From fc7ec02f9a8fca5dcf02f0c4e1061ca28eeeab90 Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:43:35 +0100 Subject: [PATCH 05/24] Update description of environment --- infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml index b5ac0a82..1ffce64c 100644 --- a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml @@ -23,7 +23,7 @@ data: kubespawner_override: image: ghcr.io/vre-hub/vre-singleuser-root:sha-c94d95a - display_name: "ROOT Higgs 2024 environment" - description: "ROOT v6.26.02." + description: "ROOT v6.32.04, and a python-3.11 kernel." kubespawner_override: image: ghcr.io/vre-hub/vre-singleuser-root-base:latest - display_name: "VIRGO - WDF environment" From d06358d01eeb7e937b30f017bfb06149f6d5ca65 Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:57:47 +0100 Subject: [PATCH 06/24] Changing pull policy to cache images (#300) And speedup deployment --- infrastructure/cluster/flux/jhub/jhub-release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-release.yaml b/infrastructure/cluster/flux/jhub/jhub-release.yaml index ca807ec0..0226a954 100644 --- a/infrastructure/cluster/flux/jhub/jhub-release.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-release.yaml @@ -184,7 +184,7 @@ spec: image: name: ghcr.io/vre-hub/vre-singleuser-py311 tag: sha-281055c - pullPolicy: Always + pullPolicy: IfNotPresent # nodeSelector: # jupyter: singleuser From c68b70f6fbb5453a162a556e5214a3a7f06458df Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Mon, 9 Dec 2024 10:15:55 +0100 Subject: [PATCH 07/24] Update jhub-release.yaml --- infrastructure/cluster/flux/jhub/jhub-release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-release.yaml b/infrastructure/cluster/flux/jhub/jhub-release.yaml index 0226a954..ca807ec0 100644 --- a/infrastructure/cluster/flux/jhub/jhub-release.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-release.yaml @@ -184,7 +184,7 @@ spec: image: name: ghcr.io/vre-hub/vre-singleuser-py311 tag: sha-281055c - pullPolicy: IfNotPresent + pullPolicy: Always # nodeSelector: # jupyter: singleuser From c69721cfe0fa42c3b1c1ca0ca45bf07141521108 Mon Sep 17 00:00:00 2001 From: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> Date: Mon, 9 Dec 2024 10:37:41 +0100 Subject: [PATCH 08/24] Update jhub-release.yaml --- infrastructure/cluster/flux/jhub/jhub-release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/jhub/jhub-release.yaml b/infrastructure/cluster/flux/jhub/jhub-release.yaml index ca807ec0..54d6c2be 100644 --- a/infrastructure/cluster/flux/jhub/jhub-release.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-release.yaml @@ -122,7 +122,7 @@ spec: defaultUrl: "/lab" # The liefcycle hooks are used to create the Rucio configuration file, # and the token file by copying the REFRESH_TOKEN from the environment variable to the token file. - startTimeout: 600 + startTimeout: 1200 lifecycleHooks: postStart: exec: From fc52aacbdaf7b6405bc1a46cb3852cca0a152558 Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:09:57 +0100 Subject: [PATCH 09/24] Update links in README.md (#301) --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 163b2a20..c1ac73d4 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,11 @@ VRE links: - Code: https://github.com/vre-hub/vre/ - User documentation: https://vre-hub.github.io/ - Technical documentation: https://github.com/vre-hub/vre/wiki + - :construction: Ongoing migration: https://vre-hub.github.io/docs/tech-docs/home - VRE file transfer monitoring: https://monit-grafana-open.cern.ch/d/PJ65OqBVz/vre-rucio-events?orgId=16 - Live status of the VRE services: https://vre-hub.github.io/status/ - - VRE Slack channel: [invitation link](https://join.slack.com/t/eosc-escape/shared_invite/zt-1zd76ivit-Z2A2nszN0qfn4VF6Uk6UrQ). + - ESCAPE Mattermost Team: [invitation link](https://mattermost.web.cern.ch/signup_user_complete/?id=zqaa9p5fqfd9bnnc64at4b5aye&md=link&sbr=su). + - :exclamation: Afterwards please join the `VRE Support` channel [![flux check pipeline](https://github.com/vre-hub/vre/actions/workflows/merge-check-paths.yml/badge.svg)](https://github.com/vre-hub/vre/actions/workflows/merge-check-paths.yml) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) @@ -31,4 +33,4 @@ To cite us, please use the latest publication: ## Contact -Email the CERN VRE team: `escape-cern-ops'at'cern.ch` \ No newline at end of file +Email the CERN VRE team: `escape-cern-ops'at'cern.ch` From 350d0691b9bd49a1fe6bd424be50afc525d84e75 Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Fri, 13 Dec 2024 12:04:15 +0100 Subject: [PATCH 10/24] Add def env (#302) * add python3.11.8 default ima * fix CI unchecked --- .github/workflows/merge-check-paths.yml | 1 + infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/merge-check-paths.yml b/.github/workflows/merge-check-paths.yml index 6c286b36..96c1181b 100644 --- a/.github/workflows/merge-check-paths.yml +++ b/.github/workflows/merge-check-paths.yml @@ -5,6 +5,7 @@ on: pull_request: paths: - 'infrastructure/cluster/flux/**' + - '**.tf' push: branches: - main diff --git a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml index 1ffce64c..35b5c7fa 100644 --- a/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml +++ b/infrastructure/cluster/flux/jhub/jhub-configmap-profiles.yaml @@ -54,3 +54,7 @@ data: description: "For testing purposes" kubespawner_override: image: ghcr.io/vre-hub/vre-singleuser-zen_ext-dev:latest + - display_name: "Python 3.11 environment" + description: "quay.io/jupyter/scipy-notebook:python-3.11 image" + kubespawner_override: + image: quay.io/jupyter/scipy-notebook:python-3.11.8 From c80d2875df6fc2d71b690ff69a1e68fdcaa98c1e Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Mon, 20 Jan 2025 16:43:11 +0100 Subject: [PATCH 11/24] fix rucio-root-client --- .../cluster/flux/rucio/rucio-client.yaml | 49 +++++++++++++++++++ .../cluster/flux/rucio/rucio-cronjobs.yaml | 40 --------------- 2 files changed, 49 insertions(+), 40 deletions(-) create mode 100644 infrastructure/cluster/flux/rucio/rucio-client.yaml diff --git a/infrastructure/cluster/flux/rucio/rucio-client.yaml b/infrastructure/cluster/flux/rucio/rucio-client.yaml new file mode 100644 index 00000000..44cff8f1 --- /dev/null +++ b/infrastructure/cluster/flux/rucio/rucio-client.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +kind: Pod +metadata: + name: rucio-root-client + namespace: rucio +spec: + containers: + - name: rucio-client + image: rucio/rucio-clients:release-34.6.0 + imagePullPolicy: Always + volumeMounts: + - name: cern-bundle + mountPath: /etc/pki/tls/certs/ + env: + - name: RUCIO_CFG_CLIENT_RUCIO_HOST + value: "https://vre-rucio.cern.ch" + - name: RUCIO_CFG_CLIENT_AUTH_HOST + value: "https://vre-rucio-auth.cern.ch" + - name: RUCIO_CFG_CLIENT_CA_CERT + value: "/etc/pki/tls/certs/CERN-bundle.pem" + - name: RUCIO_CFG_CLIENT_ACCOUNT + value: "root" + - name: RUCIO_CFG_CLIENT_AUTH_TYPE + value: "userpass" + - name: RUCIO_CFG_CLIENT_USERNAME + valueFrom: + secretKeyRef: + name: rucio-root-account + key: root-username + - name: RUCIO_CFG_CLIENT_PASSWORD + valueFrom: + secretKeyRef: + name: rucio-root-account + key: root-password + command: ["sleep","3600"] + resources: + limits: + cpu: 100m + memory: 50Mi + requests: + cpu: 100m + memory: 50Mi + volumes: + - name: cern-bundle + secret: + secretName: cern-bundle + + + diff --git a/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml b/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml index c68417b9..f70341a9 100644 --- a/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml @@ -91,43 +91,3 @@ spec: # - name: prod-rucio-x509up # secret: # secretName: prod-rucio-x509up -# --- -# apiVersion: v1 -# kind: Pod -# metadata: -# name: rucio-client -# namespace: rucio-vre -# spec: -# containers: -# - name: rucio-client -# image: ghcr.io/vre-hub/vre-rucio-client:v0.1.2-1-0487cc0 -# imagePullPolicy: Always -# env: -# - name: RUCIO_CFG_RUCIO_HOST -# value: "https://vre-rucio.cern.ch" -# - name: RUCIO_CFG_AUTH_HOST -# value: "https://vre-rucio-auth.cern.ch" -# - name: RUCIO_CFG_ACCOUNT -# value: "root" -# - name: RUCIO_CFG_AUTH_TYPE -# value: "userpass" -# - name: RUCIO_CFG_USERNAME -# valueFrom: -# secretKeyRef: -# name: root-account -# key: root-username -# - name: RUCIO_CFG_PASSWORD -# valueFrom: -# secretKeyRef: -# name: root-account -# key: root-password -# command: ["sleep","3600"] -# resources: -# limits: -# cpu: 100m -# memory: 50Mi -# requests: -# cpu: 100m -# memory: 50Mi - - From cfc1beb60b7129f958cd68f1d76096bb498977ed Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Mon, 20 Jan 2025 17:23:58 +0100 Subject: [PATCH 12/24] change rses.txt file with working rses (#304) --- containers/rucio-noise/rses.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/containers/rucio-noise/rses.txt b/containers/rucio-noise/rses.txt index 71db49b0..daf80d99 100644 --- a/containers/rucio-noise/rses.txt +++ b/containers/rucio-noise/rses.txt @@ -1,5 +1,4 @@ -CERN-EOS -CESNET-S3 +CERN-EOSPILOT CNAF-STORM CC-DCACHE PIC-DCACHE From b1c6e7d927967b1da851a961ec0ef6ef8ef752de Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Tue, 21 Jan 2025 18:02:34 +0100 Subject: [PATCH 13/24] upgrade fts servers URL --- infrastructure/cluster/flux/rucio/rucio-daemons.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml index de3ec4c6..aead6d1a 100644 --- a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml @@ -432,7 +432,7 @@ spec: vos: - vo: "escape" voms: "escape" - servers: "https://fts3-devel.cern.ch:8446,https://fts3-pilot.cern.ch:8446" + servers: "https://fts3-pilot.cern.ch:8446" script: "escape" secretMounts: - secretName: fts-cert From 6197aeebba92333b741922cc284f47f3d729cd1a Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Thu, 23 Jan 2025 09:13:56 +0100 Subject: [PATCH 14/24] add rucio-iam-connected-client pod (#303) * add rucio-iam-connected-client pod * fix st***d tf tests * fix rucio-noise cronjobs and add rucio-noise-test-pod * upgrade FTS server of rucio daemons * update rucio-noise-pod-and-rucio-ewpc01 * Update infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml --------- Co-authored-by: Giovanni Guerrieri <49747129+Soap2G@users.noreply.github.com> --- .github/workflows/merge-check-paths.yml | 1 + .../cluster/flux/rucio/rucio-client.yaml | 49 ------- .../cluster/flux/rucio/rucio-cronjobs.yaml | 84 ++++++------ .../cluster/flux/rucio/rucio-gitops-pods.yaml | 121 ++++++++++++++++++ 4 files changed, 166 insertions(+), 89 deletions(-) delete mode 100644 infrastructure/cluster/flux/rucio/rucio-client.yaml create mode 100644 infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml diff --git a/.github/workflows/merge-check-paths.yml b/.github/workflows/merge-check-paths.yml index 96c1181b..11005680 100644 --- a/.github/workflows/merge-check-paths.yml +++ b/.github/workflows/merge-check-paths.yml @@ -10,6 +10,7 @@ on: branches: - main paths: + - '**.tf' - 'infrastructure/cluster/flux/**' jobs: diff --git a/infrastructure/cluster/flux/rucio/rucio-client.yaml b/infrastructure/cluster/flux/rucio/rucio-client.yaml deleted file mode 100644 index 44cff8f1..00000000 --- a/infrastructure/cluster/flux/rucio/rucio-client.yaml +++ /dev/null @@ -1,49 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: rucio-root-client - namespace: rucio -spec: - containers: - - name: rucio-client - image: rucio/rucio-clients:release-34.6.0 - imagePullPolicy: Always - volumeMounts: - - name: cern-bundle - mountPath: /etc/pki/tls/certs/ - env: - - name: RUCIO_CFG_CLIENT_RUCIO_HOST - value: "https://vre-rucio.cern.ch" - - name: RUCIO_CFG_CLIENT_AUTH_HOST - value: "https://vre-rucio-auth.cern.ch" - - name: RUCIO_CFG_CLIENT_CA_CERT - value: "/etc/pki/tls/certs/CERN-bundle.pem" - - name: RUCIO_CFG_CLIENT_ACCOUNT - value: "root" - - name: RUCIO_CFG_CLIENT_AUTH_TYPE - value: "userpass" - - name: RUCIO_CFG_CLIENT_USERNAME - valueFrom: - secretKeyRef: - name: rucio-root-account - key: root-username - - name: RUCIO_CFG_CLIENT_PASSWORD - valueFrom: - secretKeyRef: - name: rucio-root-account - key: root-password - command: ["sleep","3600"] - resources: - limits: - cpu: 100m - memory: 50Mi - requests: - cpu: 100m - memory: 50Mi - volumes: - - name: cern-bundle - secret: - secretName: cern-bundle - - - diff --git a/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml b/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml index f70341a9..c6bd4766 100644 --- a/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml @@ -1,4 +1,3 @@ ---- apiVersion: batch/v1 kind: CronJob metadata: @@ -15,6 +14,7 @@ spec: restartPolicy: OnFailure containers: - name: iam-sync + # TODO: make new relase after fixing all the cronjobs/pods and change the image image: ghcr.io/vre-hub/vre-iam-rucio-sync:v1.0.0-rc.2-82-aea1b65 volumeMounts: - name: rucio-cfg @@ -52,42 +52,46 @@ spec: - name: daemons-rucio-x509up secret: secretName: daemons-rucio-x509up -# --- -# apiVersion: batch/v1 -# kind: CronJob -# metadata: -# name: rucio-noise -# namespace: rucio-vre -# spec: -# schedule: "*/10 * * * *" -# concurrencyPolicy: Forbid -# successfulJobsHistoryLimit: 1 -# jobTemplate: -# spec: -# template: -# spec: -# restartPolicy: OnFailure -# containers: -# - name: rucio-noise -# image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc0-52-7e5585c -# volumeMounts: -# - name: rucio-cfg -# mountPath: /opt/rucio/etc/ -# - name: prod-rucio-x509up -# mountPath: /tmp/ -# tty: true -# imagePullPolicy: Always -# command: -# - /bin/sh -# - -c -# - date; ls -l /etc/pki/tls/certs/; ls -l /tmp/; cd /opt/rucio/etc/; pwd; echo Hello from rucio-noise container; -# export RUCIO_CONFIG=/opt/rucio/etc/rucio.cfg; echo Exported config; cat /opt/rucio/etc/rucio.cfg; rucio -vvv whoami; -# cd /home; export FSIZE=10M; FILE_SIZE=${FSIZE} /bin/bash produce_noise.sh; echo "Rucio noise cronjob ${FSIZE} Done!" -# volumes: -# - name: rucio-cfg -# secret: -# secretName: escape-service-account -# defaultMode: 0400 -# - name: prod-rucio-x509up -# secret: -# secretName: prod-rucio-x509up +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: rucio-noise + namespace: rucio +spec: + # Change original schedule to 3 times a day and 1 Mb files + #schedule: "*/10 * * * *" + schedule: "30 08 * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: rucio-noise + # TODO: make new relase after fixing all the cronjobs/pods and change the image + image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-106-cfc1beb + volumeMounts: + - name: rucio-cfg + mountPath: /opt/rucio/etc/ + # This secrets is the old 'prod-rucio-x509up' + - name: daemons-rucio-x509up + mountPath: /tmp/ + tty: true + imagePullPolicy: Always + command: + - /bin/sh + - -c + - date; ls -l /etc/pki/tls/certs/; ls -l /tmp/; cd /opt/rucio/etc/; pwd; echo Hello from rucio-noise container; + export RUCIO_CONFIG=/opt/rucio/etc/rucio.cfg; echo Exported config; cat /opt/rucio/etc/rucio.cfg; rucio -vvv whoami; + cd /home; export FSIZE=1M; FILE_SIZE=${FSIZE} /bin/bash produce_noise.sh; echo "Rucio noise cronjob ${FSIZE} Done!" + volumes: + - name: rucio-cfg + secret: + secretName: escape-service-account + defaultMode: 0400 + - name: daemons-rucio-x509up + secret: + secretName: daemons-rucio-x509up diff --git a/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml b/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml new file mode 100644 index 00000000..9168fac5 --- /dev/null +++ b/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml @@ -0,0 +1,121 @@ +apiVersion: v1 +kind: Pod +metadata: + name: rucio-root-client + namespace: rucio +spec: + containers: + - name: rucio-client + image: rucio/rucio-clients:release-34.6.0 + imagePullPolicy: Always + volumeMounts: + - name: cern-bundle + mountPath: /etc/pki/tls/certs/ + env: + - name: RUCIO_CFG_CLIENT_RUCIO_HOST + value: "https://vre-rucio.cern.ch" + - name: RUCIO_CFG_CLIENT_AUTH_HOST + value: "https://vre-rucio-auth.cern.ch" + - name: RUCIO_CFG_CLIENT_CA_CERT + value: "/etc/pki/tls/certs/CERN-bundle.pem" + - name: RUCIO_CFG_CLIENT_ACCOUNT + value: "root" + - name: RUCIO_CFG_CLIENT_AUTH_TYPE + value: "userpass" + - name: RUCIO_CFG_CLIENT_USERNAME + valueFrom: + secretKeyRef: + name: rucio-root-account + key: root-username + - name: RUCIO_CFG_CLIENT_PASSWORD + valueFrom: + secretKeyRef: + name: rucio-root-account + key: root-password + command: ["sleep","3600"] + resources: + limits: + cpu: 100m + memory: 50Mi + requests: + cpu: 100m + memory: 50Mi + volumes: + - name: cern-bundle + secret: + secretName: cern-bundle +--- +# This pod deploys the same container as the `iam-sync` cronjob. +# It allows testing RUCIO IAM connection and interacting with the IAM server via python/CLI +apiVersion: v1 +kind: Pod +metadata: + name: rucio-iam-connected-client + namespace: rucio +spec: + containers: + - name: iam-debug + # TODO: make new relase after fixing all the cronjobs/pods and change the image + image: ghcr.io/vre-hub/vre-iam-rucio-sync:v1.0.0-rc.2-82-aea1b65 + imagePullPolicy: Always + env: + - name: IAM_SERVER + value: "https://iam-escape.cloud.cnaf.infn.it/" + - name: IAM_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: rucio-admin-iam-client + key: admin-client_secret + - name: IAM_CLIENT_ID + valueFrom: + secretKeyRef: + name: rucio-admin-iam-client + key: admin-client_id + volumeMounts: + - name: rucio-cfg + mountPath: /opt/rucio/etc/ + - name: daemons-rucio-x509up + mountPath: /tmp/ + command: ["sleep","3600"] + volumes: + - name: rucio-cfg + secret: + secretName: escape-service-account + - name: daemons-rucio-x509up + secret: + secretName: daemons-rucio-x509up +--- +# Same idea as the `rucio-iam-connected-client` pod but for the rucio-noise one. +# Connected to the ESCAPE Service account ewp2c01 +apiVersion: v1 +kind: Pod +metadata: + name: rucio-noise-pod-and-rucio-ewp2c01 + namespace: rucio +spec: + containers: + - name: rucio-noise-test + # TODO: make new relase after fixing all the cronjobs/pods and change the image + image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-106-cfc1beb + imagePullPolicy: Always + volumeMounts: + - name: rucio-cfg + mountPath: /opt/rucio/etc/ + # This secrets is the old 'prod-rucio-x509up' + - name: daemons-rucio-x509up + mountPath: /tmp/ + command: ["sleep","3600"] + volumes: + - name: rucio-cfg + secret: + secretName: escape-service-account + defaultMode: 0400 + # This secrets is the old 'prod-rucio-x509up' + - name: daemons-rucio-x509up + secret: + secretName: daemons-rucio-x509up +# commands to be run are on the `rucio-noise` cronjob. Here there are a small +# summary NOT KEPT UPDATED !!!!! +# date; ls -l /etc/pki/tls/certs/; ls -l /tmp/; cd /opt/rucio/etc/; pwd; echo Hello from rucio-noise container; +# export RUCIO_CONFIG=/opt/rucio/etc/rucio.cfg; echo Exported config; cat /opt/rucio/etc/rucio.cfg; rucio -vvv whoami; +# cd /home; export FSIZE=10M; FILE_SIZE=${FSIZE} /bin/bash produce_noise.sh; echo "Rucio noise cronjob ${FSIZE} Done!" \ No newline at end of file From 2b91044f7fcbe8066663a1a12a0a3d2e80026835 Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Thu, 23 Jan 2025 09:41:36 +0100 Subject: [PATCH 15/24] Update daemons schema --- infrastructure/cluster/flux/rucio/rucio-daemons.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml index aead6d1a..2390741c 100644 --- a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml @@ -490,7 +490,7 @@ spec: schema: "escape" conveyor: - scheme: "srm,gsiftp,root,http,https" + scheme: "https,http,root,davs" transfertool: "fts3" ftshosts: "https://fts3-pilot.cern.ch:8446" cacert: "/etc/grid-security/ca.pem" From 7d3d2643ef3cf9cb4866e7d7dfbbc64394a6bbf3 Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Wed, 29 Jan 2025 15:15:37 +0100 Subject: [PATCH 16/24] change conveyor usercert to /opt/proxy path --- infrastructure/cluster/flux/rucio/rucio-daemons.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml index 2390741c..961d5cbe 100644 --- a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml @@ -494,7 +494,7 @@ spec: transfertool: "fts3" ftshosts: "https://fts3-pilot.cern.ch:8446" cacert: "/etc/grid-security/ca.pem" - usercert: "/tmp/x509up" + usercert: "/opt/proxy/x509up" allow_user_oidc_tokens: "True" request_oidc_scope: "openid profile offline_access wlcg.groups email fts:submit-transfer" request_oidc_audience: "fts" From c3d15e19f4074b1a2972b0b668d2986124e245a7 Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Wed, 29 Jan 2025 15:17:45 +0100 Subject: [PATCH 17/24] change num of daemons to 1 count for easier debug --- infrastructure/cluster/flux/rucio/rucio-daemons.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml index 961d5cbe..38d60448 100644 --- a/infrastructure/cluster/flux/rucio/rucio-daemons.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-daemons.yaml @@ -54,15 +54,15 @@ spec: abacusRseCount: 1 automatixCount: 1 cacheConsumerCount: 0 - conveyorTransferSubmitterCount: 2 - conveyorPollerCount: 2 + conveyorTransferSubmitterCount: 1 #2 + conveyorPollerCount: 1 #2 conveyorFinisherCount: 1 conveyorReceiverCount: 0 conveyorStagerCount: 0 conveyorThrottlerCount: 0 conveyorPreparerCount: 0 # for debugging, if it is not there the submitter will do the path computation adn source replica selection, and since the preparer needs a secret but I dont know of which kind, let's try without darkReaperCount: 0 - hermesCount: 2 + hermesCount: 1 #2 hermes2Count: 0 judgeCleanerCount: 1 judgeEvaluatorCount: 1 @@ -70,7 +70,7 @@ spec: judgeRepairerCount: 1 oauthManagerCount: 1 undertakerCount: 1 - reaperCount: 2 + reaperCount: 1 #2 replicaRecovererCount: 0 transmogrifierCount: 1 tracerKronosCount: 0 From 25fcc548dd8b17fef158deb4fb43726e989c11d3 Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Wed, 29 Jan 2025 18:19:51 +0100 Subject: [PATCH 18/24] improve verbosity and loops of rucio noise container (#305) --- containers/rucio-noise/produce_noise.sh | 42 ++++++++++++++----------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/containers/rucio-noise/produce_noise.sh b/containers/rucio-noise/produce_noise.sh index 62ffe072..ec3bd596 100644 --- a/containers/rucio-noise/produce_noise.sh +++ b/containers/rucio-noise/produce_noise.sh @@ -21,32 +21,36 @@ echo '* RUCIO_SCOPE = '"$RUCIO_SCOPE"'' echo '* FILE_LIFETIME = '"$FILE_LIFETIME"'' upload_and_transfer_and_delete () { - for (( i=0; i<$len; i++ )); do - if [ $1 != $i ]; then + for (( i=1; i<=$len; i++ )); do - echo '*** ======================================================================== ***' + echo '*** ======================================================================== ***' + echo '*** '"${rses[$i]}"' ***' - RANDOM_STRING=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1) - echo '*** generated random file identifier: '"$RANDOM_STRING"' ***' - filename=/home/auto_uploaded_${RANDOM_STRING}_source${rses[$1]} - did=auto_uploaded_${RANDOM_STRING}_source${rses[$1]} - - echo '*** generating '"$FILE_SIZE"' file on local storage ***' - head -c $FILE_SIZE < /dev/urandom > $filename - echo '*** filename: '"$filename"'' + RANDOM_STRING=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1) + echo '*** generated random file identifier: '"$RANDOM_STRING"' ***' + filename=/home/auto_uploaded_${RANDOM_STRING}_source${rses[$1]} + did=auto_uploaded_${RANDOM_STRING}_source${rses[$i]} + + echo '*** generating '"$FILE_SIZE"' file on local storage ***' + head -c $FILE_SIZE < /dev/urandom > $filename + echo '*** filename: '"$filename"' ***' - echo '*** uploading to rse '"${rses[$1]}"' and adding rule to rse '"${rses[$i]}"'' - rucio -v upload --rse ${rses[$1]} --lifetime $FILE_LIFETIME --scope $RUCIO_SCOPE $filename && rucio add-rule --lifetime $FILE_LIFETIME --activity "Functional Test" $RUCIO_SCOPE:$did 1 ${rses[$i]} + echo '*** uploading filename: '"$filename"' to '"${rses[$i]}"' ***' + rucio -v upload --rse ${rses[$1]} --lifetime $FILE_LIFETIME --scope $RUCIO_SCOPE $filename - #echo 'sleeping' sleep 3600 + for (( j=1; j<=$len; j++ )); do - echo '*** removing all replicas and dids associated to from rse '"${rses[$1]}"' and adding rule to rse '"${rses[$i]}"'' - echo '*** testing if `rucio erase` is able to remove all the replicas too ***' - rucio -v erase $RUCIO_SCOPE:$did + if [ $i != $j ]; then + + echo '*** adding rule from '"${rses[$i]}"' to '"${rses[$j]}"' ***' + rucio -v add-rule --lifetime $FILE_LIFETIME --activity "Functional Test" $RUCIO_SCOPE:$did 1 ${rses[$j]} + + done + + echo '*** Uploaded files and replicas should disappear after '${FILE_LIFETIME}' seconds ***' + # echo '*** Otherwise do a `rucio -v erase $RUCIO_SCOPE:$did` ***' - rm -f $filename - fi done } From 2d391b1c5927f8e213c4826221ca5d8f8befb69a Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Wed, 29 Jan 2025 18:25:00 +0100 Subject: [PATCH 19/24] update version rucio-noise-pod-and-rucio-ewp2c01 --- infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml b/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml index 9168fac5..9f01e59a 100644 --- a/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml @@ -96,7 +96,7 @@ spec: containers: - name: rucio-noise-test # TODO: make new relase after fixing all the cronjobs/pods and change the image - image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-106-cfc1beb + image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-112-25fcc54 imagePullPolicy: Always volumeMounts: - name: rucio-cfg From 929f8b91994a2bfeaf31f113dda9c8a211925ae9 Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Wed, 29 Jan 2025 18:58:06 +0100 Subject: [PATCH 20/24] forgot to end if in produce_noise.sh (#306) * forgot to end if in produce_noise.sh * please test your code begore pushing -.- --- containers/rucio-noise/produce_noise.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/containers/rucio-noise/produce_noise.sh b/containers/rucio-noise/produce_noise.sh index ec3bd596..0f416667 100644 --- a/containers/rucio-noise/produce_noise.sh +++ b/containers/rucio-noise/produce_noise.sh @@ -22,30 +22,32 @@ echo '* FILE_LIFETIME = '"$FILE_LIFETIME"'' upload_and_transfer_and_delete () { - for (( i=1; i<=$len; i++ )); do + for (( i=0; i<$len; i++ )); do echo '*** ======================================================================== ***' echo '*** '"${rses[$i]}"' ***' RANDOM_STRING=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1) echo '*** generated random file identifier: '"$RANDOM_STRING"' ***' - filename=/home/auto_uploaded_${RANDOM_STRING}_source${rses[$1]} + filename=/home/auto_uploaded_${RANDOM_STRING}_source${rses[$i]} did=auto_uploaded_${RANDOM_STRING}_source${rses[$i]} echo '*** generating '"$FILE_SIZE"' file on local storage ***' - head -c $FILE_SIZE < /dev/urandom > $filename + #head -c $FILE_SIZE < /dev/urandom > $filename echo '*** filename: '"$filename"' ***' echo '*** uploading filename: '"$filename"' to '"${rses[$i]}"' ***' - rucio -v upload --rse ${rses[$1]} --lifetime $FILE_LIFETIME --scope $RUCIO_SCOPE $filename + rucio -v upload --rse ${rses[$i]} --lifetime $FILE_LIFETIME --scope $RUCIO_SCOPE $filename - for (( j=1; j<=$len; j++ )); do + for (( j=0; j<$len; j++ )); do if [ $i != $j ]; then echo '*** adding rule from '"${rses[$i]}"' to '"${rses[$j]}"' ***' rucio -v add-rule --lifetime $FILE_LIFETIME --activity "Functional Test" $RUCIO_SCOPE:$did 1 ${rses[$j]} + fi + done echo '*** Uploaded files and replicas should disappear after '${FILE_LIFETIME}' seconds ***' From 9144522b692dd01bc4cad267d49a2bea45abba6d Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:33:44 +0100 Subject: [PATCH 21/24] uncomment line (#307) --- containers/rucio-noise/produce_noise.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/rucio-noise/produce_noise.sh b/containers/rucio-noise/produce_noise.sh index 0f416667..73039bcc 100644 --- a/containers/rucio-noise/produce_noise.sh +++ b/containers/rucio-noise/produce_noise.sh @@ -33,7 +33,7 @@ upload_and_transfer_and_delete () { did=auto_uploaded_${RANDOM_STRING}_source${rses[$i]} echo '*** generating '"$FILE_SIZE"' file on local storage ***' - #head -c $FILE_SIZE < /dev/urandom > $filename + head -c $FILE_SIZE < /dev/urandom > $filename echo '*** filename: '"$filename"' ***' echo '*** uploading filename: '"$filename"' to '"${rses[$i]}"' ***' From 65d76340c94088d3f80b207ba56e0b45c6fb4a5f Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Thu, 30 Jan 2025 10:11:24 +0100 Subject: [PATCH 22/24] upgrade rucio noise container version --- infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml | 2 +- infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml b/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml index c6bd4766..6276efba 100644 --- a/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-cronjobs.yaml @@ -72,7 +72,7 @@ spec: containers: - name: rucio-noise # TODO: make new relase after fixing all the cronjobs/pods and change the image - image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-106-cfc1beb + image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-115-9144522 volumeMounts: - name: rucio-cfg mountPath: /opt/rucio/etc/ diff --git a/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml b/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml index 9f01e59a..2ad0be6b 100644 --- a/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml +++ b/infrastructure/cluster/flux/rucio/rucio-gitops-pods.yaml @@ -96,7 +96,7 @@ spec: containers: - name: rucio-noise-test # TODO: make new relase after fixing all the cronjobs/pods and change the image - image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-112-25fcc54 + image: ghcr.io/vre-hub/vre-rucio-noise:v1.0.0-rc.2-115-9144522 imagePullPolicy: Always volumeMounts: - name: rucio-cfg From b1e62109fb7de3b6d8e459fe17e42893973ac4e5 Mon Sep 17 00:00:00 2001 From: garciagenrique Date: Thu, 30 Jan 2025 12:34:45 +0100 Subject: [PATCH 23/24] add Readme with the configuration of the eospilot deployment --- infrastructure/cluster/flux/eos/README.md | 37 +++++++++++------ .../cluster/flux/eos/eos-client.yaml | 40 +++++++++---------- .../cluster/flux/eos/eosxd-patches.yaml | 21 +--------- 3 files changed, 46 insertions(+), 52 deletions(-) diff --git a/infrastructure/cluster/flux/eos/README.md b/infrastructure/cluster/flux/eos/README.md index 0ca41dd0..35ed01a8 100644 --- a/infrastructure/cluster/flux/eos/README.md +++ b/infrastructure/cluster/flux/eos/README.md @@ -11,35 +11,46 @@ Any user connected to the VRE would be able to make use of the extension to acce ## EOS EULAKE instance -### Configuration +### ~~`eos/eulake`~~ `eos/pilot/eulake` configuration -## `eos/pilot/eulake` configuration +During the summer of 2024, the eulake instance was transferred into the EOS pilot instance. The snippets below have been edited acording to these changes. -During the summer of 2024, the eulake instance was moved into the EOS pilot instance. The snippets below have been edited acording to this changes. +> [!IMPORTANT] +> The `eospilot` instance is not configured by default on the CERN OpenStack clusters - in general, they are deployed with `cern-magnum` charts that brings EOS deployments to the cluster. Because `eulake` is a subdirectory within `eospilot`, note that the following snippets are set up to point to the `eospilot/eulake` subdirectory instead to `eospilot`. -The `eulake` instance is not configured by default on the CERN OpenStack clusters - The cluster are deployed with EOS deployments, though. To do so, path the `eos-csi-dir-etc-eos` configmap to add the eulake instance into your cluster. Modified the various mount directories as you wish. - -Then, add the `eulake` keytab secret as described below. The keytab sercret string can be find on the CERN-VRE `tbag`. + To add the `eospilot` instance to the EOS deployment, patch the `eos-csi-dir-etc-eos` configmap as shown below. No `ssskeytab` is further needed - as `eulake` used to require - `eospilot` uses the commn eos keytab. ```bash -# charts `eosxd-csi-1.3.1` are deployed with k8s clusters v1.29. -> kubectl -n kube-system patch configmap eos-csi-dir-etc-eos -p '{"data": {"fuse.eulake.conf": "{\"name\": \"eulake\", \"hostport\": \"eospilot.cern.ch\", \"remotemountdir\": \"/eos/pilot/eulake/escape/data/\", \"localmountdir\": \"/eos/eulake/\", \"auth\": {\"ssskeytab\": \"/etc/eos.keytab\"}}"}}' - -> kubectl -n kube-system patch secret eos-csi-file-etc-eos-keytab -p '{"stringData": {"fuse.sss.keytab": ""}}' +# charts `eosxd-csi-1.3.1` are deployed with k8s clusters v1.29.2 and cern-magnum-0.15.2. +> kubectl -n kube-system patch configmap eos-csi-dir-etc-eos -p '{"data": {"fuse.pilot.conf": "{\"name\": \"pilot\", \"hostport\": \"eospilot.cern.ch\", \"remotemountdir\": \"/eos/pilot/eulake/escape/data/\", \"auth\": {\"ssskeytab\": \"/etc/eos.keytab\"}}"}}' +``` +```yaml +# Patch also the following line into the big chunk of the `auto.eos` section below the rest of eos instances +data: + auto.eos: | + (...) + pilot -fstype=eosx,fsname=pilot :eosxd + (...) ``` -Now you can add this volumes on the jupyter hub deployment to access the instance from any pod or jupyter session. On the jupyter hub helm release +Now you can add this volume on the jupyterHub deployment to access the instance from any jupyter/pod session. On the jupyterHub helm Helm charts add: ```yaml extraVolumes: - name: eulake-cern-eos-rse hostPath: # This is pointing to /eos/pilot/eulake/escape/data, as defined on the eos-csi-dir-etc-eos/configmap - path: /var/eos/eulake + path: /var/eos/pilot extraVolumeMounts: - name: eulake-cern-eos-rse # mounts the EOS RSE needed for the Rucio JupiterLab extension - mountPath: /eos/cern-eos-rse + mountPath: /eos/eulake mountPropagation: HostToContainer readOnly: true ``` +> [!IMPORTANT] +> Please note that within this configuration there are two things happening. +> 1. The propagation of a volume into the cluster (mounting a specific subdirectory of `eospilot`). +> 2. The user authentication & authorisation to that subdirectory - which is not detailed here, and needs to be done from the eos server side. +> +> If A&A is not correctly given/propagated, users won't be able to access `/eos/eulake` from their session. diff --git a/infrastructure/cluster/flux/eos/eos-client.yaml b/infrastructure/cluster/flux/eos/eos-client.yaml index c99fb9a5..d1ee921e 100644 --- a/infrastructure/cluster/flux/eos/eos-client.yaml +++ b/infrastructure/cluster/flux/eos/eos-client.yaml @@ -1,20 +1,20 @@ -apiVersion: v1 -kind: Pod -metadata: - name: eos-client - namespace: default -spec: - containers: - - name: my-container - image: gitlab-registry.cern.ch/linuxsupport/alma9-base:latest - imagePullPolicy: IfNotPresent - command: ["sleep", "inf"] - volumeMounts: - - name: eos - mountPath: /eos - mountPropagation: HostToContainer - volumes: - - name: eos - hostPath: - path: /var/eos - type: Directory \ No newline at end of file +# apiVersion: v1 +# kind: Pod +# metadata: +# name: eos-client +# namespace: kube-system +# spec: +# containers: +# - name: my-container +# image: gitlab-registry.cern.ch/linuxsupport/alma9-base:latest +# imagePullPolicy: IfNotPresent +# command: ["sleep", "inf"] +# volumeMounts: +# - name: eos +# mountPath: /eos +# mountPropagation: HostToContainer +# volumes: +# - name: eos +# hostPath: +# path: /var/eos +# type: Directory \ No newline at end of file diff --git a/infrastructure/cluster/flux/eos/eosxd-patches.yaml b/infrastructure/cluster/flux/eos/eosxd-patches.yaml index a0667259..2bd48136 100644 --- a/infrastructure/cluster/flux/eos/eosxd-patches.yaml +++ b/infrastructure/cluster/flux/eos/eosxd-patches.yaml @@ -5,22 +5,5 @@ # namespace: kube-system # spec: # data: -# fuse.eulake.conf: "{ -# \"name\": \"eulake\", -# \"hostport\": \"eospilot.cern.ch\", -# \"remotemountdir\": \"/eos/pilot/eulake/escape/data\", -# \"localmountdir\": \"/eos/eulake\", -# \"auth\": { -# \"ssskeytab\": \"/etc/eos.keytab\" -# } -# }" -# --- -# apiVersion: apps/v1 -# kind: Secret -# metadata: -# name: eos-csi-dir-etc-eos -# namespace: kube-system -# spec: -# stringData: -# fuse.sss.keytab: | -# \ No newline at end of file +# fuse.eulake.conf: | +# '{"name":"eulake_test","hostport":"eospilot.cern.ch","remotemountdir":"/eos/pilot/eulake/escape/data/","localmountdir":"/eos/eulake/","auth":{"ssskeytab":"/etc/fuse.sss.keytab"}}' From a59acb23758676a3f05a746178edabdb346db786 Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Thu, 30 Jan 2025 12:43:23 +0100 Subject: [PATCH 24/24] WIP: add ingress for CERN prometheus configuration (#278) * add ingress for CERN prometheus configuration * update path and merge main * fix conflicts --- .../ingress_prometeus.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 infrastructure/cluster/manual/monit-magnum-ingress/ingress_prometeus.yaml diff --git a/infrastructure/cluster/manual/monit-magnum-ingress/ingress_prometeus.yaml b/infrastructure/cluster/manual/monit-magnum-ingress/ingress_prometeus.yaml new file mode 100644 index 00000000..2e0165c4 --- /dev/null +++ b/infrastructure/cluster/manual/monit-magnum-ingress/ingress_prometeus.yaml @@ -0,0 +1,32 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: nginx + nginx.ingress.kubernetes.io/rewrite-target: /$2 + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/configuration-snippet: |- + rewrite ^(/graph)(.*) /prometheus/graph$2 redirect; + name: kube-system-ingress + namespace: kube-system +spec: + rules: + - host: prom-vre.cern.ch + http: + paths: + - backend: + service: + name: cern-magnum-kube-prometheu-prometheus + port: + number: 9090 + path: /prometheus(/|$)(.*) + pathType: Prefix + - backend: + service: + name: cern-magnum-kube-prometheu-alertmanager + port: + number: 9093 + path: /alertmanager(/|$)(.*) + pathType: Prefix +status: + loadBalancer: {}