From 4e20bbcf8c444f8de8164daa41b581fe990bac4d Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Thu, 20 Mar 2025 15:21:01 -0400 Subject: [PATCH] upgraded galaxy chart; comments; wip installation of galaxy-deps chart --- Dockerfile | 8 +++- http/src/main/resources/reference.conf | 13 +++++- .../workbench/leonardo/config/Config.scala | 14 +++++- .../config/KubernetesGalaxyDepsConfig.scala | 13 ++++++ .../leonardo/http/api/AppRoutes.scala | 1 + .../http/service/LeoAppServiceInterp.scala | 9 +++- .../monitor/LeoPubsubMessageSubscriber.scala | 9 +++- .../workbench/leonardo/util/GKEAlgebra.scala | 2 +- .../leonardo/util/GKEInterpreter.scala | 44 +++++++++++++++++-- .../leonardo/KubernetesTestData.scala | 2 +- .../leonardo/config/ConfigSpec.scala | 3 +- .../dsde/workbench/leonardo/mocks.scala | 2 +- .../leonardo/util/GKEInterpreterSpec.scala | 3 +- 13 files changed, 108 insertions(+), 15 deletions(-) create mode 100644 http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/KubernetesGalaxyDepsConfig.scala diff --git a/Dockerfile b/Dockerfile index 50e98a89156..ecd1bd307aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,8 +27,10 @@ ENV HELM_DEBUG 1 ENV TERRA_APP_SETUP_VERSION 0.1.0 ENV TERRA_APP_VERSION 0.5.0 # This is galaxykubeman, which references Galaxy -ENV GALAXY_VERSION 2.10.0 +ENV GALAXY_VERSION 3.0.0 ENV NGINX_VERSION 4.3.0 +# This is galaxy-deps which is installed for Terra clusters +ENV GALAXY_DEPS_VERSION 1.0.0 # If you update this here, make sure to also update reference.conf: ENV CROMWELL_CHART_VERSION 0.2.523 ENV HAIL_BATCH_CHART_VERSION 0.2.0 @@ -45,13 +47,14 @@ RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master ./get_helm.sh --version v3.15.3 && \ rm get_helm.sh -# Add the repos containing nginx, galaxy, setup apps, custom apps, cromwell and aou charts +# Add the repos containing nginx, galaxy, galaxy-deps, setup apps, custom apps, cromwell and aou charts RUN helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx && \ helm repo add galaxy https://raw.githubusercontent.com/cloudve/helm-charts/anvil/ && \ helm repo add terra-app-setup-charts https://storage.googleapis.com/terra-app-setup-chart && \ helm repo add terra https://terra-app-charts.storage.googleapis.com && \ helm repo add cromwell-helm https://broadinstitute.github.io/cromwhelm/charts/ && \ helm repo add terra-helm https://terra-helm.storage.googleapis.com && \ + helm repo add cloudve https://raw.githubusercontent.com/CloudVE/helm-charts/master/ && \ helm repo update # .Files helm helper can't access files outside a chart. Hence in order to populate cert file properly, we're @@ -68,6 +71,7 @@ RUN cd /leonardo && \ helm pull terra-helm/rstudio --version $RSTUDIO_CHART_VERSION --untar && \ helm pull terra-helm/sas --version $SAS_CHART_VERSION --untar && \ helm pull oci://terradevacrpublic.azurecr.io/hail/hail-batch-terra-azure --version $HAIL_BATCH_CHART_VERSION --untar && \ + helm pull cloudve/galaxy-deps --version $GALAXY_DEPS_VERSION --untar && \ cd / # Install https://github.com/apangin/jattach to get access to JDK tools diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index a03939288e2..1a9ddbb6001 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -767,6 +767,14 @@ gke { "controller.admissionWebhooks.enabled=false" ] } + galaxyDeps { + namespace = "galaxy-deps" + release = "galaxy-deps" + chartName = "/leonardo/galaxy-deps" + # If you change this here, be sure to update it in the dockerfile + chartVersion = "1.0.0", + values = [] + } galaxyApp { # See comment in KubernetesServiceInterp for more context. Theoretically release names should # only need to be unique within a namespace, but something in the Galaxy chart requires them @@ -775,7 +783,7 @@ gke { chartName = "/leonardo/galaxykubeman" # If you change this here, be sure to update it in the dockerfile # This is galaxykubeman, which references Galaxy - chartVersion = "2.10.0" + chartVersion = "3.0.0" namespaceNameSuffix = "gxy-ns" serviceAccountName = "gxy-ksa" # Setting uninstallKeepHistory will cause the `helm uninstall` command to keep a record of @@ -817,7 +825,8 @@ gke { "2.8.0", "2.8.1", "2.9.0", - "2.10.0" + "2.10.0", + "3.0.0" # TODO - Is it right to exclude it here? ] } galaxyDisk { diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala index 76dfe59b6ad..5447cee1956 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala @@ -643,6 +643,16 @@ object Config { ) } + implicit private val galaxyDepsConfigReader: ValueReader[KubernetesGalaxyDepsConfig] = ValueReader.relative { config => + KubernetesGalaxyDepsConfig( + config.as[NamespaceName]("namespace"), + config.as[Release]("release"), + config.as[ChartName]("chartName"), + config.as[ChartVersion]("chartVersion"), + config.as[List[ValueConfig]]("values") + ) + } + implicit private val namespaceNameSuffixReader: ValueReader[NamespaceNameSuffix] = stringValueReader.map(NamespaceNameSuffix) implicit private val releaseNameSuffixReader: ValueReader[ReleaseNameSuffix] = @@ -773,6 +783,7 @@ object Config { val gkeAllowedAppConfig = config.as[AllowedAppConfig]("gke.allowedApp") val gkeNodepoolConfig = NodepoolConfig(gkeDefaultNodepoolConfig, gkeGalaxyNodepoolConfig) val gkeGalaxyDiskConfig = config.as[GalaxyDiskConfig]("gke.galaxyDisk") + val gkeGalaxyDepsConfig = config.as[KubernetesGalaxyDepsConfig]("gke.galaxyDeps") implicit private val leoPubsubMessageSubscriberConfigReader: ValueReader[LeoPubsubMessageSubscriberConfig] = ValueReader.relative { config => @@ -910,6 +921,7 @@ object Config { appMonitorConfig, gkeClusterConfig, proxyConfig, - gkeGalaxyDiskConfig + gkeGalaxyDiskConfig, + gkeGalaxyDepsConfig ) } diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/KubernetesGalaxyDepsConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/KubernetesGalaxyDepsConfig.scala new file mode 100644 index 00000000000..086869631e6 --- /dev/null +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/KubernetesGalaxyDepsConfig.scala @@ -0,0 +1,13 @@ +package org.broadinstitute.dsde.workbench.leonardo.config + +import org.broadinstitute.dsde.workbench.google2.KubernetesSerializableName.NamespaceName +import org.broadinstitute.dsde.workbench.leonardo.Chart +import org.broadinstitute.dsp.{ChartName, ChartVersion, Release} + +final case class KubernetesGalaxyDepsConfig(namespace: NamespaceName, + release: Release, + chartName: ChartName, + chartVersion: ChartVersion, + values: List[ValueConfig]) { + def chart: Chart = Chart(chartName, chartVersion) +} diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/api/AppRoutes.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/api/AppRoutes.scala index 27795d75b0d..30147d675dd 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/api/AppRoutes.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/api/AppRoutes.scala @@ -116,6 +116,7 @@ class AppRoutes(kubernetesService: AppService[IO], userInfoDirectives: UserInfoD } } + // Saloni - called for above POST /app route private[api] def createAppHandler(userInfo: UserInfo, googleProject: GoogleProject, appName: AppName, diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/LeoAppServiceInterp.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/LeoAppServiceInterp.scala index f74a5cb2e77..328464e4bcc 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/LeoAppServiceInterp.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/http/service/LeoAppServiceInterp.scala @@ -93,6 +93,7 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig, ) _ <- F.raiseWhen(!hasPermission)(ForbiddenError(userEmail)) + // Saloni - note AOU_UI_LABEL label => maps to value "all-of-us" enableIntraNodeVisibility = req.labels.get(AOU_UI_LABEL).exists(x => x == "true") _ <- req.appType match { case AppType.Galaxy | AppType.HailBatch | AppType.Wds | AppType.Cromwell | AppType.WorkflowsApp | @@ -277,6 +278,7 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig, ) app <- appQuery.save(saveApp, Some(ctx.traceId)).transaction + // Saloni - this is where the action is determined for cluster creation clusterNodepoolAction = saveClusterResult match { case ClusterExists(_, _) => // If we're using a pre-existing nodepool then don't specify CreateNodepool in the pubsub message @@ -284,8 +286,10 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig, else Some(ClusterNodepoolAction.CreateNodepool(nodepool.id)) case ClusterDoesNotExist(c, n) => if (req.autopilot.isDefined) Some(ClusterNodepoolAction.CreateCluster(c.id)) - else + else { + // Saloni - this is where new cluster is created Some(ClusterNodepoolAction.CreateClusterAndNodepool(c.id, n.id, nodepool.id)) + } } createAppMessage = CreateAppMessage( googleProject, @@ -1051,6 +1055,9 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig, } } yield () + // Saloni - what does this do? + // maybe it is fetching cluster that was already created before installing new apps + // called in createApp and createAppV2 private[service] def getSavableCluster( userEmail: WorkbenchEmail, cloudContext: CloudContext, diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/LeoPubsubMessageSubscriber.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/LeoPubsubMessageSubscriber.scala index 6181be75c54..c082ef281c1 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/LeoPubsubMessageSubscriber.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/LeoPubsubMessageSubscriber.scala @@ -100,6 +100,7 @@ class LeoPubsubMessageSubscriber[F[_]]( handleDeleteDiskMessage(msg) case msg: UpdateDiskMessage => handleUpdateDiskMessage(msg) + // Saloni - here case msg: CreateAppMessage => handleCreateAppMessage(msg) case msg: DeleteAppMessage => @@ -961,6 +962,7 @@ class LeoPubsubMessageSubscriber[F[_]]( ) } yield () + // Saloni - CreateAppMessage object contains enableIntraNodeVisibility private[monitor] def handleCreateAppMessage(msg: CreateAppMessage)(implicit ev: Ask[F, AppContext] ): F[Unit] = @@ -991,6 +993,7 @@ class LeoPubsubMessageSubscriber[F[_]]( } yield d } + // Saloni - this is where the cluster might be created // The "create app" flow potentially does a number of things: // 1. creates a Kubernetes cluster if it doesn't exist // 2. creates a nodepool within the cluster if it doesn't exist @@ -1003,6 +1006,8 @@ class LeoPubsubMessageSubscriber[F[_]]( // handler. createClusterOrNodepoolOp = msg.clusterNodepoolAction match { case Some(ClusterNodepoolAction.CreateClusterAndNodepool(clusterId, defaultNodepoolId, nodepoolId)) => + // Saloni - the first case is the one we care about + // enableIntraNodeVisibility passed in 'msg' createClusterAndNodepools(msg, clusterId, autopilotEnabled = false, List(defaultNodepoolId, nodepoolId)) case Some(ClusterNodepoolAction.CreateCluster(clusterId)) => createClusterAndNodepools(msg, clusterId, autopilotEnabled = true, List.empty) @@ -1100,6 +1105,8 @@ class LeoPubsubMessageSubscriber[F[_]]( ) } yield () + // Saloni - this creates and polls for cluster until it is created (or error) + // CreateAppMessage has enableIntraNodeVisibility private def createClusterAndNodepools(msg: CreateAppMessage, clusterId: KubernetesClusterLeoId, autopilotEnabled: Boolean, @@ -1133,7 +1140,7 @@ class LeoPubsubMessageSubscriber[F[_]]( // monitor cluster creation asynchronously monitorOp <- createClusterResultOpt.traverse_(createClusterResult => getGkeAlgFromRegistry() - .pollCluster(PollClusterParams(clusterId, msg.project, createClusterResult)) + .pollCluster(PollClusterParams(clusterId, msg.project, createClusterResult), msg.enableIntraNodeVisibility) // polling here .adaptError { case e => PubsubKubernetesError( AppError(e.getMessage, ctx.now, ErrorAction.CreateApp, ErrorSource.Cluster, None, Some(ctx.traceId)), diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEAlgebra.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEAlgebra.scala index bce84bffdf4..e35c6772964 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEAlgebra.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEAlgebra.scala @@ -30,7 +30,7 @@ trait GKEAlgebra[F[_]] { * Polls a creating GKE cluster for its completion and also does other cluster-wide set-up like * install nginx ingress controller. */ - def pollCluster(params: PollClusterParams)(implicit ev: Ask[F, AppContext]): F[Unit] + def pollCluster(params: PollClusterParams, enableIntraNodeVisibility: Boolean)(implicit ev: Ask[F, AppContext]): F[Unit] /** * Creates a GKE nodepool and polls it for completion. diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreter.scala index 8b192c426bb..d28fe9a959e 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreter.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreter.scala @@ -92,6 +92,7 @@ class GKEInterpreter[F[_]]( (ps: List[KubernetesPodStatus]) => ps.forall(isPodDone) implicit private def listDoneCheckable[A: DoneCheckable]: DoneCheckable[List[A]] = as => as.forall(_.isDone) + // Saloni - CreateClusterParams object has enableIntraNodeVisibility override def createCluster(params: CreateClusterParams)(implicit ev: Ask[F, AppContext] ): F[Option[CreateClusterResult]] = { @@ -220,7 +221,8 @@ class GKEInterpreter[F[_]]( ) } - override def pollCluster(params: PollClusterParams)(implicit ev: Ask[F, AppContext]): F[Unit] = + // Saloni - polls for cluster creation status + override def pollCluster(params: PollClusterParams, enableIntraNodeVisibility: Boolean)(implicit ev: Ask[F, AppContext]): F[Unit] = for { ctx <- ev.ask @@ -238,7 +240,7 @@ class GKEInterpreter[F[_]]( ) _ <- F.fromOption(dbCluster.nodepools.find(_.isDefault), DefaultNodepoolNotFoundException(dbCluster.id)) - // Poll GKE until completion + // Poll GKE until completion --> seems to be polling until completion? lastOp <- gkeService .pollOperation( params.createResult.op, @@ -275,6 +277,7 @@ class GKEInterpreter[F[_]]( ) ) + // Saloni - if it reaches here it must be cluster creation succeeded _ <- logger.info(ctx.loggingCtx)( s"Successfully created cluster ${dbCluster.getClusterId.toString}!" ) @@ -301,6 +304,11 @@ class GKEInterpreter[F[_]]( ) ) .transaction + + // Saloni - maybe here we can install galaxy-deps helm chart for Terra clusters + // install galaxy-deps helm chart only for Terra clusters + _ <- if (!enableIntraNodeVisibility) installGalaxyDepsForTerra(dbCluster, googleCluster) else F.unit + _ <- kubernetesClusterQuery.updateStatus(dbCluster.id, KubernetesClusterStatus.Running).transaction _ <- nodepoolQuery.updateStatuses(dbCluster.nodepools.map(_.id), NodepoolStatus.Running).transaction } yield () @@ -1302,6 +1310,7 @@ class GKEInterpreter[F[_]]( } } yield res + // Saloni - this is installing a helm chart private[util] def installNginx(dbCluster: KubernetesCluster, googleCluster: Cluster)(implicit ev: Ask[F, AppContext] ): F[IP] = @@ -1348,6 +1357,34 @@ class GKEInterpreter[F[_]]( ) } yield loadBalancerIp + // Saloni TODO - since this is being installed does it also need to be uninstalled when deleting resources? + private[util] def installGalaxyDepsForTerra(dbCluster: KubernetesCluster, googleCluster: Cluster)(implicit + ev: Ask[F, AppContext]): F[Unit] = { + for { + ctx <- ev.ask + + _ <- logger.info(ctx.loggingCtx)( + s"Installing galaxy-deps helm chart ${config.galaxyDepsConfig.chart} in cluster ${dbCluster.getClusterId.toString}" + ) + + helmAuthContext <- getHelmAuthContext(googleCluster, dbCluster, config.galaxyDepsConfig.namespace) + + // Invoke helm + _ <- helmClient + .installChart( + config.galaxyDepsConfig.release, + config.galaxyDepsConfig.chartName, + config.galaxyDepsConfig.chartVersion, + org.broadinstitute.dsp.Values(config.galaxyDepsConfig.values.map(_.value).mkString(",")), + createNamespace = true + ) + .run(helmAuthContext) + + // Saloni - TODO do we need to poll for anything? + } yield () + } + + // Saloni - this is where Galaxy is installed; called from createAndPollApp private[util] def installGalaxy(helmAuthContext: AuthContext, appName: AppName, release: Release, @@ -2088,7 +2125,8 @@ final case class GKEInterpreterConfig(leoUrlBase: URL, monitorConfig: AppMonitorConfig, clusterConfig: KubernetesClusterConfig, proxyConfig: ProxyConfig, - galaxyDiskConfig: GalaxyDiskConfig + galaxyDiskConfig: GalaxyDiskConfig, + galaxyDepsConfig: KubernetesGalaxyDepsConfig ) final case class TerraAppSetupChartConfig( diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/KubernetesTestData.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/KubernetesTestData.scala index 5dfd0cbcae3..e41334772d1 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/KubernetesTestData.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/KubernetesTestData.scala @@ -48,7 +48,7 @@ object KubernetesTestData { val galaxyApp = AppType.Galaxy val galaxyChartName = ChartName("/leonardo/galaxykubeman") - val galaxyChartVersion = ChartVersion("2.10.0") + val galaxyChartVersion = ChartVersion("3.0.0") val galaxyChart = Chart(galaxyChartName, galaxyChartVersion) val galaxyReleasePrefix = "gxy-release" diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/config/ConfigSpec.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/config/ConfigSpec.scala index 1763ba4c209..b3e527a4052 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/config/ConfigSpec.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/config/ConfigSpec.scala @@ -142,7 +142,8 @@ final class ConfigSpec extends AnyFlatSpec with Matchers { ChartVersion("2.8.0"), ChartVersion("2.8.1"), ChartVersion("2.9.0"), - ChartVersion("2.10.0") + ChartVersion("2.10.0"), + ChartVersion("3.0.0"), ) ) Config.gkeGalaxyAppConfig shouldBe expectedResult diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/mocks.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/mocks.scala index 240176ccdc5..55d0ac28c3a 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/mocks.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/mocks.scala @@ -244,7 +244,7 @@ class MockGKEService extends GKEAlgebra[IO] { * Polls a creating GKE cluster for its completion and also does other cluster-wide set-up like * install nginx ingress controller. */ - override def pollCluster(params: PollClusterParams)(implicit ev: Ask[IO, AppContext]): IO[Unit] = IO.unit + override def pollCluster(params: PollClusterParams, enableIntraNodeVisibility: Boolean)(implicit ev: Ask[IO, AppContext]): IO[Unit] = IO.unit /** Creates a GKE nodepool but doesn't wait for its completion. */ override def createAndPollNodepool(params: CreateNodepoolParams)(implicit ev: Ask[IO, AppContext]): IO[Unit] = IO.unit diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreterSpec.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreterSpec.scala index 1c7af751d7f..284e89e3f94 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreterSpec.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/util/GKEInterpreterSpec.scala @@ -297,7 +297,8 @@ class GKEInterpreterSpec extends AnyFlatSpecLike with TestComponent with Mockito PollClusterParams(savedCluster1.id, savedCluster1.cloudContext.asInstanceOf[CloudContext.Gcp].value, createResult.get - ) + ), + false ) .attempt } yield r shouldBe (Left(