From edf26189335671326c8d70f4abb833c9fdcd3e35 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Wed, 28 Jan 2026 20:52:57 -0800 Subject: [PATCH 01/16] Initial commit --- cmd/main.go | 7 + .../manager/controller_manager_telemetry.yaml | 9 + config/manager/kustomization.yaml | 1 + internal/controller/telemetry_controller.go | 110 ++++ .../controller/telemetry_controller_test.go | 64 ++ pkg/splunk/client/enterprise.go | 44 ++ pkg/splunk/enterprise/afwscheduler.go | 30 +- pkg/splunk/enterprise/afwscheduler_test.go | 49 +- pkg/splunk/enterprise/names.go | 24 +- pkg/splunk/enterprise/telemetry.go | 399 ++++++++++++ pkg/splunk/enterprise/telemetry_test.go | 578 ++++++++++++++++++ .../custom_resource_crud_s1_test.go | 3 + test/testenv/deployment.go | 10 + test/testenv/verificationutils.go | 26 + 14 files changed, 1287 insertions(+), 67 deletions(-) create mode 100644 config/manager/controller_manager_telemetry.yaml create mode 100644 internal/controller/telemetry_controller.go create mode 100644 internal/controller/telemetry_controller_test.go create mode 100644 pkg/splunk/enterprise/telemetry.go create mode 100644 pkg/splunk/enterprise/telemetry_test.go diff --git a/cmd/main.go b/cmd/main.go index f8aba0ae1..752173edd 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -221,6 +221,13 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "Standalone") os.Exit(1) } + if err = (&intController.TelemetryReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Telemetry") + os.Exit(1) + } //+kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { diff --git a/config/manager/controller_manager_telemetry.yaml b/config/manager/controller_manager_telemetry.yaml new file mode 100644 index 000000000..0ed5a866b --- /dev/null +++ b/config/manager/controller_manager_telemetry.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: manager-telemetry +data: + status: | + { + "lastTransmission": "" + } \ No newline at end of file diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 47f07b0e6..d6116406b 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,5 +1,6 @@ resources: - manager.yaml +- controller_manager_telemetry.yaml generatorOptions: disableNameSuffixHash: true diff --git a/internal/controller/telemetry_controller.go b/internal/controller/telemetry_controller.go new file mode 100644 index 000000000..8f49faa00 --- /dev/null +++ b/internal/controller/telemetry_controller.go @@ -0,0 +1,110 @@ +/* +Copyright (c) 2018-2022 Splunk Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise" + ctrl "sigs.k8s.io/controller-runtime" + "time" + + "github.com/pkg/errors" + metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics" + + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +const ( + // TODO: Below two contants are defined at default/kustomizatio.yaml, need to get it programatically? + ConfigMapNamePrefix = "splunk-operator-" + ConfigMapLabelName = "splunk-operator" + + telemetryRetryDelay = time.Second * 60 +) + +// TelemetryReconciler periodically reads all keys under the "telemetry" configmap +// in the Splunk operator namespace and logs all key values. +type TelemetryReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch + +func (r *TelemetryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "Telemetry")).Inc() + defer recordInstrumentionData(time.Now(), req, "controller", "Telemetry") + + reqLogger := log.FromContext(ctx) + reqLogger = reqLogger.WithValues("telemetry", req.NamespacedName) + + reqLogger.Info("Reconciling telemetry") + + // Fetch the ConfigMap + cm := &corev1.ConfigMap{} + err := r.Get(ctx, req.NamespacedName, cm) + if err != nil { + if k8serrors.IsNotFound(err) { + reqLogger.Info("telemetry configmap not found; requeueing", "period(seconds)", int(telemetryRetryDelay/time.Second)) + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil + } + return ctrl.Result{}, errors.Wrap(err, "could not load telemetry configmap") + } + + // Log all key/value pairs. No sorting per your request. + if len(cm.Data) == 0 { + reqLogger.Info("telemetry configmap has no data keys") + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil + } + + reqLogger.Info("start", "Telemetry configmap version", cm.GetResourceVersion()) + + result, err := enterprise.ApplyTelemetry(ctx, r.Client, cm) + if err != nil { + reqLogger.Error(err, "Failed") + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil + } + if result.Requeue && result.RequeueAfter != 0 { + reqLogger.Info("Requeued", "period(seconds)", int(result.RequeueAfter/time.Second)) + } + + return result, err +} + +// SetupWithManager sets up the controller with the Manager. +func (r *TelemetryReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&corev1.ConfigMap{}). + WithEventFilter(predicate.NewPredicateFuncs(func(obj client.Object) bool { + labels := obj.GetLabels() + if labels == nil { + return false + } + return obj.GetName() == enterprise.GetTelemetryConfigMapName(ConfigMapNamePrefix) && labels["name"] == ConfigMapLabelName + })). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 1, + }). + Complete(r) +} diff --git a/internal/controller/telemetry_controller_test.go b/internal/controller/telemetry_controller_test.go new file mode 100644 index 000000000..c73ac5a23 --- /dev/null +++ b/internal/controller/telemetry_controller_test.go @@ -0,0 +1,64 @@ +package controller + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +var _ = Describe("Telemetry Controller", func() { + var ( + ctx context.Context + cmName = "splunk-operator-telemetry" + ns = "test-telemetry-ns" + labels = map[string]string{"name": "splunk-operator"} + ) + + BeforeEach(func() { + ctx = context.TODO() + }) + + It("Reconcile returns requeue when ConfigMap not found", func() { + builder := fake.NewClientBuilder().WithScheme(scheme.Scheme) + c := builder.Build() + r := &TelemetryReconciler{Client: c, Scheme: scheme.Scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: cmName, Namespace: ns}} + result, err := r.Reconcile(ctx, req) + Expect(err).To(BeNil()) + Expect(result.Requeue).To(BeTrue()) + Expect(result.RequeueAfter).To(Equal(time.Second * 60)) + }) + + It("Reconcile returns requeue when ConfigMap has no data", func() { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: cmName, Namespace: ns, Labels: labels}, + Data: map[string]string{}, + } + builder := fake.NewClientBuilder().WithScheme(scheme.Scheme).WithObjects(cm) + c := builder.Build() + r := &TelemetryReconciler{Client: c, Scheme: scheme.Scheme} + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: cmName, Namespace: ns}} + result, err := r.Reconcile(ctx, req) + Expect(err).To(BeNil()) + Expect(result.Requeue).To(BeTrue()) + Expect(result.RequeueAfter).To(Equal(time.Second * 60)) + }) + + // Additional tests for error and success cases can be added here +}) + +/* +func TestTelemetryController(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Telemetry Controller Suite") +} + +*/ diff --git a/pkg/splunk/client/enterprise.go b/pkg/splunk/client/enterprise.go index 8bc36b08a..8a6247320 100644 --- a/pkg/splunk/client/enterprise.go +++ b/pkg/splunk/client/enterprise.go @@ -16,6 +16,7 @@ package client import ( + "bytes" "crypto/tls" "encoding/json" "fmt" @@ -954,6 +955,49 @@ func (c *SplunkClient) SetIdxcSecret(idxcSecret string) error { return c.Do(request, expectedStatus, nil) } +type LicenseInfo struct { + ID string `json:"guid"` + Type string `json:"type"` +} + +func (c *SplunkClient) GetLicenseInfo() (*LicenseInfo, error) { + apiResponse := struct { + Entry []struct { + Content LicenseInfo `json:"content"` + } `json:"entry"` + }{} + path := "/services/licenser/licenses" + err := c.Get(path, &apiResponse) + if err != nil { + return nil, err + } + if len(apiResponse.Entry) < 1 { + return nil, fmt.Errorf("invalid response from %s%s", c.ManagementURI, path) + } + return &apiResponse.Entry[0].Content, nil +} + +type TelemetryResponse struct { + Message string `json:"message"` + MetricValueID string `json:"metricValueId"` +} + +func (c *SplunkClient) SendTelemetry(path string, body []byte) (*TelemetryResponse, error) { + endpoint := fmt.Sprintf("%s%s", c.ManagementURI, path) + request, err := http.NewRequest("POST", endpoint, bytes.NewReader(body)) + if err != nil { + return nil, err + } + request.Header.Set("Content-Type", "application/json") + expectedStatus := []int{201} + var response TelemetryResponse + err = c.Do(request, expectedStatus, &response) + if err != nil { + return nil, err + } + return &response, nil +} + // RestartSplunk restarts specific Splunk instance // Can be used for any Splunk Instance // See https://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTsystem#server.2Fcontrol.2Frestart diff --git a/pkg/splunk/enterprise/afwscheduler.go b/pkg/splunk/enterprise/afwscheduler.go index 2dd2fd667..45aaaf10e 100644 --- a/pkg/splunk/enterprise/afwscheduler.go +++ b/pkg/splunk/enterprise/afwscheduler.go @@ -138,26 +138,6 @@ func runCustomCommandOnSplunkPods(ctx context.Context, cr splcommon.MetaObject, return err } -// Get extension for name of telemetry app -func getTelAppNameExtension(crKind string) (string, error) { - switch crKind { - case "Standalone": - return "stdaln", nil - case "LicenseMaster": - return "lmaster", nil - case "LicenseManager": - return "lmanager", nil - case "SearchHeadCluster": - return "shc", nil - case "ClusterMaster": - return "cmaster", nil - case "ClusterManager": - return "cmanager", nil - default: - return "", errors.New("Invalid CR kind for telemetry app") - } -} - // addTelApp adds a telemetry app var addTelApp = func(ctx context.Context, podExecClient splutil.PodExecClientImpl, replicas int32, cr splcommon.MetaObject) error { var err error @@ -170,26 +150,20 @@ var addTelApp = func(ctx context.Context, podExecClient splutil.PodExecClientImp // Create pod exec client crKind := cr.GetObjectKind().GroupVersionKind().Kind - // Get Tel App Name Extension - appNameExt, err := getTelAppNameExtension(crKind) - if err != nil { - return err - } - // Commands to run on pods var command1, command2 string // Handle non SHC scenarios(Standalone, CM, LM) if crKind != "SearchHeadCluster" { // Create dir on pods - command1 = fmt.Sprintf(createTelAppNonShcString, appNameExt, appNameExt, telAppConfString, appNameExt, telAppDefMetaConfString, appNameExt) + command1 = fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString) // App reload command2 = telAppReloadString } else { // Create dir on pods - command1 = fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, appNameExt, shcAppsLocationOnDeployer, appNameExt, telAppConfString, shcAppsLocationOnDeployer, appNameExt, telAppDefMetaConfString, shcAppsLocationOnDeployer, appNameExt) + command1 = fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer) // Bundle push command2 = fmt.Sprintf(applySHCBundleCmdStr, GetSplunkStatefulsetURL(cr.GetNamespace(), SplunkSearchHead, cr.GetName(), 0, false), "/tmp/status.txt") diff --git a/pkg/splunk/enterprise/afwscheduler_test.go b/pkg/splunk/enterprise/afwscheduler_test.go index 38668da69..4481b4651 100644 --- a/pkg/splunk/enterprise/afwscheduler_test.go +++ b/pkg/splunk/enterprise/afwscheduler_test.go @@ -4237,31 +4237,6 @@ func TestAdjustClusterAppsFilePermissions(t *testing.T) { mockPodExecReturnContexts[0].StdErr = "" } -func TestGetTelAppNameExtension(t *testing.T) { - crKinds := map[string]string{ - "Standalone": "stdaln", - "LicenseMaster": "lmaster", - "LicenseManager": "lmanager", - "SearchHeadCluster": "shc", - "ClusterMaster": "cmaster", - "ClusterManager": "cmanager", - } - - // Test all CR kinds - for k, v := range crKinds { - val, _ := getTelAppNameExtension(k) - if v != val { - t.Errorf("Invalid extension crkind %v, extension %v", k, v) - } - } - - // Test error code - _, err := getTelAppNameExtension("incorrect value") - if err == nil { - t.Errorf("Expected error") - } -} - func TestAddTelAppCMaster(t *testing.T) { ctx := context.TODO() @@ -4280,7 +4255,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Define mock podexec context podExecCommands := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmaster", "cmaster", telAppConfString, "cmaster", telAppDefMetaConfString, "cmaster"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), telAppReloadString, } @@ -4304,7 +4279,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test shc podExecCommands = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), fmt.Sprintf(applySHCBundleCmdStr, GetSplunkStatefulsetURL(shcCr.GetNamespace(), SplunkSearchHead, shcCr.GetName(), 0, false), "/tmp/status.txt"), } @@ -4320,7 +4295,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test non-shc error 1 podExecCommandsError := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmerror", "cmerror", telAppConfString, "cmerror", telAppDefMetaConfString, "cmerror"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } mockPodExecReturnContextsError := []*spltest.MockPodExecReturnContext{ @@ -4339,7 +4314,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test non-shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppNonShcString, "cm", "cm", telAppConfString, "cm", telAppDefMetaConfString, "cm"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } var mockPodExecClientError2 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: cmCr} mockPodExecClientError2.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) @@ -4351,7 +4326,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test shc error 1 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shcerror", shcAppsLocationOnDeployer, "shcerror", telAppConfString, shcAppsLocationOnDeployer, "shcerror", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shcerror"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError3 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} @@ -4364,7 +4339,7 @@ func TestAddTelAppCMaster(t *testing.T) { // Test shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError4 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} mockPodExecClientError4.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) @@ -4393,7 +4368,7 @@ func TestAddTelAppCManager(t *testing.T) { // Define mock podexec context podExecCommands := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmanager", "cmanager", telAppConfString, "cmanager", telAppDefMetaConfString, "cmanager"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), telAppReloadString, } @@ -4417,7 +4392,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test shc podExecCommands = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), fmt.Sprintf(applySHCBundleCmdStr, GetSplunkStatefulsetURL(shcCr.GetNamespace(), SplunkSearchHead, shcCr.GetName(), 0, false), "/tmp/status.txt"), } @@ -4433,7 +4408,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test non-shc error 1 podExecCommandsError := []string{ - fmt.Sprintf(createTelAppNonShcString, "cmerror", "cmerror", telAppConfString, "cmerror", telAppDefMetaConfString, "cmerror"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } mockPodExecReturnContextsError := []*spltest.MockPodExecReturnContext{ @@ -4452,7 +4427,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test non-shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppNonShcString, "cm", "cm", telAppConfString, "cm", telAppDefMetaConfString, "cm"), + fmt.Sprintf(createTelAppNonShcString, telAppConfString, telAppDefMetaConfString), } var mockPodExecClientError2 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: cmCr} mockPodExecClientError2.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) @@ -4464,7 +4439,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test shc error 1 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shcerror", shcAppsLocationOnDeployer, "shcerror", telAppConfString, shcAppsLocationOnDeployer, "shcerror", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shcerror"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError3 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} @@ -4477,7 +4452,7 @@ func TestAddTelAppCManager(t *testing.T) { // Test shc error 2 podExecCommandsError = []string{ - fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, "shc", shcAppsLocationOnDeployer, "shc", telAppConfString, shcAppsLocationOnDeployer, "shc", telAppDefMetaConfString, shcAppsLocationOnDeployer, "shc"), + fmt.Sprintf(createTelAppShcString, shcAppsLocationOnDeployer, shcAppsLocationOnDeployer, telAppConfString, shcAppsLocationOnDeployer, telAppDefMetaConfString, shcAppsLocationOnDeployer), } var mockPodExecClientError4 *spltest.MockPodExecClient = &spltest.MockPodExecClient{Cr: shcCr} mockPodExecClientError4.AddMockPodExecReturnContexts(ctx, podExecCommandsError, mockPodExecReturnContextsError...) diff --git a/pkg/splunk/enterprise/names.go b/pkg/splunk/enterprise/names.go index 3d0439db7..f788edfe1 100644 --- a/pkg/splunk/enterprise/names.go +++ b/pkg/splunk/enterprise/names.go @@ -201,13 +201,23 @@ access = read : [ * ], write : [ admin ] ` // Command to create telemetry app on non SHC scenarios - createTelAppNonShcString = "mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s_%s/default/; mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s_%s/metadata/; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s_%s/default/app.conf; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s_%s/metadata/default.meta" + createTelAppNonShcString = "mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s/default/; mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s/metadata/; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s/default/app.conf; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s/metadata/default.meta" // Command to create telemetry app on SHC scenarios - createTelAppShcString = "mkdir -p %s/app_tel_for_sok8s_%s/default/; mkdir -p %s/app_tel_for_sok8s_%s/metadata/; echo -e \"%s\" > %s/app_tel_for_sok8s_%s/default/app.conf; echo -e \"%s\" > %s/app_tel_for_sok8s_%s/metadata/default.meta" + createTelAppShcString = "mkdir -p %s/app_tel_for_sok8s/default/; mkdir -p %s/app_tel_for_sok8s/metadata/; echo -e \"%s\" > %s/app_tel_for_sok8s/default/app.conf; echo -e \"%s\" > %s/app_tel_for_sok8s/metadata/default.meta" // Command to reload app configuration telAppReloadString = "curl -k -u admin:`cat /mnt/splunk-secrets/password` https://localhost:8089/services/apps/local/_reload" + + // Name of the telemetry configmap: -manager-telemetry + telConfigMapTemplateStr = "%smanager-telemetry" + + // Name of the telemetry app: app_tel_for_sok8s + telAppNameStr = "app_tel_for_sok8s" + telSOKVersionKey = "version" + telLicenseInfoKey = "license_info" + + managerConfigMapTemplateStr = "%smanager-config" ) const ( @@ -363,3 +373,13 @@ func GetLivenessDriverFileDir() string { func GetStartupScriptName() string { return startupScriptName } + +// GetTelemetryConfigMapName returns the name of telemetry configmap +func GetTelemetryConfigMapName(namePrefix string) string { + return fmt.Sprintf(telConfigMapTemplateStr, namePrefix) +} + +// GetManagerConfigMapName returns the name of manager configmap +func GetManagerConfigMapName(namePrefix string) string { + return fmt.Sprintf(managerConfigMapTemplateStr, namePrefix) +} diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go new file mode 100644 index 000000000..3a3152bf0 --- /dev/null +++ b/pkg/splunk/enterprise/telemetry.go @@ -0,0 +1,399 @@ +package enterprise + +import ( + "context" + "encoding/json" + "errors" + "fmt" + enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + splclient "github.com/splunk/splunk-operator/pkg/splunk/client" + splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" + splutil "github.com/splunk/splunk-operator/pkg/splunk/util" + appsv1 "k8s.io/api/apps/v1" + "os" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "time" + + corev1 "k8s.io/api/core/v1" +) + +const ( + // TODO: Should be set to one day for the release + requeAfterInSeconds = 30 + // TODO: Should change to false for the release + isTestMode = true + // TODO: Ideally the version string should be set from the release tag + SOK_VERSION = "3.0.0" + + telStatusKey = "status" +) + +//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch + +type Telemetry struct { + Type string `json:"type"` + Component string `json:"component"` + OptInRequired int `json:"optInRequired"` + Data map[string]interface{} `json:"data"` + Test bool `json:"test"` +} + +type TelemetryStatus struct { + LastTransmission string `json:"lastTransmission"` +} + +func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap) (reconcile.Result, error) { + + // unless modified, reconcile for this object will be requeued after 10 seconds + result := reconcile.Result{ + Requeue: true, + RequeueAfter: time.Second * requeAfterInSeconds, + } + + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("ApplyTelemetry") + + for k, v := range cm.Data { + scopedLog.Info("Retrieved telemetry keys", "key", k, "value", v) + } + + var data map[string]interface{} + data = make(map[string]interface{}) + + // Add SOK version + data[telSOKVersionKey] = SOK_VERSION + // Add per CR telemetry + crList := getAllCustomResources(ctx, client) + + collectCRTelData(ctx, client, crList, data) + // Add telemetry set in this configmap, i.e splunk POD's telemetry + CollectCMTelData(ctx, cm, data) + + // Now send the telemetry + for _, crs := range crList { + for _, cr := range crs { + success := SendTelemetry(ctx, client, cr, data) + if success { + updateLastTransmissionTime(ctx, client, cm) + return result, nil + } + } + } + + return result, errors.New("Failed to send telemetry data") +} + +func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap) error { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("updateLastTransmissionTime") + + var status TelemetryStatus + status.LastTransmission = time.Now().UTC().Format(time.RFC3339) + + updated, err := json.MarshalIndent(status, "", " ") + if err != nil { + scopedLog.Error(err, "Failed to marshal telemetry status") + return err + } + cm.Data[telStatusKey] = string(updated) + if err = client.Update(ctx, cm); err != nil { + scopedLog.Error(err, "Failed to update telemetry status in configmap") + return err + } + scopedLog.Info("Updated last transmission time in configmap", "newStatus", cm.Data[telStatusKey]) + + return nil +} + +func getAllCustomResources(ctx context.Context, client splcommon.ControllerClient) map[string][]splcommon.MetaObject { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("collectCRTelData") + + var crList map[string][]splcommon.MetaObject + crList = make(map[string][]splcommon.MetaObject) + + //var instanceID InstanceType + //var telAppName string + + var err error + var standaloneList enterpriseApi.StandaloneList + //instanceID = SplunkStandalone + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "stdaln") + err = client.List(ctx, &standaloneList) + if err != nil { + scopedLog.Error(err, "Failed to list standalone objects") + } else if len(standaloneList.Items) > 0 { + crList[standaloneList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range standaloneList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[standaloneList.Items[0].Kind] = append(crList[standaloneList.Items[0].Kind], &cr) + } + } + + var lmanagerList enterpriseApi.LicenseManagerList + //instanceID = SplunkLicenseManager + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "lmanager") + err = client.List(ctx, &lmanagerList) + if err != nil { + scopedLog.Error(err, "Failed to list LicenseManager objects") + } else if len(lmanagerList.Items) > 0 { + crList[lmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range lmanagerList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[lmanagerList.Items[0].Kind] = append(crList[lmanagerList.Items[0].Kind], &cr) + } + } + + var lmasterList enterpriseApiV3.LicenseMasterList + //instanceID = SplunkLicenseMaster + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "lmaster") + err = client.List(ctx, &lmasterList) + if err != nil { + scopedLog.Error(err, "Failed to list LicenseMaster objects") + } else if len(lmasterList.Items) > 0 { + crList[lmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range lmasterList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[lmasterList.Items[0].Kind] = append(crList[lmasterList.Items[0].Kind], &cr) + } + } + + var shcList enterpriseApi.SearchHeadClusterList + //instanceID = SplunkSearchHead + //telAppName = fmt.Sprintf(telAppNameTemplateStr, "shc") + err = client.List(ctx, &shcList) + if err != nil { + scopedLog.Error(err, "Failed to list SearchHeadCluster objects") + } else if len(shcList.Items) > 0 { + crList[shcList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range shcList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[shcList.Items[0].Kind] = append(crList[shcList.Items[0].Kind], &cr) + } + } + + var cmanagerList enterpriseApi.ClusterManagerList + err = client.List(ctx, &cmanagerList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterManager objects") + } else if len(cmanagerList.Items) > 0 { + crList[cmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range cmanagerList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[cmanagerList.Items[0].Kind] = append(crList[cmanagerList.Items[0].Kind], &cr) + } + } + + var cmasterList enterpriseApiV3.ClusterMasterList + err = client.List(ctx, &cmasterList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterMaster objects") + } else if len(cmasterList.Items) > 0 { + crList[cmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range cmasterList.Items { + if !cr.Status.TelAppInstalled { + scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) + continue + } + crList[cmasterList.Items[0].Kind] = append(crList[cmasterList.Items[0].Kind], &cr) + } + } + + return crList +} + +func getOwnedStatefulSets( + ctx context.Context, + c client.Client, + cr client.Object, +) ([]appsv1.StatefulSet, error) { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("getOwnedStatefulSets") + + stsList := &appsv1.StatefulSetList{} + if err := c.List(ctx, stsList, + client.InNamespace(cr.GetNamespace()), + ); err != nil { + scopedLog.Error(err, "Failed to list StatefulSets", "CR Name", cr.GetName()) + return nil, err + } + + var result []appsv1.StatefulSet + for _, sts := range stsList.Items { + for _, owner := range sts.OwnerReferences { + if owner.UID == cr.GetUID() { + result = append(result, sts) + break + } + } + } + return result, nil +} + +func collectCRTelData(ctx context.Context, client splcommon.ControllerClient, crList map[string][]splcommon.MetaObject, data map[string]interface{}) { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("collectCRTelData") + scopedLog.Info("Start") + + for kind, crs := range crList { + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + for _, cr := range crs { + var perCRData []map[string]string + perCRData = make([]map[string]string, 0) + stsList, err := getOwnedStatefulSets(ctx, client, cr) + if err != nil { + scopedLog.Error(err, "Failed to get owned StatefulSets") + } else if len(stsList) > 0 { + for _, sts := range stsList { + for _, container := range sts.Spec.Template.Spec.Containers { + resPerContainer := map[string]string{ + "container_name": container.Name, + "cpu_request": container.Resources.Requests.Cpu().String(), + "memory_request": container.Resources.Requests.Memory().String(), + "cpu_limit": container.Resources.Limits.Cpu().String(), + "memory_limit": container.Resources.Limits.Memory().String(), + } + perCRData = append(perCRData, resPerContainer) + } + } + } + perKindData[cr.GetName()] = perCRData + } + data[kind] = perKindData + } +} + +// CollectCMTelData is exported for testing +func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string]interface{}) { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("collectCMTelData") + scopedLog.Info("Start") + + for key, val := range cm.Data { + if key == telStatusKey { + continue + } + var compData interface{} + scopedLog.Info("Processing telemetry input from other components", "key", key, "value", val) + err := json.Unmarshal([]byte(val), &compData) + if err != nil { + scopedLog.Info("Not able to unmarshal. Will include the input as string", "key", key, "value", val) + data[key] = val + } else { + data[key] = compData + } + } +} + +func isTest(ctx context.Context) bool { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("checkTestMode") + + // Retrieve SPLUNK_TEST_MODE environment variable + testModeStr := os.Getenv("SPLUNK_TEST_MODE") + if testModeStr == "1" { + scopedLog.Info("Test mode is enabled via SPLUNK_TEST_MODE env variable") + return true + } + + scopedLog.Info("Return test mode", "isTestMode", isTestMode) + return isTestMode +} + +// SendTelemetry is exported for testing +func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr splcommon.MetaObject, data map[string]interface{}) bool { + reqLogger := log.FromContext(ctx) + scopedLog := reqLogger.WithName("sendTelemetry").WithValues( + "name", cr.GetObjectMeta().GetName(), + "namespace", cr.GetObjectMeta().GetNamespace(), + "kind", cr.GetObjectKind().GroupVersionKind().Kind) + scopedLog.Info("Start") + + var instanceID InstanceType + switch cr.GetObjectKind().GroupVersionKind().Kind { + case "Standalone": + instanceID = SplunkStandalone + case "LicenseManager": + instanceID = SplunkLicenseManager + case "LicenseMaster": + instanceID = SplunkLicenseMaster + case "SearchHeadCluster": + instanceID = SplunkSearchHead + case "ClusterMaster": + instanceID = SplunkClusterMaster + case "ClusterManager": + instanceID = SplunkClusterManager + default: + return false + } + + serviceName := GetSplunkServiceName(instanceID, cr.GetName(), false) + scopedLog.Info("Got service name", "serviceName", serviceName) + + defaultSecretObjName := splcommon.GetNamespaceScopedSecretName(cr.GetNamespace()) + defaultSecret, err := splutil.GetSecretByName(ctx, client, cr.GetNamespace(), cr.GetName(), defaultSecretObjName) + if err != nil { + scopedLog.Error(err, "Could not access default secret object") + return false + } + + //Get the admin password from the secret object + adminPwd, foundSecret := defaultSecret.Data["password"] + if !foundSecret { + scopedLog.Info("Failed to find admin password") + return false + } + splunkClient := splclient.NewSplunkClient(fmt.Sprintf("https://%s:8089", serviceName), "admin", string(adminPwd)) + + var licenseInfo *splclient.LicenseInfo + licenseInfo, err = splunkClient.GetLicenseInfo() + if err != nil { + scopedLog.Error(err, "Failed to retrieve the license info") + return false + } else { + data[telLicenseInfoKey] = *licenseInfo + } + telemetry := Telemetry{ + Type: "event", + Component: "sok", + OptInRequired: 2, + Data: data, + Test: isTest(ctx), + } + + path := fmt.Sprintf("/servicesNS/nobody/%s/telemetry-metric", telAppNameStr) + bodyBytes, err := json.Marshal(telemetry) + if err != nil { + scopedLog.Error(err, "Failed to marshal to bytes") + return false + } + scopedLog.Info("Sending request", "path", path, "body", string(bodyBytes)) + + response, err := splunkClient.SendTelemetry(path, bodyBytes) + if err != nil { + scopedLog.Error(err, "Failed to send telemetry") + return false + } + + scopedLog.Info("Successfully sent telemetry", "response", response) + return true +} diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go new file mode 100644 index 000000000..76e614def --- /dev/null +++ b/pkg/splunk/enterprise/telemetry_test.go @@ -0,0 +1,578 @@ +// Copyright (c) 2018-2022 Splunk Inc. All rights reserved. + +package enterprise + +import ( + "context" + "encoding/json" + enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" + splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" + "testing" + "time" + + "errors" + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + spltest "github.com/splunk/splunk-operator/pkg/splunk/test" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func TestTelemetryGetAllCustomResources_Empty(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + crMap := getAllCustomResources(ctx, mockClient) + if len(crMap) != 0 { + t.Errorf("expected no CRs, got %d", len(crMap)) + } +} + +func TestTelemetryCollectCRTelData_WithMockCR(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "Standalone" + cr.ObjectMeta.Name = "test-standalone" + crList := map[string][]splcommon.MetaObject{"Standalone": {cr}} + data := make(map[string]interface{}) + collectCRTelData(ctx, mockClient, crList, data) + if _, ok := data["Standalone"]; !ok { + t.Errorf("expected Standalone key in data map") + } +} + +func TestApplyTelemetry_ConfigMapNoData(t *testing.T) { + mockClient := spltest.NewMockClient() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + ctx := context.TODO() + result, err := ApplyTelemetry(ctx, mockClient, cm) + if err == nil { + t.Errorf("expected error when no CRs present, got nil") + } + if !result.Requeue { + t.Errorf("expected requeue to be true, got false") + } +} + +func TestTelemetryCollectCMTelData_UnmarshalError(t *testing.T) { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{"bad": "notjson"}, + } + ctx := context.TODO() + data := make(map[string]interface{}) + CollectCMTelData(ctx, cm, data) + if data["bad"] != "notjson" { + t.Errorf("expected fallback to string on unmarshal error") + } +} + +func TestTelemetryCollectCMTelData_ValidJSON(t *testing.T) { + val := map[string]interface{}{"foo": "bar"} + b, _ := json.Marshal(val) + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{"good": string(b)}, + } + ctx := context.TODO() + data := make(map[string]interface{}) + CollectCMTelData(ctx, cm, data) + if m, ok := data["good"].(map[string]interface{}); !ok || m["foo"] != "bar" { + t.Errorf("expected valid JSON to be unmarshaled") + } +} + +func TestSendTelemetry_UnknownKind(t *testing.T) { + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "UnknownKind" + ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}) + if ok { + t.Errorf("expected SendTelemetry to return false for unknown kind") + } +} + +func TestSendTelemetry_NoSecret(t *testing.T) { + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "Standalone" + cr.ObjectMeta.Name = "test" + cr.ObjectMeta.Namespace = "default" + ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}) + if ok { + t.Errorf("expected SendTelemetry to return false if no secret found") + } +} + +func TestTelemetryGetAllCustomResources_AllKinds(t *testing.T) { + ctx := context.TODO() + fakeClient := &FakeListClient{ + crs: map[string][]client.Object{ + "Standalone": {&enterpriseApi.Standalone{TypeMeta: metav1.TypeMeta{Kind: "Standalone"}, ObjectMeta: metav1.ObjectMeta{Name: "test-standalone"}}}, + "LicenseManager": {&enterpriseApi.LicenseManager{TypeMeta: metav1.TypeMeta{Kind: "LicenseManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemanager"}}}, + "LicenseMaster": {&enterpriseApiV3.LicenseMaster{TypeMeta: metav1.TypeMeta{Kind: "LicenseMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemaster"}}}, + "SearchHeadCluster": {&enterpriseApi.SearchHeadCluster{TypeMeta: metav1.TypeMeta{Kind: "SearchHeadCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-shc"}}}, + "ClusterManager": {&enterpriseApi.ClusterManager{TypeMeta: metav1.TypeMeta{Kind: "ClusterManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmanager"}}}, + "ClusterMaster": {&enterpriseApiV3.ClusterMaster{TypeMeta: metav1.TypeMeta{Kind: "ClusterMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmaster"}}}, + }, + sts: []apps.StatefulSet{}, // ensure all keys are present + } + crMap := getAllCustomResources(ctx, fakeClient) + kinds := []string{"Standalone", "LicenseManager", "LicenseMaster", "SearchHeadCluster", "ClusterManager", "ClusterMaster"} + for _, kind := range kinds { + if _, ok := crMap[kind]; !ok { + t.Errorf("expected kind %s in CR map", kind) + } + } +} + +func TestTelemetryCollectCRTelData_StandaloneData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.Standalone{} + cr.TypeMeta.Kind = "Standalone" + cr.ObjectMeta.Name = "test-standalone" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"Standalone": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-standalone-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + standaloneData, ok := data["Standalone"].(map[string]interface{}) + if !ok { + t.Fatalf("expected Standalone data map") + } + crData, ok := standaloneData["test-standalone"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "500m" || container["memory_request"] != "128Mi" || container["cpu_limit"] != "1" || container["memory_limit"] != "256Mi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_LicenseManagerData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.LicenseManager{} + cr.TypeMeta.Kind = "LicenseManager" + cr.ObjectMeta.Name = "test-licensemanager" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"LicenseManager": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-licensemanager-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("600m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("512Mi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + lmData, ok := data["LicenseManager"].(map[string]interface{}) + if !ok { + t.Fatalf("expected LicenseManager data map") + } + crData, ok := lmData["test-licensemanager"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "600m" || container["memory_request"] != "256Mi" || container["cpu_limit"] != "2" || container["memory_limit"] != "512Mi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_LicenseMasterData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApiV3.LicenseMaster{} + cr.TypeMeta.Kind = "LicenseMaster" + cr.ObjectMeta.Name = "test-licensemaster" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"LicenseMaster": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-licensemaster-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("700m"), + corev1.ResourceMemory: resource.MustParse("384Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3"), + corev1.ResourceMemory: resource.MustParse("768Mi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + lmData, ok := data["LicenseMaster"].(map[string]interface{}) + if !ok { + t.Fatalf("expected LicenseMaster data map") + } + crData, ok := lmData["test-licensemaster"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "700m" || container["memory_request"] != "384Mi" || container["cpu_limit"] != "3" || container["memory_limit"] != "768Mi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_SearchHeadClusterData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.SearchHeadCluster{} + cr.TypeMeta.Kind = "SearchHeadCluster" + cr.ObjectMeta.Name = "test-shc" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"SearchHeadCluster": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-shc-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("800m"), + corev1.ResourceMemory: resource.MustParse("512Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("4"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + shcData, ok := data["SearchHeadCluster"].(map[string]interface{}) + if !ok { + t.Fatalf("expected SearchHeadCluster data map") + } + crData, ok := shcData["test-shc"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "800m" || container["memory_request"] != "512Mi" || container["cpu_limit"] != "4" || container["memory_limit"] != "1Gi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_ClusterManagerData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.ClusterManager{} + cr.TypeMeta.Kind = "ClusterManager" + cr.ObjectMeta.Name = "test-cmanager" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"ClusterManager": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cmanager-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("900m"), + corev1.ResourceMemory: resource.MustParse("640Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("5"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + cmData, ok := data["ClusterManager"].(map[string]interface{}) + if !ok { + t.Fatalf("expected ClusterManager data map") + } + crData, ok := cmData["test-cmanager"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "900m" || container["memory_request"] != "640Mi" || container["cpu_limit"] != "5" || container["memory_limit"] != "2Gi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryCollectCRTelData_ClusterMasterData(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApiV3.ClusterMaster{} + cr.TypeMeta.Kind = "ClusterMaster" + cr.ObjectMeta.Name = "test-cmaster" + cr.ObjectMeta.Namespace = "default" + crList := map[string][]splcommon.MetaObject{"ClusterMaster": {cr}} + sts := apps.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cmaster-sts", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{{ + UID: cr.GetUID(), + }}, + }, + Spec: apps.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1000m"), + corev1.ResourceMemory: resource.MustParse("768Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("6"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }}, + }, + }, + }, + } + fakeClient := &FakeListClient{ + sts: []apps.StatefulSet{sts}, + } + data := make(map[string]interface{}) + collectCRTelData(ctx, fakeClient, crList, data) + cmData, ok := data["ClusterMaster"].(map[string]interface{}) + if !ok { + t.Fatalf("expected ClusterMaster data map") + } + crData, ok := cmData["test-cmaster"].([]map[string]string) + if !ok || len(crData) == 0 { + t.Fatalf("expected resource data slice") + } + container := crData[0] + if container["cpu_request"] != "1" || container["memory_request"] != "768Mi" || container["cpu_limit"] != "6" || container["memory_limit"] != "4Gi" { + t.Errorf("unexpected resource values: got %+v", container) + } +} + +func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + + err := updateLastTransmissionTime(ctx, mockClient, cm) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + statusStr, ok := cm.Data[telStatusKey] + if !ok { + t.Fatalf("expected telStatusKey in configmap data") + } + var status TelemetryStatus + if err := json.Unmarshal([]byte(statusStr), &status); err != nil { + t.Fatalf("failed to unmarshal status: %v", err) + } + if status.LastTransmission == "" { + t.Errorf("expected LastTransmission to be set") + } + if _, err := time.Parse(time.RFC3339, status.LastTransmission); err != nil { + t.Errorf("LastTransmission is not RFC3339: %v", status.LastTransmission) + } +} + +func TestTelemetryUpdateLastTransmissionTime_UpdateError(t *testing.T) { + ctx := context.TODO() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + badClient := &errorUpdateClient{} + err := updateLastTransmissionTime(ctx, badClient, cm) + if err == nil { + t.Errorf("expected error from client.Update, got nil") + } +} + +func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { + mockClient := spltest.NewMockClient() + ctx := context.TODO() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, + Data: map[string]string{}, + } + err := updateLastTransmissionTime(ctx, mockClient, cm) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + firstStatus := cm.Data[telStatusKey] + time.Sleep(1 * time.Second) + err = updateLastTransmissionTime(ctx, mockClient, cm) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + secondStatus := cm.Data[telStatusKey] + if firstStatus == secondStatus { + t.Errorf("expected status to change on repeated call") + } +} + +// errorUpdateClient is a mock client that always returns an error on Update +// Used for testing updateLastTransmissionTime error handling +type errorUpdateClient struct { + spltest.MockClient +} + +func (c *errorUpdateClient) Update(_ context.Context, _ client.Object, _ ...client.UpdateOption) error { + return errors.New("forced update error") +} + +// FakeListClient is a local mock client that supports List for CRs and StatefulSets for testing +// Only implements List for the types needed in these tests +type FakeListClient struct { + spltest.MockClient + crs map[string][]client.Object + sts []apps.StatefulSet +} + +func (c *FakeListClient) List(_ context.Context, list client.ObjectList, _ ...client.ListOption) error { + switch l := list.(type) { + case *enterpriseApi.StandaloneList: + l.Items = nil + for _, obj := range c.crs["Standalone"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.Standalone))) + } + case *enterpriseApi.LicenseManagerList: + l.Items = nil + for _, obj := range c.crs["LicenseManager"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.LicenseManager))) + } + case *enterpriseApiV3.LicenseMasterList: + l.Items = nil + for _, obj := range c.crs["LicenseMaster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApiV3.LicenseMaster))) + } + case *enterpriseApi.SearchHeadClusterList: + l.Items = nil + for _, obj := range c.crs["SearchHeadCluster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.SearchHeadCluster))) + } + case *enterpriseApi.ClusterManagerList: + l.Items = nil + for _, obj := range c.crs["ClusterManager"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.ClusterManager))) + } + case *enterpriseApiV3.ClusterMasterList: + l.Items = nil + for _, obj := range c.crs["ClusterMaster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApiV3.ClusterMaster))) + } + case *apps.StatefulSetList: + l.Items = c.sts + default: + return nil + } + return nil +} + +// Additional tests for error paths and success can be added with more advanced mocks. diff --git a/test/custom_resource_crud/custom_resource_crud_s1_test.go b/test/custom_resource_crud/custom_resource_crud_s1_test.go index 3747eeb4d..d8d26d4e7 100644 --- a/test/custom_resource_crud/custom_resource_crud_s1_test.go +++ b/test/custom_resource_crud/custom_resource_crud_s1_test.go @@ -71,6 +71,9 @@ var _ = Describe("Crcrud test for SVA S1", func() { // Verify Standalone goes to ready state testenv.StandaloneReady(ctx, deployment, deployment.GetName(), standalone, testcaseEnvInst) + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment) + // Deploy Monitoring Console CRD mc, err := deployment.DeployMonitoringConsole(ctx, deployment.GetName(), "") Expect(err).To(Succeed(), "Unable to deploy Monitoring Console One instance") diff --git a/test/testenv/deployment.go b/test/testenv/deployment.go index 85e753a84..263ea1147 100644 --- a/test/testenv/deployment.go +++ b/test/testenv/deployment.go @@ -1830,3 +1830,13 @@ func (d *Deployment) DeployMultisiteClusterMasterWithMonitoringConsole(ctx conte } return nil } + +// GetConfigMap retrieves a ConfigMap by name in the deployment's namespace. +func (d *Deployment) GetConfigMap(ctx context.Context, name string) (*corev1.ConfigMap, error) { + cm := &corev1.ConfigMap{} + err := d.testenv.GetKubeClient().Get(ctx, client.ObjectKey{Name: name, Namespace: d.testenv.namespace}, cm) + if err != nil { + return nil, err + } + return cm, nil +} diff --git a/test/testenv/verificationutils.go b/test/testenv/verificationutils.go index e5c734405..c8e863ef9 100644 --- a/test/testenv/verificationutils.go +++ b/test/testenv/verificationutils.go @@ -1213,3 +1213,29 @@ func VerifyFilesInDirectoryOnPod(ctx context.Context, deployment *Deployment, te }, deployment.GetTimeout(), PollInterval).Should(gomega.Equal(true)) } } + +// VerifyTelemetry checks that the telemetry ConfigMap has a non-empty lastTransmission field in its status key. +func VerifyTelemetry(ctx context.Context, deployment *Deployment) { + const ( + configMapName = "splunk-operator-manager-telemetry" + statusKey = "status" + ) + type telemetryStatus struct { + LastTransmission string `json:"lastTransmission"` + } + gomega.Eventually(func() bool { + cm, err := deployment.GetConfigMap(ctx, configMapName) + if err != nil { + return false + } + statusVal, ok := cm.Data[statusKey] + if !ok || statusVal == "" { + return false + } + var status telemetryStatus + if err := json.Unmarshal([]byte(statusVal), &status); err != nil { + return false + } + return status.LastTransmission != "" + }, deployment.GetTimeout(), PollInterval).Should(gomega.Equal(true)) +} From 656737bfcc155c86e8fdc08d3c9039a69b6c6c71 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Thu, 29 Jan 2026 13:02:52 -0800 Subject: [PATCH 02/16] Add more unit tests --- pkg/splunk/client/enterprise_test.go | 88 ++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/pkg/splunk/client/enterprise_test.go b/pkg/splunk/client/enterprise_test.go index 9850b17c5..c07c3b747 100644 --- a/pkg/splunk/client/enterprise_test.go +++ b/pkg/splunk/client/enterprise_test.go @@ -16,6 +16,7 @@ package client import ( + "bytes" "fmt" "net/http" "net/url" @@ -642,6 +643,93 @@ func TestSetIdxcSecret(t *testing.T) { splunkClientErrorTester(t, test) } +func TestGetLicenseInfo_Success(t *testing.T) { + wantRequest, _ := http.NewRequest("GET", "https://localhost:8089/services/licenser/licenses?count=0&output_mode=json", nil) + wantLicenseInfo := LicenseInfo{ + ID: "1234-5678-90AB-CDEF", + Type: "Enterprise", + } + test := func(c SplunkClient) error { + info, err := c.GetLicenseInfo() + if err != nil { + return err + } + if info.ID != wantLicenseInfo.ID || info.Type != wantLicenseInfo.Type { + t.Errorf("LicenseInfo = %+v; want %+v", info, wantLicenseInfo) + } + return nil + } + body := `{"entry":[{"content":{"guid":"1234-5678-90AB-CDEF","type":"Enterprise"}}]}` + splunkClientTester(t, "TestGetLicenseInfo", 200, body, wantRequest, test) + + // test body with no entries + test = func(c SplunkClient) error { + _, err := c.GetLicenseInfo() + if err == nil { + t.Errorf("GetLicenseInfo returned nil; want error") + } + return nil + } + body = `{"entry":[]}` + splunkClientTester(t, "TestGetLicenseInfo", 200, body, wantRequest, test) +} + +func TestGetLicenseInfo_Error(t *testing.T) { + wantRequest, _ := http.NewRequest("GET", "https://localhost:8089/services/licenser/licenses?count=0&output_mode=json", nil) + + test := func(c SplunkClient) error { + _, err := c.GetLicenseInfo() + if err == nil { + t.Errorf("GetLicenseInfo should return error for 500 response code") + } + return nil + } + + // Simulate a 500 error response from the mock client + splunkClientTester(t, "TestGetLicenseInfo_Error", 500, "", wantRequest, test) +} + +func TestSendTelemetry_Success(t *testing.T) { + path := "/services/telemetry/metrics" + bodyBytes := []byte(`{"metric":"value"}`) + wantRequest, _ := http.NewRequest("POST", "https://localhost:8089/services/telemetry/metrics", bytes.NewReader(bodyBytes)) + wantRequest.Header.Set("Content-Type", "application/json") + wantResponse := TelemetryResponse{ + Message: "Telemetry sent successfully", + MetricValueID: "abc123", + } + test := func(c SplunkClient) error { + resp, err := c.SendTelemetry(path, bodyBytes) + if err != nil { + return err + } + if resp.Message != wantResponse.Message || resp.MetricValueID != wantResponse.MetricValueID { + t.Errorf("SendTelemetry = %+v; want %+v", resp, wantResponse) + } + return nil + } + responseBody := `{"message":"Telemetry sent successfully","metricValueId":"abc123"}` + splunkClientTester(t, "TestSendTelemetry", 201, responseBody, wantRequest, test) +} + +func TestSendTelemetry_Error(t *testing.T) { + path := "/services/telemetry/metrics" + bodyBytes := []byte(`{"metric":"value"}`) + wantRequest, _ := http.NewRequest("POST", "https://localhost:8089/services/telemetry/metrics", bytes.NewReader(bodyBytes)) + wantRequest.Header.Set("Content-Type", "application/json") + + test := func(c SplunkClient) error { + _, err := c.SendTelemetry(path, bodyBytes) + if err == nil { + t.Errorf("SendTelemetry should return error for 500 response code") + } + return nil + } + + // Simulate a 500 error response from the mock client + splunkClientTester(t, "TestSendTelemetry_Error", 500, "", wantRequest, test) +} + func TestRestartSplunk(t *testing.T) { wantRequest, _ := http.NewRequest("POST", "https://localhost:8089/services/server/control/restart", nil) test := func(c SplunkClient) error { From 930784eb8810fdcdd63a27ff471d245ec52000e6 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Thu, 29 Jan 2026 23:06:27 -0800 Subject: [PATCH 03/16] fix test --- pkg/splunk/enterprise/telemetry.go | 5 ++- .../custom_resource_crud_s1_test.go | 3 +- test/testenv/verificationutils.go | 43 +++++++++++++------ 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index 3a3152bf0..f46940e1e 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -347,7 +347,8 @@ func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr sp } serviceName := GetSplunkServiceName(instanceID, cr.GetName(), false) - scopedLog.Info("Got service name", "serviceName", serviceName) + serviceFQDN := splcommon.GetServiceFQDN(cr.GetNamespace(), serviceName) + scopedLog.Info("Got service FQDN", "serviceFQDN", serviceFQDN) defaultSecretObjName := splcommon.GetNamespaceScopedSecretName(cr.GetNamespace()) defaultSecret, err := splutil.GetSecretByName(ctx, client, cr.GetNamespace(), cr.GetName(), defaultSecretObjName) @@ -362,7 +363,7 @@ func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr sp scopedLog.Info("Failed to find admin password") return false } - splunkClient := splclient.NewSplunkClient(fmt.Sprintf("https://%s:8089", serviceName), "admin", string(adminPwd)) + splunkClient := splclient.NewSplunkClient(fmt.Sprintf("https://%s:8089", serviceFQDN), "admin", string(adminPwd)) var licenseInfo *splclient.LicenseInfo licenseInfo, err = splunkClient.GetLicenseInfo() diff --git a/test/custom_resource_crud/custom_resource_crud_s1_test.go b/test/custom_resource_crud/custom_resource_crud_s1_test.go index d8d26d4e7..ef3589171 100644 --- a/test/custom_resource_crud/custom_resource_crud_s1_test.go +++ b/test/custom_resource_crud/custom_resource_crud_s1_test.go @@ -65,6 +65,7 @@ var _ = Describe("Crcrud test for SVA S1", func() { // Deploy Standalone mcRef := deployment.GetName() + prevTelemetrySubmissionTime := testenv.GetTelemetryLastSubmissionTime(ctx, deployment) standalone, err := deployment.DeployStandalone(ctx, deployment.GetName(), mcRef, "") Expect(err).To(Succeed(), "Unable to deploy standalone instance") @@ -72,7 +73,7 @@ var _ = Describe("Crcrud test for SVA S1", func() { testenv.StandaloneReady(ctx, deployment, deployment.GetName(), standalone, testcaseEnvInst) // Verify telemetry is sent successfully - testenv.VerifyTelemetry(ctx, deployment) + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) // Deploy Monitoring Console CRD mc, err := deployment.DeployMonitoringConsole(ctx, deployment.GetName(), "") diff --git a/test/testenv/verificationutils.go b/test/testenv/verificationutils.go index c8e863ef9..c451628ab 100644 --- a/test/testenv/verificationutils.go +++ b/test/testenv/verificationutils.go @@ -1214,8 +1214,7 @@ func VerifyFilesInDirectoryOnPod(ctx context.Context, deployment *Deployment, te } } -// VerifyTelemetry checks that the telemetry ConfigMap has a non-empty lastTransmission field in its status key. -func VerifyTelemetry(ctx context.Context, deployment *Deployment) { +func GetTelemetryLastSubmissionTime(ctx context.Context, deployment *Deployment) string { const ( configMapName = "splunk-operator-manager-telemetry" statusKey = "status" @@ -1223,19 +1222,35 @@ func VerifyTelemetry(ctx context.Context, deployment *Deployment) { type telemetryStatus struct { LastTransmission string `json:"lastTransmission"` } + cm, err := deployment.GetConfigMap(ctx, configMapName) + if err != nil { + logf.Log.Error(err, "GetTelemetryLastSubmissionTime: failed to retrieve configmap") + return "" + } + statusVal, ok := cm.Data[statusKey] + if !ok || statusVal == "" { + logf.Log.Info("GetTelemetryLastSubmissionTime: failed to retrieve status") + return "" + } + logf.Log.Info("GetTelemetryLastSubmissionTime: retrieved status", "status", statusVal) + + var status telemetryStatus + if err := json.Unmarshal([]byte(statusVal), &status); err != nil { + logf.Log.Error(err, "GetTelemetryLastSubmissionTime: failed to unmarshal status", "statusVal", statusVal) + return "" + } + return status.LastTransmission +} + +// VerifyTelemetry checks that the telemetry ConfigMap has a non-empty lastTransmission field in its status key. +func VerifyTelemetry(ctx context.Context, deployment *Deployment, prevVal string) { + logf.Log.Info("VerifyTelemetry: start") gomega.Eventually(func() bool { - cm, err := deployment.GetConfigMap(ctx, configMapName) - if err != nil { - return false - } - statusVal, ok := cm.Data[statusKey] - if !ok || statusVal == "" { - return false - } - var status telemetryStatus - if err := json.Unmarshal([]byte(statusVal), &status); err != nil { - return false + currentVal := GetTelemetryLastSubmissionTime(ctx, deployment) + if currentVal != "" && currentVal != prevVal { + logf.Log.Info("VerifyTelemetry: success", "previous", prevVal, "current", currentVal) + return true } - return status.LastTransmission != "" + return false }, deployment.GetTimeout(), PollInterval).Should(gomega.Equal(true)) } From 694e7666ce65628ba92ab189e956be9496947511 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Fri, 30 Jan 2026 11:45:43 -0800 Subject: [PATCH 04/16] Pass test mode as false in testing --- .../manager/controller_manager_telemetry.yaml | 1 + pkg/splunk/enterprise/telemetry.go | 53 ++++++++++++------- pkg/splunk/enterprise/telemetry_test.go | 20 +++---- .../custom_resource_crud_c3_test.go | 4 ++ .../custom_resource_crud_m4_test.go | 4 ++ .../custom_resource_crud_s1_test.go | 6 +-- test/trigger-tests.sh | 4 +- 7 files changed, 59 insertions(+), 33 deletions(-) diff --git a/config/manager/controller_manager_telemetry.yaml b/config/manager/controller_manager_telemetry.yaml index 0ed5a866b..ac26f3e73 100644 --- a/config/manager/controller_manager_telemetry.yaml +++ b/config/manager/controller_manager_telemetry.yaml @@ -6,4 +6,5 @@ data: status: | { "lastTransmission": "" + "test": "true" } \ No newline at end of file diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index f46940e1e..b5357aafa 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -11,7 +11,6 @@ import ( splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" splutil "github.com/splunk/splunk-operator/pkg/splunk/util" appsv1 "k8s.io/api/apps/v1" - "os" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -21,10 +20,8 @@ import ( ) const ( - // TODO: Should be set to one day for the release - requeAfterInSeconds = 30 - // TODO: Should change to false for the release - isTestMode = true + requeAfterInSeconds = 86400 // Send telemetry once a day + defaultTestMode = true // TODO: Ideally the version string should be set from the release tag SOK_VERSION = "3.0.0" @@ -42,7 +39,8 @@ type Telemetry struct { } type TelemetryStatus struct { - LastTransmission string `json:"lastTransmission"` + LastTransmission string `json:"lastTransmission,omitempty"` + Test string `json:"test,omitempty"` } func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap) (reconcile.Result, error) { @@ -75,9 +73,10 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * // Now send the telemetry for _, crs := range crList { for _, cr := range crs { - success := SendTelemetry(ctx, client, cr, data) + test := isTest(ctx, cm) + success := SendTelemetry(ctx, client, cr, data, test) if success { - updateLastTransmissionTime(ctx, client, cm) + updateLastTransmissionTime(ctx, client, cm, test) return result, nil } } @@ -86,12 +85,17 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * return result, errors.New("Failed to send telemetry data") } -func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap) error { +func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap, test bool) error { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("updateLastTransmissionTime") var status TelemetryStatus status.LastTransmission = time.Now().UTC().Format(time.RFC3339) + if test { + status.Test = "true" + } else { + status.Test = "false" + } updated, err := json.MarshalIndent(status, "", " ") if err != nil { @@ -304,23 +308,34 @@ func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string } } -func isTest(ctx context.Context) bool { +func isTest(ctx context.Context, cm *corev1.ConfigMap) bool { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("checkTestMode") - // Retrieve SPLUNK_TEST_MODE environment variable - testModeStr := os.Getenv("SPLUNK_TEST_MODE") - if testModeStr == "1" { - scopedLog.Info("Test mode is enabled via SPLUNK_TEST_MODE env variable") - return true + if cm.Data != nil { + if val, ok := cm.Data[telStatusKey]; ok { + var status TelemetryStatus + err := json.Unmarshal([]byte(val), &status) + if err != nil { + scopedLog.Error(err, "Failed to unmarshal telemetry status") + return defaultTestMode + } else { + if status.Test == "true" { + scopedLog.Info("Test is true") + return true + } + scopedLog.Info("Test is false") + return false + } + } } - scopedLog.Info("Return test mode", "isTestMode", isTestMode) - return isTestMode + scopedLog.Info("Failed to retrieve test mode") + return defaultTestMode } // SendTelemetry is exported for testing -func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr splcommon.MetaObject, data map[string]interface{}) bool { +func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr splcommon.MetaObject, data map[string]interface{}, test bool) bool { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("sendTelemetry").WithValues( "name", cr.GetObjectMeta().GetName(), @@ -378,7 +393,7 @@ func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr sp Component: "sok", OptInRequired: 2, Data: data, - Test: isTest(ctx), + Test: test, } path := fmt.Sprintf("/servicesNS/nobody/%s/telemetry-metric", telAppNameStr) diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go index 76e614def..0e73df0ec 100644 --- a/pkg/splunk/enterprise/telemetry_test.go +++ b/pkg/splunk/enterprise/telemetry_test.go @@ -50,13 +50,10 @@ func TestApplyTelemetry_ConfigMapNoData(t *testing.T) { Data: map[string]string{}, } ctx := context.TODO() - result, err := ApplyTelemetry(ctx, mockClient, cm) + _, err := ApplyTelemetry(ctx, mockClient, cm) if err == nil { t.Errorf("expected error when no CRs present, got nil") } - if !result.Requeue { - t.Errorf("expected requeue to be true, got false") - } } func TestTelemetryCollectCMTelData_UnmarshalError(t *testing.T) { @@ -90,7 +87,7 @@ func TestTelemetryCollectCMTelData_ValidJSON(t *testing.T) { func TestSendTelemetry_UnknownKind(t *testing.T) { cr := &enterpriseApi.Standalone{} cr.TypeMeta.Kind = "UnknownKind" - ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}) + ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}, false) if ok { t.Errorf("expected SendTelemetry to return false for unknown kind") } @@ -101,7 +98,7 @@ func TestSendTelemetry_NoSecret(t *testing.T) { cr.TypeMeta.Kind = "Standalone" cr.ObjectMeta.Name = "test" cr.ObjectMeta.Namespace = "default" - ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}) + ok := SendTelemetry(context.TODO(), spltest.NewMockClient(), cr, map[string]interface{}{}, false) if ok { t.Errorf("expected SendTelemetry to return false if no secret found") } @@ -461,7 +458,7 @@ func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { Data: map[string]string{}, } - err := updateLastTransmissionTime(ctx, mockClient, cm) + err := updateLastTransmissionTime(ctx, mockClient, cm, false) if err != nil { t.Fatalf("expected no error, got: %v", err) } @@ -479,6 +476,9 @@ func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { if _, err := time.Parse(time.RFC3339, status.LastTransmission); err != nil { t.Errorf("LastTransmission is not RFC3339: %v", status.LastTransmission) } + if status.Test != "false" { + t.Errorf("expected Test to be 'false', got %v", status.Test) + } } func TestTelemetryUpdateLastTransmissionTime_UpdateError(t *testing.T) { @@ -488,7 +488,7 @@ func TestTelemetryUpdateLastTransmissionTime_UpdateError(t *testing.T) { Data: map[string]string{}, } badClient := &errorUpdateClient{} - err := updateLastTransmissionTime(ctx, badClient, cm) + err := updateLastTransmissionTime(ctx, badClient, cm, false) if err == nil { t.Errorf("expected error from client.Update, got nil") } @@ -501,13 +501,13 @@ func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, Data: map[string]string{}, } - err := updateLastTransmissionTime(ctx, mockClient, cm) + err := updateLastTransmissionTime(ctx, mockClient, cm, false) if err != nil { t.Fatalf("expected no error, got: %v", err) } firstStatus := cm.Data[telStatusKey] time.Sleep(1 * time.Second) - err = updateLastTransmissionTime(ctx, mockClient, cm) + err = updateLastTransmissionTime(ctx, mockClient, cm, false) if err != nil { t.Fatalf("expected no error, got: %v", err) } diff --git a/test/custom_resource_crud/custom_resource_crud_c3_test.go b/test/custom_resource_crud/custom_resource_crud_c3_test.go index 5ec5f4f12..45d896b62 100644 --- a/test/custom_resource_crud/custom_resource_crud_c3_test.go +++ b/test/custom_resource_crud/custom_resource_crud_c3_test.go @@ -69,6 +69,7 @@ var _ = Describe("Crcrud test for SVA C3", func() { // Deploy Single site Cluster and Search Head Clusters mcRef := deployment.GetName() + prevTelemetrySubmissionTime := testenv.GetTelemetryLastSubmissionTime(ctx, deployment) err := deployment.DeploySingleSiteCluster(ctx, deployment.GetName(), 3, true /*shc*/, mcRef) Expect(err).To(Succeed(), "Unable to deploy cluster") @@ -98,6 +99,9 @@ var _ = Describe("Crcrud test for SVA C3", func() { testenv.VerifyCPULimits(deployment, testcaseEnvInst.GetName(), indexerPodName, defaultCPULimits) } + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) + // Change CPU limits to trigger CR update idxc := &enterpriseApi.IndexerCluster{} instanceName := fmt.Sprintf("%s-idxc", deployment.GetName()) diff --git a/test/custom_resource_crud/custom_resource_crud_m4_test.go b/test/custom_resource_crud/custom_resource_crud_m4_test.go index 3f5af549d..a00472fb0 100644 --- a/test/custom_resource_crud/custom_resource_crud_m4_test.go +++ b/test/custom_resource_crud/custom_resource_crud_m4_test.go @@ -65,6 +65,7 @@ var _ = Describe("Crcrud test for SVA M4", func() { // Deploy Multisite Cluster and Search Head Clusters mcRef := deployment.GetName() + prevTelemetrySubmissionTime := testenv.GetTelemetryLastSubmissionTime(ctx, deployment) siteCount := 3 err := deployment.DeployMultisiteClusterMasterWithSearchHead(ctx, deployment.GetName(), 1, siteCount, mcRef) Expect(err).To(Succeed(), "Unable to deploy cluster") @@ -97,6 +98,9 @@ var _ = Describe("Crcrud test for SVA M4", func() { testenv.VerifyCPULimits(deployment, testcaseEnvInst.GetName(), podName, defaultCPULimits) } + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) + // Change CPU limits to trigger CR update idxc := &enterpriseApi.IndexerCluster{} for i := 1; i <= siteCount; i++ { diff --git a/test/custom_resource_crud/custom_resource_crud_s1_test.go b/test/custom_resource_crud/custom_resource_crud_s1_test.go index ef3589171..9cca04c2b 100644 --- a/test/custom_resource_crud/custom_resource_crud_s1_test.go +++ b/test/custom_resource_crud/custom_resource_crud_s1_test.go @@ -72,9 +72,6 @@ var _ = Describe("Crcrud test for SVA S1", func() { // Verify Standalone goes to ready state testenv.StandaloneReady(ctx, deployment, deployment.GetName(), standalone, testcaseEnvInst) - // Verify telemetry is sent successfully - testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) - // Deploy Monitoring Console CRD mc, err := deployment.DeployMonitoringConsole(ctx, deployment.GetName(), "") Expect(err).To(Succeed(), "Unable to deploy Monitoring Console One instance") @@ -86,6 +83,9 @@ var _ = Describe("Crcrud test for SVA S1", func() { standalonePodName := fmt.Sprintf(testenv.StandalonePod, deployment.GetName(), 0) testenv.VerifyCPULimits(deployment, testcaseEnvInst.GetName(), standalonePodName, defaultCPULimits) + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) + // Change CPU limits to trigger CR update standalone.Spec.Resources.Limits = corev1.ResourceList{ "cpu": resource.MustParse(newCPULimits), diff --git a/test/trigger-tests.sh b/test/trigger-tests.sh index dc967546d..6cfbd1eb5 100644 --- a/test/trigger-tests.sh +++ b/test/trigger-tests.sh @@ -141,7 +141,9 @@ if [[ -z "${DEBUG}" ]]; then export DEBUG="${DEBUG_RUN}" fi - +# Always set telemetry test to true before running tests +echo "Setting telemetry test to true" +kubectl patch configmap splunk-operator-manager-telemetry --type merge -p '{"data":{"status":"{\"test\":\"true\",\"lastTransmission\":\"\"}"}}' echo "Skipping following test :: ${TEST_TO_SKIP}" From a01170dae1e609af3e3db1ac1739c968bf5ce347 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Fri, 30 Jan 2026 14:15:42 -0800 Subject: [PATCH 05/16] fix --- test/trigger-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/trigger-tests.sh b/test/trigger-tests.sh index 6cfbd1eb5..b04698e0c 100644 --- a/test/trigger-tests.sh +++ b/test/trigger-tests.sh @@ -143,7 +143,7 @@ fi # Always set telemetry test to true before running tests echo "Setting telemetry test to true" -kubectl patch configmap splunk-operator-manager-telemetry --type merge -p '{"data":{"status":"{\"test\":\"true\",\"lastTransmission\":\"\"}"}}' +kubectl patch configmap splunk-operator-manager-telemetry -n splunk-operator --type merge -p '{"data":{"status":"{\"test\":\"true\",\"lastTransmission\":\"\"}"}}' echo "Skipping following test :: ${TEST_TO_SKIP}" From 7092e25cac099e58e6010d6b140c8035f440670c Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Fri, 30 Jan 2026 19:26:39 -0800 Subject: [PATCH 06/16] cleanup --- internal/controller/telemetry_controller.go | 5 +- pkg/splunk/enterprise/telemetry.go | 70 +++++++++---------- pkg/splunk/enterprise/telemetry_test.go | 16 ++++- .../custom_resource_crud_c3_test.go | 6 +- .../custom_resource_crud_m4_test.go | 6 +- .../custom_resource_crud_s1_test.go | 6 +- 6 files changed, 58 insertions(+), 51 deletions(-) diff --git a/internal/controller/telemetry_controller.go b/internal/controller/telemetry_controller.go index 8f49faa00..e81698823 100644 --- a/internal/controller/telemetry_controller.go +++ b/internal/controller/telemetry_controller.go @@ -40,11 +40,9 @@ const ( ConfigMapNamePrefix = "splunk-operator-" ConfigMapLabelName = "splunk-operator" - telemetryRetryDelay = time.Second * 60 + telemetryRetryDelay = time.Second * 600 ) -// TelemetryReconciler periodically reads all keys under the "telemetry" configmap -// in the Splunk operator namespace and logs all key values. type TelemetryReconciler struct { client.Client Scheme *runtime.Scheme @@ -72,7 +70,6 @@ func (r *TelemetryReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, errors.Wrap(err, "could not load telemetry configmap") } - // Log all key/value pairs. No sorting per your request. if len(cm.Data) == 0 { reqLogger.Info("telemetry configmap has no data keys") return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index b5357aafa..92d4f7391 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -63,15 +63,17 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * // Add SOK version data[telSOKVersionKey] = SOK_VERSION - // Add per CR telemetry - crList := getAllCustomResources(ctx, client) - + // Add SOK telemetry + crWithTelAppList, crList := getAllCustomResources(ctx, client) collectCRTelData(ctx, client, crList, data) - // Add telemetry set in this configmap, i.e splunk POD's telemetry + /* + * Add other component's telemetry set in splunk-operator-manager-telemetry configmap. + * i.e splunk POD's telemetry + */ CollectCMTelData(ctx, cm, data) // Now send the telemetry - for _, crs := range crList { + for _, crs := range crWithTelAppList { for _, cr := range crs { test := isTest(ctx, cm) success := SendTelemetry(ctx, client, cr, data, test) @@ -112,15 +114,15 @@ func updateLastTransmissionTime(ctx context.Context, client splcommon.Controller return nil } -func getAllCustomResources(ctx context.Context, client splcommon.ControllerClient) map[string][]splcommon.MetaObject { +func getAllCustomResources(ctx context.Context, client splcommon.ControllerClient) (map[string][]splcommon.MetaObject, map[string][]splcommon.MetaObject) { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("collectCRTelData") var crList map[string][]splcommon.MetaObject crList = make(map[string][]splcommon.MetaObject) - //var instanceID InstanceType - //var telAppName string + var crWithTelAppList map[string][]splcommon.MetaObject + crWithTelAppList = make(map[string][]splcommon.MetaObject) var err error var standaloneList enterpriseApi.StandaloneList @@ -132,65 +134,67 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien } else if len(standaloneList.Items) > 0 { crList[standaloneList.Items[0].Kind] = make([]splcommon.MetaObject, 0) for _, cr := range standaloneList.Items { - if !cr.Status.TelAppInstalled { - scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) - continue + if cr.Status.TelAppInstalled { + crWithTelAppList[standaloneList.Items[0].Kind] = append(crWithTelAppList[standaloneList.Items[0].Kind], &cr) } crList[standaloneList.Items[0].Kind] = append(crList[standaloneList.Items[0].Kind], &cr) } } var lmanagerList enterpriseApi.LicenseManagerList - //instanceID = SplunkLicenseManager - //telAppName = fmt.Sprintf(telAppNameTemplateStr, "lmanager") err = client.List(ctx, &lmanagerList) if err != nil { scopedLog.Error(err, "Failed to list LicenseManager objects") } else if len(lmanagerList.Items) > 0 { crList[lmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) for _, cr := range lmanagerList.Items { - if !cr.Status.TelAppInstalled { - scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) - continue + if cr.Status.TelAppInstalled { + crWithTelAppList[lmanagerList.Items[0].Kind] = append(crWithTelAppList[lmanagerList.Items[0].Kind], &cr) } crList[lmanagerList.Items[0].Kind] = append(crList[lmanagerList.Items[0].Kind], &cr) } } var lmasterList enterpriseApiV3.LicenseMasterList - //instanceID = SplunkLicenseMaster - //telAppName = fmt.Sprintf(telAppNameTemplateStr, "lmaster") err = client.List(ctx, &lmasterList) if err != nil { scopedLog.Error(err, "Failed to list LicenseMaster objects") } else if len(lmasterList.Items) > 0 { crList[lmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) for _, cr := range lmasterList.Items { - if !cr.Status.TelAppInstalled { - scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) - continue + if cr.Status.TelAppInstalled { + crWithTelAppList[lmasterList.Items[0].Kind] = append(crWithTelAppList[lmasterList.Items[0].Kind], &cr) } crList[lmasterList.Items[0].Kind] = append(crList[lmasterList.Items[0].Kind], &cr) } } var shcList enterpriseApi.SearchHeadClusterList - //instanceID = SplunkSearchHead - //telAppName = fmt.Sprintf(telAppNameTemplateStr, "shc") err = client.List(ctx, &shcList) if err != nil { scopedLog.Error(err, "Failed to list SearchHeadCluster objects") } else if len(shcList.Items) > 0 { crList[shcList.Items[0].Kind] = make([]splcommon.MetaObject, 0) for _, cr := range shcList.Items { - if !cr.Status.TelAppInstalled { - scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) - continue + if cr.Status.TelAppInstalled { + crWithTelAppList[shcList.Items[0].Kind] = append(crWithTelAppList[shcList.Items[0].Kind], &cr) } crList[shcList.Items[0].Kind] = append(crList[shcList.Items[0].Kind], &cr) } } + var idxList enterpriseApi.IndexerClusterList + err = client.List(ctx, &idxList) + if err != nil { + scopedLog.Error(err, "Failed to list IndexerCluster objects") + } else if len(idxList.Items) > 0 { + crList[idxList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + for _, cr := range idxList.Items { + // IndexerCluster does not have telemetry app installed + crList[idxList.Items[0].Kind] = append(crList[idxList.Items[0].Kind], &cr) + } + } + var cmanagerList enterpriseApi.ClusterManagerList err = client.List(ctx, &cmanagerList) if err != nil { @@ -198,9 +202,8 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien } else if len(cmanagerList.Items) > 0 { crList[cmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) for _, cr := range cmanagerList.Items { - if !cr.Status.TelAppInstalled { - scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) - continue + if cr.Status.TelAppInstalled { + crWithTelAppList[cmanagerList.Items[0].Kind] = append(crWithTelAppList[cmanagerList.Items[0].Kind], &cr) } crList[cmanagerList.Items[0].Kind] = append(crList[cmanagerList.Items[0].Kind], &cr) } @@ -213,15 +216,14 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien } else if len(cmasterList.Items) > 0 { crList[cmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) for _, cr := range cmasterList.Items { - if !cr.Status.TelAppInstalled { - scopedLog.Info("Skipping telemetry for this CR as tel app is not installed", "kind", cr.Kind, "name", cr.Name) - continue + if cr.Status.TelAppInstalled { + crWithTelAppList[cmasterList.Items[0].Kind] = append(crWithTelAppList[cmasterList.Items[0].Kind], &cr) } crList[cmasterList.Items[0].Kind] = append(crList[cmasterList.Items[0].Kind], &cr) } } - return crList + return crWithTelAppList, crList } func getOwnedStatefulSets( @@ -286,7 +288,6 @@ func collectCRTelData(ctx context.Context, client splcommon.ControllerClient, cr } } -// CollectCMTelData is exported for testing func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string]interface{}) { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("collectCMTelData") @@ -334,7 +335,6 @@ func isTest(ctx context.Context, cm *corev1.ConfigMap) bool { return defaultTestMode } -// SendTelemetry is exported for testing func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr splcommon.MetaObject, data map[string]interface{}, test bool) bool { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("sendTelemetry").WithValues( diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go index 0e73df0ec..73f7c7a9d 100644 --- a/pkg/splunk/enterprise/telemetry_test.go +++ b/pkg/splunk/enterprise/telemetry_test.go @@ -23,7 +23,10 @@ import ( func TestTelemetryGetAllCustomResources_Empty(t *testing.T) { mockClient := spltest.NewMockClient() ctx := context.TODO() - crMap := getAllCustomResources(ctx, mockClient) + crWithTelAppList, crMap := getAllCustomResources(ctx, mockClient) + if len(crWithTelAppList) != 0 { + t.Errorf("expected no CRs with telemetry app, got %d", len(crWithTelAppList)) + } if len(crMap) != 0 { t.Errorf("expected no CRs, got %d", len(crMap)) } @@ -112,18 +115,20 @@ func TestTelemetryGetAllCustomResources_AllKinds(t *testing.T) { "LicenseManager": {&enterpriseApi.LicenseManager{TypeMeta: metav1.TypeMeta{Kind: "LicenseManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemanager"}}}, "LicenseMaster": {&enterpriseApiV3.LicenseMaster{TypeMeta: metav1.TypeMeta{Kind: "LicenseMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemaster"}}}, "SearchHeadCluster": {&enterpriseApi.SearchHeadCluster{TypeMeta: metav1.TypeMeta{Kind: "SearchHeadCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-shc"}}}, + "IndexerCluster": {&enterpriseApi.IndexerCluster{TypeMeta: metav1.TypeMeta{Kind: "IndexerCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-idx"}}}, "ClusterManager": {&enterpriseApi.ClusterManager{TypeMeta: metav1.TypeMeta{Kind: "ClusterManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmanager"}}}, "ClusterMaster": {&enterpriseApiV3.ClusterMaster{TypeMeta: metav1.TypeMeta{Kind: "ClusterMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmaster"}}}, }, sts: []apps.StatefulSet{}, // ensure all keys are present } - crMap := getAllCustomResources(ctx, fakeClient) - kinds := []string{"Standalone", "LicenseManager", "LicenseMaster", "SearchHeadCluster", "ClusterManager", "ClusterMaster"} + _, crMap := getAllCustomResources(ctx, fakeClient) + kinds := []string{"Standalone", "LicenseManager", "LicenseMaster", "SearchHeadCluster", "IndexerCluster", "ClusterManager", "ClusterMaster"} for _, kind := range kinds { if _, ok := crMap[kind]; !ok { t.Errorf("expected kind %s in CR map", kind) } } + // crWithTelAppList may be empty if TelAppInstalled is not set in the test CRs } func TestTelemetryCollectCRTelData_StandaloneData(t *testing.T) { @@ -557,6 +562,11 @@ func (c *FakeListClient) List(_ context.Context, list client.ObjectList, _ ...cl for _, obj := range c.crs["SearchHeadCluster"] { l.Items = append(l.Items, *(obj.(*enterpriseApi.SearchHeadCluster))) } + case *enterpriseApi.IndexerClusterList: + l.Items = nil + for _, obj := range c.crs["IndexerCluster"] { + l.Items = append(l.Items, *(obj.(*enterpriseApi.IndexerCluster))) + } case *enterpriseApi.ClusterManagerList: l.Items = nil for _, obj := range c.crs["ClusterManager"] { diff --git a/test/custom_resource_crud/custom_resource_crud_c3_test.go b/test/custom_resource_crud/custom_resource_crud_c3_test.go index 45d896b62..8e62938d1 100644 --- a/test/custom_resource_crud/custom_resource_crud_c3_test.go +++ b/test/custom_resource_crud/custom_resource_crud_c3_test.go @@ -82,6 +82,9 @@ var _ = Describe("Crcrud test for SVA C3", func() { // Ensure Indexers go to Ready phase testenv.SingleSiteIndexersReady(ctx, deployment, testcaseEnvInst) + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) + // Deploy Monitoring Console CRD mc, err := deployment.DeployMonitoringConsole(ctx, mcRef, "") Expect(err).To(Succeed(), "Unable to deploy Monitoring Console One instance") @@ -99,9 +102,6 @@ var _ = Describe("Crcrud test for SVA C3", func() { testenv.VerifyCPULimits(deployment, testcaseEnvInst.GetName(), indexerPodName, defaultCPULimits) } - // Verify telemetry is sent successfully - testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) - // Change CPU limits to trigger CR update idxc := &enterpriseApi.IndexerCluster{} instanceName := fmt.Sprintf("%s-idxc", deployment.GetName()) diff --git a/test/custom_resource_crud/custom_resource_crud_m4_test.go b/test/custom_resource_crud/custom_resource_crud_m4_test.go index a00472fb0..76938e3dd 100644 --- a/test/custom_resource_crud/custom_resource_crud_m4_test.go +++ b/test/custom_resource_crud/custom_resource_crud_m4_test.go @@ -82,6 +82,9 @@ var _ = Describe("Crcrud test for SVA M4", func() { // Ensure search head cluster go to Ready phase testenv.SearchHeadClusterReady(ctx, deployment, testcaseEnvInst) + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) + // Deploy Monitoring Console CRD mc, err := deployment.DeployMonitoringConsole(ctx, mcRef, "") Expect(err).To(Succeed(), "Unable to deploy Monitoring Console One instance") @@ -98,9 +101,6 @@ var _ = Describe("Crcrud test for SVA M4", func() { testenv.VerifyCPULimits(deployment, testcaseEnvInst.GetName(), podName, defaultCPULimits) } - // Verify telemetry is sent successfully - testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) - // Change CPU limits to trigger CR update idxc := &enterpriseApi.IndexerCluster{} for i := 1; i <= siteCount; i++ { diff --git a/test/custom_resource_crud/custom_resource_crud_s1_test.go b/test/custom_resource_crud/custom_resource_crud_s1_test.go index 9cca04c2b..ef3589171 100644 --- a/test/custom_resource_crud/custom_resource_crud_s1_test.go +++ b/test/custom_resource_crud/custom_resource_crud_s1_test.go @@ -72,6 +72,9 @@ var _ = Describe("Crcrud test for SVA S1", func() { // Verify Standalone goes to ready state testenv.StandaloneReady(ctx, deployment, deployment.GetName(), standalone, testcaseEnvInst) + // Verify telemetry is sent successfully + testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) + // Deploy Monitoring Console CRD mc, err := deployment.DeployMonitoringConsole(ctx, deployment.GetName(), "") Expect(err).To(Succeed(), "Unable to deploy Monitoring Console One instance") @@ -83,9 +86,6 @@ var _ = Describe("Crcrud test for SVA S1", func() { standalonePodName := fmt.Sprintf(testenv.StandalonePod, deployment.GetName(), 0) testenv.VerifyCPULimits(deployment, testcaseEnvInst.GetName(), standalonePodName, defaultCPULimits) - // Verify telemetry is sent successfully - testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) - // Change CPU limits to trigger CR update standalone.Spec.Resources.Limits = corev1.ResourceList{ "cpu": resource.MustParse(newCPULimits), From f7c5c881ba20f6eedf3664ae2d85085e8abbc959 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Tue, 3 Feb 2026 21:15:03 -0800 Subject: [PATCH 07/16] Set value for test and sokVersion --- .github/workflows/pre-release-workflow.yml | 18 ++++++- Makefile | 3 +- .../manager/controller_manager_telemetry.yaml | 1 + pkg/splunk/enterprise/telemetry.go | 49 +++++++++---------- pkg/splunk/enterprise/telemetry_test.go | 25 +++++----- 5 files changed, 58 insertions(+), 38 deletions(-) diff --git a/.github/workflows/pre-release-workflow.yml b/.github/workflows/pre-release-workflow.yml index b5b48bacc..608c165b2 100644 --- a/.github/workflows/pre-release-workflow.yml +++ b/.github/workflows/pre-release-workflow.yml @@ -232,6 +232,22 @@ jobs: replace: "SPLUNK_ENTERPRISE_IMAGE" include: "config/default/kustomization.yaml" + - name: Update Telemetry Test Value + uses: jacobtomlinson/gha-find-replace@v3 + with: + find: '"test"\s*:\s*"[^"]*"' + replace: '"test": "false"' + isRegexp: true + include: 'config/manager/controller_manager_telemetry.yaml' + + - name: Update sokVersion in controller_manager_telemetry.yaml + uses: jacobtomlinson/gha-find-replace@v3 + with: + find: '"sokVersion"\s*:\s*"[^"]*"' + replace: '"sokVersion": "${{ github.event.inputs.release_version }}"' + isRegexp: true + include: 'config/manager/controller_manager_telemetry.yaml' + - name: Reset files before creating Pull Request run: | git checkout go.sum @@ -249,4 +265,4 @@ jobs: body: | ### Automated Pull Request for Splunk Operator Release ${{ github.event.inputs.release_version }} * Changes added to docs/ChangeLog-NEW.md. Please filter and update ChangeLog.md - * Delete ChangeLog-New.md \ No newline at end of file + * Delete ChangeLog-New.md diff --git a/Makefile b/Makefile index d5f06bdd1..a8c330696 100644 --- a/Makefile +++ b/Makefile @@ -206,6 +206,7 @@ deploy: manifests kustomize uninstall ## Deploy controller to the K8s cluster sp $(SED) "s/value: WATCH_NAMESPACE_VALUE/value: \"${WATCH_NAMESPACE}\"/g" config/${ENVIRONMENT}/kustomization.yaml $(SED) "s|SPLUNK_ENTERPRISE_IMAGE|${SPLUNK_ENTERPRISE_IMAGE}|g" config/${ENVIRONMENT}/kustomization.yaml $(SED) "s/value: SPLUNK_GENERAL_TERMS_VALUE/value: \"${SPLUNK_GENERAL_TERMS}\"/g" config/${ENVIRONMENT}/kustomization.yaml + $(SED) 's/\("sokVersion": \)"[^"]*"/\1"$(VERSION)"/' config/manager/controller_manager_telemetry.yaml cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} RELATED_IMAGE_SPLUNK_ENTERPRISE=${SPLUNK_ENTERPRISE_IMAGE} WATCH_NAMESPACE=${WATCH_NAMESPACE} SPLUNK_GENERAL_TERMS=${SPLUNK_GENERAL_TERMS} $(KUSTOMIZE) build config/${ENVIRONMENT} | kubectl apply --server-side --force-conflicts -f - $(SED) "s/namespace: ${NAMESPACE}/namespace: splunk-operator/g" config/${ENVIRONMENT}/kustomization.yaml @@ -428,4 +429,4 @@ setup/ginkgo: build-installer: manifests generate kustomize mkdir -p dist cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} - $(KUSTOMIZE) build config/default > dist/install.yaml \ No newline at end of file + diff --git a/config/manager/controller_manager_telemetry.yaml b/config/manager/controller_manager_telemetry.yaml index ac26f3e73..b026c2c66 100644 --- a/config/manager/controller_manager_telemetry.yaml +++ b/config/manager/controller_manager_telemetry.yaml @@ -7,4 +7,5 @@ data: { "lastTransmission": "" "test": "true" + "sokVersion": "3.0.0" } \ No newline at end of file diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index 92d4f7391..9c5c5d8ed 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -21,9 +21,8 @@ import ( const ( requeAfterInSeconds = 86400 // Send telemetry once a day - defaultTestMode = true - // TODO: Ideally the version string should be set from the release tag - SOK_VERSION = "3.0.0" + defaultTestMode = "false" + defaultTestVersion = "unknown" telStatusKey = "status" ) @@ -41,6 +40,7 @@ type Telemetry struct { type TelemetryStatus struct { LastTransmission string `json:"lastTransmission,omitempty"` Test string `json:"test,omitempty"` + SokVersion string `json:"sokVersion,omitempty"` } func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap) (reconcile.Result, error) { @@ -61,8 +61,9 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * var data map[string]interface{} data = make(map[string]interface{}) + currentStatus := getCurrentStatus(ctx, cm) // Add SOK version - data[telSOKVersionKey] = SOK_VERSION + data[telSOKVersionKey] = currentStatus.SokVersion // Add SOK telemetry crWithTelAppList, crList := getAllCustomResources(ctx, client) collectCRTelData(ctx, client, crList, data) @@ -75,10 +76,13 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * // Now send the telemetry for _, crs := range crWithTelAppList { for _, cr := range crs { - test := isTest(ctx, cm) + test := false + if currentStatus.Test == "true" { + test = true + } success := SendTelemetry(ctx, client, cr, data, test) if success { - updateLastTransmissionTime(ctx, client, cm, test) + updateLastTransmissionTime(ctx, client, cm, currentStatus) return result, nil } } @@ -87,18 +91,11 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * return result, errors.New("Failed to send telemetry data") } -func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap, test bool) error { +func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap, status *TelemetryStatus) error { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("updateLastTransmissionTime") - var status TelemetryStatus status.LastTransmission = time.Now().UTC().Format(time.RFC3339) - if test { - status.Test = "true" - } else { - status.Test = "false" - } - updated, err := json.MarshalIndent(status, "", " ") if err != nil { scopedLog.Error(err, "Failed to marshal telemetry status") @@ -309,30 +306,32 @@ func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string } } -func isTest(ctx context.Context, cm *corev1.ConfigMap) bool { +func getCurrentStatus(ctx context.Context, cm *corev1.ConfigMap) *TelemetryStatus { reqLogger := log.FromContext(ctx) - scopedLog := reqLogger.WithName("checkTestMode") + scopedLog := reqLogger.WithName("getCurrentStatus") + defaultStatus := &TelemetryStatus{ + LastTransmission: "", + Test: defaultTestMode, + SokVersion: defaultTestVersion, + } + defaultStatus.LastTransmission = "" + defaultStatus.Test = "true" if cm.Data != nil { if val, ok := cm.Data[telStatusKey]; ok { var status TelemetryStatus err := json.Unmarshal([]byte(val), &status) if err != nil { scopedLog.Error(err, "Failed to unmarshal telemetry status") - return defaultTestMode + return defaultStatus } else { - if status.Test == "true" { - scopedLog.Info("Test is true") - return true - } - scopedLog.Info("Test is false") - return false + return defaultStatus } } } - scopedLog.Info("Failed to retrieve test mode") - return defaultTestMode + scopedLog.Info("Failed") + return defaultStatus } func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr splcommon.MetaObject, data map[string]interface{}, test bool) bool { diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go index 73f7c7a9d..1ce1b178b 100644 --- a/pkg/splunk/enterprise/telemetry_test.go +++ b/pkg/splunk/enterprise/telemetry_test.go @@ -462,8 +462,9 @@ func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, Data: map[string]string{}, } + status := &TelemetryStatus{Test: "false"} - err := updateLastTransmissionTime(ctx, mockClient, cm, false) + err := updateLastTransmissionTime(ctx, mockClient, cm, status) if err != nil { t.Fatalf("expected no error, got: %v", err) } @@ -471,18 +472,18 @@ func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { if !ok { t.Fatalf("expected telStatusKey in configmap data") } - var status TelemetryStatus - if err := json.Unmarshal([]byte(statusStr), &status); err != nil { + var statusObj TelemetryStatus + if err := json.Unmarshal([]byte(statusStr), &statusObj); err != nil { t.Fatalf("failed to unmarshal status: %v", err) } - if status.LastTransmission == "" { + if statusObj.LastTransmission == "" { t.Errorf("expected LastTransmission to be set") } - if _, err := time.Parse(time.RFC3339, status.LastTransmission); err != nil { - t.Errorf("LastTransmission is not RFC3339: %v", status.LastTransmission) + if _, err := time.Parse(time.RFC3339, statusObj.LastTransmission); err != nil { + t.Errorf("LastTransmission is not RFC3339: %v", statusObj.LastTransmission) } - if status.Test != "false" { - t.Errorf("expected Test to be 'false', got %v", status.Test) + if statusObj.Test != "false" { + t.Errorf("expected Test to be 'false', got %v", statusObj.Test) } } @@ -493,7 +494,8 @@ func TestTelemetryUpdateLastTransmissionTime_UpdateError(t *testing.T) { Data: map[string]string{}, } badClient := &errorUpdateClient{} - err := updateLastTransmissionTime(ctx, badClient, cm, false) + status := &TelemetryStatus{Test: "false"} + err := updateLastTransmissionTime(ctx, badClient, cm, status) if err == nil { t.Errorf("expected error from client.Update, got nil") } @@ -506,13 +508,14 @@ func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, Data: map[string]string{}, } - err := updateLastTransmissionTime(ctx, mockClient, cm, false) + status := &TelemetryStatus{Test: "false"} + err := updateLastTransmissionTime(ctx, mockClient, cm, status) if err != nil { t.Fatalf("expected no error, got: %v", err) } firstStatus := cm.Data[telStatusKey] time.Sleep(1 * time.Second) - err = updateLastTransmissionTime(ctx, mockClient, cm, false) + err = updateLastTransmissionTime(ctx, mockClient, cm, status) if err != nil { t.Fatalf("expected no error, got: %v", err) } From bcf5434124709170b102b9c306240e6e680a3ae7 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Tue, 3 Feb 2026 21:33:54 -0800 Subject: [PATCH 08/16] Address some comments --- internal/controller/telemetry_controller.go | 9 ++++----- internal/controller/telemetry_controller_test.go | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/internal/controller/telemetry_controller.go b/internal/controller/telemetry_controller.go index e81698823..572b88a2e 100644 --- a/internal/controller/telemetry_controller.go +++ b/internal/controller/telemetry_controller.go @@ -1,11 +1,11 @@ /* -Copyright (c) 2018-2022 Splunk Inc. All rights reserved. +Copyright (c) 2026 Splunk Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -13,7 +13,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - package controller import ( @@ -36,7 +35,7 @@ import ( ) const ( - // TODO: Below two contants are defined at default/kustomizatio.yaml, need to get it programatically? + // Below two contants are defined at kustomizatio*.yaml ConfigMapNamePrefix = "splunk-operator-" ConfigMapLabelName = "splunk-operator" @@ -79,7 +78,7 @@ func (r *TelemetryReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( result, err := enterprise.ApplyTelemetry(ctx, r.Client, cm) if err != nil { - reqLogger.Error(err, "Failed") + reqLogger.Error(err, "Failed to send telemetry") return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil } if result.Requeue && result.RequeueAfter != 0 { diff --git a/internal/controller/telemetry_controller_test.go b/internal/controller/telemetry_controller_test.go index c73ac5a23..b15a5f787 100644 --- a/internal/controller/telemetry_controller_test.go +++ b/internal/controller/telemetry_controller_test.go @@ -1,4 +1,18 @@ -package controller +/* +Copyright (c) 2026 Splunk Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/package controller import ( "context" From ddd31867f08949a938c7c2d737bcd9726d2e7175 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Tue, 3 Feb 2026 21:50:15 -0800 Subject: [PATCH 09/16] fix --- pkg/splunk/enterprise/telemetry.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index 9c5c5d8ed..8f7b5494f 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -123,8 +123,6 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien var err error var standaloneList enterpriseApi.StandaloneList - //instanceID = SplunkStandalone - //telAppName = fmt.Sprintf(telAppNameTemplateStr, "stdaln") err = client.List(ctx, &standaloneList) if err != nil { scopedLog.Error(err, "Failed to list standalone objects") From d0e0f5e05cc2b76ed5ec8b27d1cad07fc4cf2b68 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Wed, 4 Feb 2026 21:51:48 -0800 Subject: [PATCH 10/16] Update deployment telemetry --- pkg/splunk/enterprise/configuration.go | 15 +- pkg/splunk/enterprise/telemetry.go | 244 +++++++---- pkg/splunk/enterprise/telemetry_test.go | 512 +++++------------------- 3 files changed, 277 insertions(+), 494 deletions(-) diff --git a/pkg/splunk/enterprise/configuration.go b/pkg/splunk/enterprise/configuration.go index a0d90b354..392312004 100644 --- a/pkg/splunk/enterprise/configuration.go +++ b/pkg/splunk/enterprise/configuration.go @@ -85,6 +85,13 @@ var defaultStartupProbe corev1.Probe = corev1.Probe{ }, } +const ( + defaultRequestsCPU = "0.1" + defaultRequestsMemory = "512Mi" + defaultLimitsCPU = "4" + defaultLimitsMemory = "8Gi" +) + // getSplunkLabels returns a map of labels to use for Splunk Enterprise components. func getSplunkLabels(instanceIdentifier string, instanceType InstanceType, partOfIdentifier string) map[string]string { // For multisite / multipart IndexerCluster, the name of the part containing the cluster-manager is used @@ -366,12 +373,12 @@ func validateCommonSplunkSpec(ctx context.Context, c splcommon.ControllerClient, defaultResources := corev1.ResourceRequirements{ Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("0.1"), - corev1.ResourceMemory: resource.MustParse("512Mi"), + corev1.ResourceCPU: resource.MustParse(defaultRequestsCPU), + corev1.ResourceMemory: resource.MustParse(defaultRequestsMemory), }, Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("4"), - corev1.ResourceMemory: resource.MustParse("8Gi"), + corev1.ResourceCPU: resource.MustParse(defaultLimitsCPU), + corev1.ResourceMemory: resource.MustParse(defaultLimitsMemory), }, } diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index 8f7b5494f..1eaf019a7 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -10,8 +10,7 @@ import ( splclient "github.com/splunk/splunk-operator/pkg/splunk/client" splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" splutil "github.com/splunk/splunk-operator/pkg/splunk/util" - appsv1 "k8s.io/api/apps/v1" - "sigs.k8s.io/controller-runtime/pkg/client" + "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" "time" @@ -24,7 +23,12 @@ const ( defaultTestMode = "false" defaultTestVersion = "unknown" - telStatusKey = "status" + telStatusKey = "status" + telDeploymentKey = "deployment" + cpuRequestKey = "cpu_request" + memoryRequestKey = "memory_request" + cpuLimitKey = "cpu_limit" + memoryLimitKey = "memory_limit" ) //+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch @@ -64,9 +68,11 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * currentStatus := getCurrentStatus(ctx, cm) // Add SOK version data[telSOKVersionKey] = currentStatus.SokVersion + var telDeployment map[string]interface{} + telDeployment = make(map[string]interface{}) + data[telDeploymentKey] = telDeployment // Add SOK telemetry - crWithTelAppList, crList := getAllCustomResources(ctx, client) - collectCRTelData(ctx, client, crList, data) + crWithTelAppList := collectDeploymentTelData(ctx, client, telDeployment) /* * Add other component's telemetry set in splunk-operator-manager-telemetry configmap. * i.e splunk POD's telemetry @@ -91,7 +97,7 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * return result, errors.New("Failed to send telemetry data") } -func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap, status *TelemetryStatus) error { +func updateLastTransmissionTime(ctx context.Context, client splcommon.ControllerClient, cm *corev1.ConfigMap, status *TelemetryStatus) { reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("updateLastTransmissionTime") @@ -99,24 +105,72 @@ func updateLastTransmissionTime(ctx context.Context, client splcommon.Controller updated, err := json.MarshalIndent(status, "", " ") if err != nil { scopedLog.Error(err, "Failed to marshal telemetry status") - return err + return } cm.Data[telStatusKey] = string(updated) if err = client.Update(ctx, cm); err != nil { scopedLog.Error(err, "Failed to update telemetry status in configmap") - return err + return } scopedLog.Info("Updated last transmission time in configmap", "newStatus", cm.Data[telStatusKey]) +} + +func collectResourceTelData(resources corev1.ResourceRequirements, data map[string]string) { + defaultResources := corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(defaultRequestsCPU), + corev1.ResourceMemory: resource.MustParse(defaultRequestsMemory), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(defaultLimitsCPU), + corev1.ResourceMemory: resource.MustParse(defaultLimitsMemory), + }, + } + + if resources.Requests == nil { + cpu := defaultResources.Requests[corev1.ResourceCPU] + mem := defaultResources.Requests[corev1.ResourceMemory] + data[cpuRequestKey] = (&cpu).String() + data[memoryRequestKey] = (&mem).String() + } else { + if cpuReq, ok := resources.Requests[corev1.ResourceCPU]; ok { + data[cpuRequestKey] = cpuReq.String() + } else { + cpu := defaultResources.Requests[corev1.ResourceCPU] + data[cpuRequestKey] = (&cpu).String() + } + if memReq, ok := resources.Requests[corev1.ResourceMemory]; ok { + data[memoryRequestKey] = memReq.String() + } else { + mem := defaultResources.Requests[corev1.ResourceMemory] + data[memoryRequestKey] = (&mem).String() + } + } - return nil + if resources.Limits == nil { + cpu := defaultResources.Limits[corev1.ResourceCPU] + mem := defaultResources.Limits[corev1.ResourceMemory] + data[cpuLimitKey] = (&cpu).String() + data[memoryLimitKey] = (&mem).String() + } else { + if cpuLim, ok := resources.Limits[corev1.ResourceCPU]; ok { + data[cpuLimitKey] = cpuLim.String() + } else { + cpu := defaultResources.Limits[corev1.ResourceCPU] + data[cpuLimitKey] = (&cpu).String() + } + if memLim, ok := resources.Limits[corev1.ResourceMemory]; ok { + data[memoryLimitKey] = memLim.String() + } else { + mem := defaultResources.Limits[corev1.ResourceMemory] + data[memoryLimitKey] = (&mem).String() + } + } } -func getAllCustomResources(ctx context.Context, client splcommon.ControllerClient) (map[string][]splcommon.MetaObject, map[string][]splcommon.MetaObject) { +func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerClient, deploymentData map[string]interface{}) map[string][]splcommon.MetaObject { reqLogger := log.FromContext(ctx) - scopedLog := reqLogger.WithName("collectCRTelData") - - var crList map[string][]splcommon.MetaObject - crList = make(map[string][]splcommon.MetaObject) + scopedLog := reqLogger.WithName("collectDeploymentTelData") var crWithTelAppList map[string][]splcommon.MetaObject crWithTelAppList = make(map[string][]splcommon.MetaObject) @@ -127,12 +181,17 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list standalone objects") } else if len(standaloneList.Items) > 0 { - crList[standaloneList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[standaloneList.Items[0].Kind] = perKindData for _, cr := range standaloneList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[standaloneList.Items[0].Kind] = append(crWithTelAppList[standaloneList.Items[0].Kind], &cr) } - crList[standaloneList.Items[0].Kind] = append(crList[standaloneList.Items[0].Kind], &cr) } } @@ -141,12 +200,17 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list LicenseManager objects") } else if len(lmanagerList.Items) > 0 { - crList[lmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[lmanagerList.Items[0].Kind] = perKindData for _, cr := range lmanagerList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[lmanagerList.Items[0].Kind] = append(crWithTelAppList[lmanagerList.Items[0].Kind], &cr) } - crList[lmanagerList.Items[0].Kind] = append(crList[lmanagerList.Items[0].Kind], &cr) } } @@ -155,12 +219,17 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list LicenseMaster objects") } else if len(lmasterList.Items) > 0 { - crList[lmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[lmasterList.Items[0].Kind] = perKindData for _, cr := range lmasterList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[lmasterList.Items[0].Kind] = append(crWithTelAppList[lmasterList.Items[0].Kind], &cr) } - crList[lmasterList.Items[0].Kind] = append(crList[lmasterList.Items[0].Kind], &cr) } } @@ -169,12 +238,17 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list SearchHeadCluster objects") } else if len(shcList.Items) > 0 { - crList[shcList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[shcList.Items[0].Kind] = perKindData for _, cr := range shcList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[shcList.Items[0].Kind] = append(crWithTelAppList[shcList.Items[0].Kind], &cr) } - crList[shcList.Items[0].Kind] = append(crList[shcList.Items[0].Kind], &cr) } } @@ -183,10 +257,14 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list IndexerCluster objects") } else if len(idxList.Items) > 0 { - crList[idxList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[idxList.Items[0].Kind] = perKindData for _, cr := range idxList.Items { - // IndexerCluster does not have telemetry app installed - crList[idxList.Items[0].Kind] = append(crList[idxList.Items[0].Kind], &cr) + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) } } @@ -195,12 +273,17 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list ClusterManager objects") } else if len(cmanagerList.Items) > 0 { - crList[cmanagerList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[cmanagerList.Items[0].Kind] = perKindData for _, cr := range cmanagerList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[cmanagerList.Items[0].Kind] = append(crWithTelAppList[cmanagerList.Items[0].Kind], &cr) } - crList[cmanagerList.Items[0].Kind] = append(crList[cmanagerList.Items[0].Kind], &cr) } } @@ -209,78 +292,75 @@ func getAllCustomResources(ctx context.Context, client splcommon.ControllerClien if err != nil { scopedLog.Error(err, "Failed to list ClusterMaster objects") } else if len(cmasterList.Items) > 0 { - crList[cmasterList.Items[0].Kind] = make([]splcommon.MetaObject, 0) + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[cmasterList.Items[0].Kind] = perKindData for _, cr := range cmasterList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[cmasterList.Items[0].Kind] = append(crWithTelAppList[cmasterList.Items[0].Kind], &cr) } - crList[cmasterList.Items[0].Kind] = append(crList[cmasterList.Items[0].Kind], &cr) } } - return crWithTelAppList, crList -} - -func getOwnedStatefulSets( - ctx context.Context, - c client.Client, - cr client.Object, -) ([]appsv1.StatefulSet, error) { - reqLogger := log.FromContext(ctx) - scopedLog := reqLogger.WithName("getOwnedStatefulSets") - - stsList := &appsv1.StatefulSetList{} - if err := c.List(ctx, stsList, - client.InNamespace(cr.GetNamespace()), - ); err != nil { - scopedLog.Error(err, "Failed to list StatefulSets", "CR Name", cr.GetName()) - return nil, err + var licenseMasterList enterpriseApiV3.LicenseMasterList + err = client.List(ctx, &licenseMasterList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterMaster objects") + } else if len(licenseMasterList.Items) > 0 { + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[licenseMasterList.Items[0].Kind] = perKindData + for _, cr := range licenseMasterList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) + if cr.Status.TelAppInstalled { + crWithTelAppList[licenseMasterList.Items[0].Kind] = append(crWithTelAppList[licenseMasterList.Items[0].Kind], &cr) + } + } } - var result []appsv1.StatefulSet - for _, sts := range stsList.Items { - for _, owner := range sts.OwnerReferences { - if owner.UID == cr.GetUID() { - result = append(result, sts) - break + var licenseManagerList enterpriseApi.LicenseManagerList + err = client.List(ctx, &licenseManagerList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterMaster objects") + } else if len(licenseManagerList.Items) > 0 { + var perKindData map[string]interface{} + perKindData = make(map[string]interface{}) + deploymentData[licenseManagerList.Items[0].Kind] = perKindData + for _, cr := range licenseManagerList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) + if cr.Status.TelAppInstalled { + crWithTelAppList[licenseManagerList.Items[0].Kind] = append(crWithTelAppList[licenseManagerList.Items[0].Kind], &cr) } } } - return result, nil -} - -func collectCRTelData(ctx context.Context, client splcommon.ControllerClient, crList map[string][]splcommon.MetaObject, data map[string]interface{}) { - reqLogger := log.FromContext(ctx) - scopedLog := reqLogger.WithName("collectCRTelData") - scopedLog.Info("Start") - for kind, crs := range crList { + var mconsoleList enterpriseApi.MonitoringConsoleList + err = client.List(ctx, &mconsoleList) + if err != nil { + scopedLog.Error(err, "Failed to list ClusterMaster objects") + } else if len(mconsoleList.Items) > 0 { var perKindData map[string]interface{} perKindData = make(map[string]interface{}) - for _, cr := range crs { - var perCRData []map[string]string - perCRData = make([]map[string]string, 0) - stsList, err := getOwnedStatefulSets(ctx, client, cr) - if err != nil { - scopedLog.Error(err, "Failed to get owned StatefulSets") - } else if len(stsList) > 0 { - for _, sts := range stsList { - for _, container := range sts.Spec.Template.Spec.Containers { - resPerContainer := map[string]string{ - "container_name": container.Name, - "cpu_request": container.Resources.Requests.Cpu().String(), - "memory_request": container.Resources.Requests.Memory().String(), - "cpu_limit": container.Resources.Limits.Cpu().String(), - "memory_limit": container.Resources.Limits.Memory().String(), - } - perCRData = append(perCRData, resPerContainer) - } - } - } - perKindData[cr.GetName()] = perCRData + deploymentData[mconsoleList.Items[0].Kind] = perKindData + for _, cr := range mconsoleList.Items { + var crResourceData map[string]string + crResourceData = make(map[string]string) + perKindData[cr.GetName()] = crResourceData + collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) } - data[kind] = perKindData } + + return crWithTelAppList } func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string]interface{}) { diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go index 1ce1b178b..f591724c3 100644 --- a/pkg/splunk/enterprise/telemetry_test.go +++ b/pkg/splunk/enterprise/telemetry_test.go @@ -6,7 +6,6 @@ import ( "context" "encoding/json" enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" - splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" "testing" "time" @@ -20,73 +19,88 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -func TestTelemetryGetAllCustomResources_Empty(t *testing.T) { - mockClient := spltest.NewMockClient() - ctx := context.TODO() - crWithTelAppList, crMap := getAllCustomResources(ctx, mockClient) - if len(crWithTelAppList) != 0 { - t.Errorf("expected no CRs with telemetry app, got %d", len(crWithTelAppList)) - } - if len(crMap) != 0 { - t.Errorf("expected no CRs, got %d", len(crMap)) +func TestCollectResourceTelData_NilMaps(t *testing.T) { + data := make(map[string]string) + collectResourceTelData(corev1.ResourceRequirements{}, data) + if data[cpuRequestKey] == "" || data[memoryRequestKey] == "" || data[cpuLimitKey] == "" || data[memoryLimitKey] == "" { + t.Errorf("expected default values for nil maps") } } -func TestTelemetryCollectCRTelData_WithMockCR(t *testing.T) { - mockClient := spltest.NewMockClient() - ctx := context.TODO() - cr := &enterpriseApi.Standalone{} - cr.TypeMeta.Kind = "Standalone" - cr.ObjectMeta.Name = "test-standalone" - crList := map[string][]splcommon.MetaObject{"Standalone": {cr}} - data := make(map[string]interface{}) - collectCRTelData(ctx, mockClient, crList, data) - if _, ok := data["Standalone"]; !ok { - t.Errorf("expected Standalone key in data map") +func TestCollectResourceTelData_MissingKeys(t *testing.T) { + data := make(map[string]string) + reqs := corev1.ResourceRequirements{ + Requests: corev1.ResourceList{}, + Limits: corev1.ResourceList{}, + } + collectResourceTelData(reqs, data) + if data[cpuRequestKey] == "" || data[memoryRequestKey] == "" || data[cpuLimitKey] == "" || data[memoryLimitKey] == "" { + t.Errorf("expected default values for missing keys") } } -func TestApplyTelemetry_ConfigMapNoData(t *testing.T) { - mockClient := spltest.NewMockClient() - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, - Data: map[string]string{}, +func TestCollectResourceTelData_ValuesPresent(t *testing.T) { + data := make(map[string]string) + reqs := corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("123m"), + corev1.ResourceMemory: resource.MustParse("456Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("789m"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, } - ctx := context.TODO() - _, err := ApplyTelemetry(ctx, mockClient, cm) - if err == nil { - t.Errorf("expected error when no CRs present, got nil") + collectResourceTelData(reqs, data) + if data[cpuRequestKey] != "123m" || data[memoryRequestKey] != "456Mi" || data[cpuLimitKey] != "789m" || data[memoryLimitKey] != "1Gi" { + t.Errorf("unexpected values: got %+v", data) } } -func TestTelemetryCollectCMTelData_UnmarshalError(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, - Data: map[string]string{"bad": "notjson"}, - } - ctx := context.TODO() +func TestCollectCMTelData_UnmarshalError(t *testing.T) { + cm := &corev1.ConfigMap{Data: map[string]string{"bad": "notjson"}} data := make(map[string]interface{}) - CollectCMTelData(ctx, cm, data) + CollectCMTelData(context.TODO(), cm, data) if data["bad"] != "notjson" { t.Errorf("expected fallback to string on unmarshal error") } } -func TestTelemetryCollectCMTelData_ValidJSON(t *testing.T) { +func TestCollectCMTelData_ValidJSON(t *testing.T) { val := map[string]interface{}{"foo": "bar"} b, _ := json.Marshal(val) - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"}, - Data: map[string]string{"good": string(b)}, - } - ctx := context.TODO() + cm := &corev1.ConfigMap{Data: map[string]string{"good": string(b)}} data := make(map[string]interface{}) - CollectCMTelData(ctx, cm, data) + CollectCMTelData(context.TODO(), cm, data) if m, ok := data["good"].(map[string]interface{}); !ok || m["foo"] != "bar" { t.Errorf("expected valid JSON to be unmarshaled") } } +func TestGetCurrentStatus_Default(t *testing.T) { + cm := &corev1.ConfigMap{Data: nil} + status := getCurrentStatus(context.TODO(), cm) + if status == nil || status.Test != "true" { + t.Errorf("expected default status") + } +} + +func TestGetCurrentStatus_UnmarshalError(t *testing.T) { + cm := &corev1.ConfigMap{Data: map[string]string{"status": "notjson"}} + status := getCurrentStatus(context.TODO(), cm) + if status == nil || status.Test != "true" { + t.Errorf("expected default status on unmarshal error") + } +} + +func TestUpdateLastTransmissionTime_MarshalError(t *testing.T) { + ctx := context.TODO() + cm := &corev1.ConfigMap{Data: map[string]string{}} + // Use a struct with a channel field to cause json.MarshalIndent to fail + // Should not panic + updateLastTransmissionTime(ctx, spltest.NewMockClient(), cm, (*TelemetryStatus)(nil)) // pass nil to avoid panic +} + func TestSendTelemetry_UnknownKind(t *testing.T) { cr := &enterpriseApi.Standalone{} cr.TypeMeta.Kind = "UnknownKind" @@ -107,354 +121,6 @@ func TestSendTelemetry_NoSecret(t *testing.T) { } } -func TestTelemetryGetAllCustomResources_AllKinds(t *testing.T) { - ctx := context.TODO() - fakeClient := &FakeListClient{ - crs: map[string][]client.Object{ - "Standalone": {&enterpriseApi.Standalone{TypeMeta: metav1.TypeMeta{Kind: "Standalone"}, ObjectMeta: metav1.ObjectMeta{Name: "test-standalone"}}}, - "LicenseManager": {&enterpriseApi.LicenseManager{TypeMeta: metav1.TypeMeta{Kind: "LicenseManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemanager"}}}, - "LicenseMaster": {&enterpriseApiV3.LicenseMaster{TypeMeta: metav1.TypeMeta{Kind: "LicenseMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-licensemaster"}}}, - "SearchHeadCluster": {&enterpriseApi.SearchHeadCluster{TypeMeta: metav1.TypeMeta{Kind: "SearchHeadCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-shc"}}}, - "IndexerCluster": {&enterpriseApi.IndexerCluster{TypeMeta: metav1.TypeMeta{Kind: "IndexerCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-idx"}}}, - "ClusterManager": {&enterpriseApi.ClusterManager{TypeMeta: metav1.TypeMeta{Kind: "ClusterManager"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmanager"}}}, - "ClusterMaster": {&enterpriseApiV3.ClusterMaster{TypeMeta: metav1.TypeMeta{Kind: "ClusterMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "test-cmaster"}}}, - }, - sts: []apps.StatefulSet{}, // ensure all keys are present - } - _, crMap := getAllCustomResources(ctx, fakeClient) - kinds := []string{"Standalone", "LicenseManager", "LicenseMaster", "SearchHeadCluster", "IndexerCluster", "ClusterManager", "ClusterMaster"} - for _, kind := range kinds { - if _, ok := crMap[kind]; !ok { - t.Errorf("expected kind %s in CR map", kind) - } - } - // crWithTelAppList may be empty if TelAppInstalled is not set in the test CRs -} - -func TestTelemetryCollectCRTelData_StandaloneData(t *testing.T) { - ctx := context.TODO() - cr := &enterpriseApi.Standalone{} - cr.TypeMeta.Kind = "Standalone" - cr.ObjectMeta.Name = "test-standalone" - cr.ObjectMeta.Namespace = "default" - crList := map[string][]splcommon.MetaObject{"Standalone": {cr}} - sts := apps.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-standalone-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{{ - UID: cr.GetUID(), - }}, - }, - Spec: apps.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "test-container", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("500m"), - corev1.ResourceMemory: resource.MustParse("128Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("1"), - corev1.ResourceMemory: resource.MustParse("256Mi"), - }, - }, - }}, - }, - }, - }, - } - fakeClient := &FakeListClient{ - sts: []apps.StatefulSet{sts}, - } - data := make(map[string]interface{}) - collectCRTelData(ctx, fakeClient, crList, data) - standaloneData, ok := data["Standalone"].(map[string]interface{}) - if !ok { - t.Fatalf("expected Standalone data map") - } - crData, ok := standaloneData["test-standalone"].([]map[string]string) - if !ok || len(crData) == 0 { - t.Fatalf("expected resource data slice") - } - container := crData[0] - if container["cpu_request"] != "500m" || container["memory_request"] != "128Mi" || container["cpu_limit"] != "1" || container["memory_limit"] != "256Mi" { - t.Errorf("unexpected resource values: got %+v", container) - } -} - -func TestTelemetryCollectCRTelData_LicenseManagerData(t *testing.T) { - ctx := context.TODO() - cr := &enterpriseApi.LicenseManager{} - cr.TypeMeta.Kind = "LicenseManager" - cr.ObjectMeta.Name = "test-licensemanager" - cr.ObjectMeta.Namespace = "default" - crList := map[string][]splcommon.MetaObject{"LicenseManager": {cr}} - sts := apps.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-licensemanager-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{{ - UID: cr.GetUID(), - }}, - }, - Spec: apps.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "test-container", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("600m"), - corev1.ResourceMemory: resource.MustParse("256Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("2"), - corev1.ResourceMemory: resource.MustParse("512Mi"), - }, - }, - }}, - }, - }, - }, - } - fakeClient := &FakeListClient{ - sts: []apps.StatefulSet{sts}, - } - data := make(map[string]interface{}) - collectCRTelData(ctx, fakeClient, crList, data) - lmData, ok := data["LicenseManager"].(map[string]interface{}) - if !ok { - t.Fatalf("expected LicenseManager data map") - } - crData, ok := lmData["test-licensemanager"].([]map[string]string) - if !ok || len(crData) == 0 { - t.Fatalf("expected resource data slice") - } - container := crData[0] - if container["cpu_request"] != "600m" || container["memory_request"] != "256Mi" || container["cpu_limit"] != "2" || container["memory_limit"] != "512Mi" { - t.Errorf("unexpected resource values: got %+v", container) - } -} - -func TestTelemetryCollectCRTelData_LicenseMasterData(t *testing.T) { - ctx := context.TODO() - cr := &enterpriseApiV3.LicenseMaster{} - cr.TypeMeta.Kind = "LicenseMaster" - cr.ObjectMeta.Name = "test-licensemaster" - cr.ObjectMeta.Namespace = "default" - crList := map[string][]splcommon.MetaObject{"LicenseMaster": {cr}} - sts := apps.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-licensemaster-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{{ - UID: cr.GetUID(), - }}, - }, - Spec: apps.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "test-container", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("700m"), - corev1.ResourceMemory: resource.MustParse("384Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("3"), - corev1.ResourceMemory: resource.MustParse("768Mi"), - }, - }, - }}, - }, - }, - }, - } - fakeClient := &FakeListClient{ - sts: []apps.StatefulSet{sts}, - } - data := make(map[string]interface{}) - collectCRTelData(ctx, fakeClient, crList, data) - lmData, ok := data["LicenseMaster"].(map[string]interface{}) - if !ok { - t.Fatalf("expected LicenseMaster data map") - } - crData, ok := lmData["test-licensemaster"].([]map[string]string) - if !ok || len(crData) == 0 { - t.Fatalf("expected resource data slice") - } - container := crData[0] - if container["cpu_request"] != "700m" || container["memory_request"] != "384Mi" || container["cpu_limit"] != "3" || container["memory_limit"] != "768Mi" { - t.Errorf("unexpected resource values: got %+v", container) - } -} - -func TestTelemetryCollectCRTelData_SearchHeadClusterData(t *testing.T) { - ctx := context.TODO() - cr := &enterpriseApi.SearchHeadCluster{} - cr.TypeMeta.Kind = "SearchHeadCluster" - cr.ObjectMeta.Name = "test-shc" - cr.ObjectMeta.Namespace = "default" - crList := map[string][]splcommon.MetaObject{"SearchHeadCluster": {cr}} - sts := apps.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-shc-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{{ - UID: cr.GetUID(), - }}, - }, - Spec: apps.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "test-container", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("800m"), - corev1.ResourceMemory: resource.MustParse("512Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("4"), - corev1.ResourceMemory: resource.MustParse("1Gi"), - }, - }, - }}, - }, - }, - }, - } - fakeClient := &FakeListClient{ - sts: []apps.StatefulSet{sts}, - } - data := make(map[string]interface{}) - collectCRTelData(ctx, fakeClient, crList, data) - shcData, ok := data["SearchHeadCluster"].(map[string]interface{}) - if !ok { - t.Fatalf("expected SearchHeadCluster data map") - } - crData, ok := shcData["test-shc"].([]map[string]string) - if !ok || len(crData) == 0 { - t.Fatalf("expected resource data slice") - } - container := crData[0] - if container["cpu_request"] != "800m" || container["memory_request"] != "512Mi" || container["cpu_limit"] != "4" || container["memory_limit"] != "1Gi" { - t.Errorf("unexpected resource values: got %+v", container) - } -} - -func TestTelemetryCollectCRTelData_ClusterManagerData(t *testing.T) { - ctx := context.TODO() - cr := &enterpriseApi.ClusterManager{} - cr.TypeMeta.Kind = "ClusterManager" - cr.ObjectMeta.Name = "test-cmanager" - cr.ObjectMeta.Namespace = "default" - crList := map[string][]splcommon.MetaObject{"ClusterManager": {cr}} - sts := apps.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cmanager-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{{ - UID: cr.GetUID(), - }}, - }, - Spec: apps.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "test-container", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("900m"), - corev1.ResourceMemory: resource.MustParse("640Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("5"), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - }, - }}, - }, - }, - }, - } - fakeClient := &FakeListClient{ - sts: []apps.StatefulSet{sts}, - } - data := make(map[string]interface{}) - collectCRTelData(ctx, fakeClient, crList, data) - cmData, ok := data["ClusterManager"].(map[string]interface{}) - if !ok { - t.Fatalf("expected ClusterManager data map") - } - crData, ok := cmData["test-cmanager"].([]map[string]string) - if !ok || len(crData) == 0 { - t.Fatalf("expected resource data slice") - } - container := crData[0] - if container["cpu_request"] != "900m" || container["memory_request"] != "640Mi" || container["cpu_limit"] != "5" || container["memory_limit"] != "2Gi" { - t.Errorf("unexpected resource values: got %+v", container) - } -} - -func TestTelemetryCollectCRTelData_ClusterMasterData(t *testing.T) { - ctx := context.TODO() - cr := &enterpriseApiV3.ClusterMaster{} - cr.TypeMeta.Kind = "ClusterMaster" - cr.ObjectMeta.Name = "test-cmaster" - cr.ObjectMeta.Namespace = "default" - crList := map[string][]splcommon.MetaObject{"ClusterMaster": {cr}} - sts := apps.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cmaster-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{{ - UID: cr.GetUID(), - }}, - }, - Spec: apps.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: "test-container", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("1000m"), - corev1.ResourceMemory: resource.MustParse("768Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("6"), - corev1.ResourceMemory: resource.MustParse("4Gi"), - }, - }, - }}, - }, - }, - }, - } - fakeClient := &FakeListClient{ - sts: []apps.StatefulSet{sts}, - } - data := make(map[string]interface{}) - collectCRTelData(ctx, fakeClient, crList, data) - cmData, ok := data["ClusterMaster"].(map[string]interface{}) - if !ok { - t.Fatalf("expected ClusterMaster data map") - } - crData, ok := cmData["test-cmaster"].([]map[string]string) - if !ok || len(crData) == 0 { - t.Fatalf("expected resource data slice") - } - container := crData[0] - if container["cpu_request"] != "1" || container["memory_request"] != "768Mi" || container["cpu_limit"] != "6" || container["memory_limit"] != "4Gi" { - t.Errorf("unexpected resource values: got %+v", container) - } -} - func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { mockClient := spltest.NewMockClient() ctx := context.TODO() @@ -464,10 +130,7 @@ func TestTelemetryUpdateLastTransmissionTime_SetsTimestamp(t *testing.T) { } status := &TelemetryStatus{Test: "false"} - err := updateLastTransmissionTime(ctx, mockClient, cm, status) - if err != nil { - t.Fatalf("expected no error, got: %v", err) - } + updateLastTransmissionTime(ctx, mockClient, cm, status) statusStr, ok := cm.Data[telStatusKey] if !ok { t.Fatalf("expected telStatusKey in configmap data") @@ -495,10 +158,7 @@ func TestTelemetryUpdateLastTransmissionTime_UpdateError(t *testing.T) { } badClient := &errorUpdateClient{} status := &TelemetryStatus{Test: "false"} - err := updateLastTransmissionTime(ctx, badClient, cm, status) - if err == nil { - t.Errorf("expected error from client.Update, got nil") - } + updateLastTransmissionTime(ctx, badClient, cm, status) } func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { @@ -509,22 +169,58 @@ func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { Data: map[string]string{}, } status := &TelemetryStatus{Test: "false"} - err := updateLastTransmissionTime(ctx, mockClient, cm, status) - if err != nil { - t.Fatalf("expected no error, got: %v", err) - } + updateLastTransmissionTime(ctx, mockClient, cm, status) firstStatus := cm.Data[telStatusKey] time.Sleep(1 * time.Second) - err = updateLastTransmissionTime(ctx, mockClient, cm, status) - if err != nil { - t.Fatalf("expected no error, got: %v", err) - } + updateLastTransmissionTime(ctx, mockClient, cm, status) secondStatus := cm.Data[telStatusKey] if firstStatus == secondStatus { t.Errorf("expected status to change on repeated call") } } +func TestCollectDeploymentTelData_AllKinds(t *testing.T) { + ctx := context.TODO() + crs := map[string][]client.Object{ + "Standalone": {&enterpriseApi.Standalone{TypeMeta: metav1.TypeMeta{Kind: "Standalone"}, ObjectMeta: metav1.ObjectMeta{Name: "standalone1"}, Spec: enterpriseApi.StandaloneSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("1"), corev1.ResourceMemory: resource.MustParse("1Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("2"), corev1.ResourceMemory: resource.MustParse("2Gi")}}}}}}}, + "LicenseManager": {&enterpriseApi.LicenseManager{TypeMeta: metav1.TypeMeta{Kind: "LicenseManager"}, ObjectMeta: metav1.ObjectMeta{Name: "lm1"}, Spec: enterpriseApi.LicenseManagerSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("3"), corev1.ResourceMemory: resource.MustParse("3Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("4"), corev1.ResourceMemory: resource.MustParse("4Gi")}}}}}}}, + "LicenseMaster": {&enterpriseApiV3.LicenseMaster{TypeMeta: metav1.TypeMeta{Kind: "LicenseMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "lmast1"}, Spec: enterpriseApiV3.LicenseMasterSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("5"), corev1.ResourceMemory: resource.MustParse("5Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("6"), corev1.ResourceMemory: resource.MustParse("6Gi")}}}}}}}, + "SearchHeadCluster": {&enterpriseApi.SearchHeadCluster{TypeMeta: metav1.TypeMeta{Kind: "SearchHeadCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "shc1"}, Spec: enterpriseApi.SearchHeadClusterSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("7"), corev1.ResourceMemory: resource.MustParse("7Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("8"), corev1.ResourceMemory: resource.MustParse("8Gi")}}}}}}}, + "IndexerCluster": {&enterpriseApi.IndexerCluster{TypeMeta: metav1.TypeMeta{Kind: "IndexerCluster"}, ObjectMeta: metav1.ObjectMeta{Name: "idx1"}, Spec: enterpriseApi.IndexerClusterSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("9"), corev1.ResourceMemory: resource.MustParse("9Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("10"), corev1.ResourceMemory: resource.MustParse("10Gi")}}}}}}}, + "ClusterManager": {&enterpriseApi.ClusterManager{TypeMeta: metav1.TypeMeta{Kind: "ClusterManager"}, ObjectMeta: metav1.ObjectMeta{Name: "cmgr1"}, Spec: enterpriseApi.ClusterManagerSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("11"), corev1.ResourceMemory: resource.MustParse("11Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("12"), corev1.ResourceMemory: resource.MustParse("12Gi")}}}}}}}, + "ClusterMaster": {&enterpriseApiV3.ClusterMaster{TypeMeta: metav1.TypeMeta{Kind: "ClusterMaster"}, ObjectMeta: metav1.ObjectMeta{Name: "cmast1"}, Spec: enterpriseApiV3.ClusterMasterSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("13"), corev1.ResourceMemory: resource.MustParse("13Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("14"), corev1.ResourceMemory: resource.MustParse("14Gi")}}}}}}}, + } + fakeClient := &FakeListClient{crs: crs} + deploymentData := make(map[string]interface{}) + crWithTelAppList := collectDeploymentTelData(ctx, fakeClient, deploymentData) + kinds := []string{"Standalone", "LicenseManager", "LicenseMaster", "SearchHeadCluster", "IndexerCluster", "ClusterManager", "ClusterMaster"} + for _, kind := range kinds { + if _, ok := deploymentData[kind]; !ok { + t.Errorf("expected deploymentData to have key %s", kind) + } + // Check resource data for at least one CR per kind + kindData, ok := deploymentData[kind].(map[string]interface{}) + if !ok { + t.Errorf("expected deploymentData[%s] to be map[string]interface{}", kind) + continue + } + for crName, v := range kindData { + resData, ok := v.(map[string]string) + if !ok { + t.Errorf("expected resource data for %s/%s to be map[string]string", kind, crName) + } + // Spot check a value + if resData[cpuRequestKey] == "" || resData[memoryRequestKey] == "" { + t.Errorf("expected resource data for %s/%s to have cpu/memory", kind, crName) + } + } + } + // crWithTelAppList should be empty since TelAppInstalled is not set + if len(crWithTelAppList) != 0 { + t.Errorf("expected crWithTelAppList to be empty if TelAppInstalled is not set") + } +} + // errorUpdateClient is a mock client that always returns an error on Update // Used for testing updateLastTransmissionTime error handling type errorUpdateClient struct { From 2dc1851f15d7ffb883ab071722ce76247c400872 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Thu, 5 Feb 2026 17:49:48 -0800 Subject: [PATCH 11/16] Fix unit test --- .../controller/telemetry_controller_test.go | 13 +--- pkg/splunk/enterprise/telemetry.go | 60 +++---------------- pkg/splunk/enterprise/telemetry_test.go | 27 ++++----- 3 files changed, 24 insertions(+), 76 deletions(-) diff --git a/internal/controller/telemetry_controller_test.go b/internal/controller/telemetry_controller_test.go index b15a5f787..7414a22d4 100644 --- a/internal/controller/telemetry_controller_test.go +++ b/internal/controller/telemetry_controller_test.go @@ -48,7 +48,7 @@ var _ = Describe("Telemetry Controller", func() { result, err := r.Reconcile(ctx, req) Expect(err).To(BeNil()) Expect(result.Requeue).To(BeTrue()) - Expect(result.RequeueAfter).To(Equal(time.Second * 60)) + Expect(result.RequeueAfter).To(Equal(time.Second * 600)) }) It("Reconcile returns requeue when ConfigMap has no data", func() { @@ -63,16 +63,7 @@ var _ = Describe("Telemetry Controller", func() { result, err := r.Reconcile(ctx, req) Expect(err).To(BeNil()) Expect(result.Requeue).To(BeTrue()) - Expect(result.RequeueAfter).To(Equal(time.Second * 60)) + Expect(result.RequeueAfter).To(Equal(time.Second * 600)) }) - // Additional tests for error and success cases can be added here }) - -/* -func TestTelemetryController(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "Telemetry Controller Suite") -} - -*/ diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index 1eaf019a7..19afad1e3 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -306,44 +306,6 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl } } - var licenseMasterList enterpriseApiV3.LicenseMasterList - err = client.List(ctx, &licenseMasterList) - if err != nil { - scopedLog.Error(err, "Failed to list ClusterMaster objects") - } else if len(licenseMasterList.Items) > 0 { - var perKindData map[string]interface{} - perKindData = make(map[string]interface{}) - deploymentData[licenseMasterList.Items[0].Kind] = perKindData - for _, cr := range licenseMasterList.Items { - var crResourceData map[string]string - crResourceData = make(map[string]string) - perKindData[cr.GetName()] = crResourceData - collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) - if cr.Status.TelAppInstalled { - crWithTelAppList[licenseMasterList.Items[0].Kind] = append(crWithTelAppList[licenseMasterList.Items[0].Kind], &cr) - } - } - } - - var licenseManagerList enterpriseApi.LicenseManagerList - err = client.List(ctx, &licenseManagerList) - if err != nil { - scopedLog.Error(err, "Failed to list ClusterMaster objects") - } else if len(licenseManagerList.Items) > 0 { - var perKindData map[string]interface{} - perKindData = make(map[string]interface{}) - deploymentData[licenseManagerList.Items[0].Kind] = perKindData - for _, cr := range licenseManagerList.Items { - var crResourceData map[string]string - crResourceData = make(map[string]string) - perKindData[cr.GetName()] = crResourceData - collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) - if cr.Status.TelAppInstalled { - crWithTelAppList[licenseManagerList.Items[0].Kind] = append(crWithTelAppList[licenseManagerList.Items[0].Kind], &cr) - } - } - } - var mconsoleList enterpriseApi.MonitoringConsoleList err = client.List(ctx, &mconsoleList) if err != nil { @@ -393,22 +355,18 @@ func getCurrentStatus(ctx context.Context, cm *corev1.ConfigMap) *TelemetryStatu Test: defaultTestMode, SokVersion: defaultTestVersion, } - defaultStatus.LastTransmission = "" - defaultStatus.Test = "true" - if cm.Data != nil { - if val, ok := cm.Data[telStatusKey]; ok { - var status TelemetryStatus - err := json.Unmarshal([]byte(val), &status) - if err != nil { - scopedLog.Error(err, "Failed to unmarshal telemetry status") - return defaultStatus - } else { - return defaultStatus - } + if val, ok := cm.Data[telStatusKey]; ok { + var status TelemetryStatus + err := json.Unmarshal([]byte(val), &status) + if err != nil { + scopedLog.Error(err, "Failed to unmarshal telemetry status") + return defaultStatus + } else { + return &status } } - scopedLog.Info("Failed") + scopedLog.Info("No status set in configmap") return defaultStatus } diff --git a/pkg/splunk/enterprise/telemetry_test.go b/pkg/splunk/enterprise/telemetry_test.go index f591724c3..196baabd1 100644 --- a/pkg/splunk/enterprise/telemetry_test.go +++ b/pkg/splunk/enterprise/telemetry_test.go @@ -19,7 +19,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -func TestCollectResourceTelData_NilMaps(t *testing.T) { +func TestTelemetryCollectResourceTelData_NilMaps(t *testing.T) { data := make(map[string]string) collectResourceTelData(corev1.ResourceRequirements{}, data) if data[cpuRequestKey] == "" || data[memoryRequestKey] == "" || data[cpuLimitKey] == "" || data[memoryLimitKey] == "" { @@ -27,7 +27,7 @@ func TestCollectResourceTelData_NilMaps(t *testing.T) { } } -func TestCollectResourceTelData_MissingKeys(t *testing.T) { +func TestTelemetryCollectResourceTelData_MissingKeys(t *testing.T) { data := make(map[string]string) reqs := corev1.ResourceRequirements{ Requests: corev1.ResourceList{}, @@ -39,7 +39,7 @@ func TestCollectResourceTelData_MissingKeys(t *testing.T) { } } -func TestCollectResourceTelData_ValuesPresent(t *testing.T) { +func TestTelemetryCollectResourceTelData_ValuesPresent(t *testing.T) { data := make(map[string]string) reqs := corev1.ResourceRequirements{ Requests: corev1.ResourceList{ @@ -57,7 +57,7 @@ func TestCollectResourceTelData_ValuesPresent(t *testing.T) { } } -func TestCollectCMTelData_UnmarshalError(t *testing.T) { +func TestTelemetryCollectCMTelData_UnmarshalError(t *testing.T) { cm := &corev1.ConfigMap{Data: map[string]string{"bad": "notjson"}} data := make(map[string]interface{}) CollectCMTelData(context.TODO(), cm, data) @@ -66,7 +66,7 @@ func TestCollectCMTelData_UnmarshalError(t *testing.T) { } } -func TestCollectCMTelData_ValidJSON(t *testing.T) { +func TestTelemetryCollectCMTelData_ValidJSON(t *testing.T) { val := map[string]interface{}{"foo": "bar"} b, _ := json.Marshal(val) cm := &corev1.ConfigMap{Data: map[string]string{"good": string(b)}} @@ -77,28 +77,27 @@ func TestCollectCMTelData_ValidJSON(t *testing.T) { } } -func TestGetCurrentStatus_Default(t *testing.T) { +func TestTelemetryGetCurrentStatus_Default(t *testing.T) { cm := &corev1.ConfigMap{Data: nil} status := getCurrentStatus(context.TODO(), cm) - if status == nil || status.Test != "true" { + if status == nil || status.Test != defaultTestMode { t.Errorf("expected default status") } } -func TestGetCurrentStatus_UnmarshalError(t *testing.T) { +func TestTelemetryGetCurrentStatus_UnmarshalError(t *testing.T) { cm := &corev1.ConfigMap{Data: map[string]string{"status": "notjson"}} status := getCurrentStatus(context.TODO(), cm) - if status == nil || status.Test != "true" { + if status == nil || status.Test != defaultTestMode { t.Errorf("expected default status on unmarshal error") } } -func TestUpdateLastTransmissionTime_MarshalError(t *testing.T) { +func TestTelemetryUpdateLastTransmissionTime_MarshalError(t *testing.T) { ctx := context.TODO() cm := &corev1.ConfigMap{Data: map[string]string{}} - // Use a struct with a channel field to cause json.MarshalIndent to fail - // Should not panic - updateLastTransmissionTime(ctx, spltest.NewMockClient(), cm, (*TelemetryStatus)(nil)) // pass nil to avoid panic + status := &TelemetryStatus{Test: "false"} + updateLastTransmissionTime(ctx, spltest.NewMockClient(), cm, status) // pass nil to avoid panic } func TestSendTelemetry_UnknownKind(t *testing.T) { @@ -179,7 +178,7 @@ func TestTelemetryUpdateLastTransmissionTime_RepeatedCalls(t *testing.T) { } } -func TestCollectDeploymentTelData_AllKinds(t *testing.T) { +func TestTelemetryCollectDeploymentTelData_AllKinds(t *testing.T) { ctx := context.TODO() crs := map[string][]client.Object{ "Standalone": {&enterpriseApi.Standalone{TypeMeta: metav1.TypeMeta{Kind: "Standalone"}, ObjectMeta: metav1.ObjectMeta{Name: "standalone1"}, Spec: enterpriseApi.StandaloneSpec{CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{Spec: enterpriseApi.Spec{Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("1"), corev1.ResourceMemory: resource.MustParse("1Gi")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("2"), corev1.ResourceMemory: resource.MustParse("2Gi")}}}}}}}, From 82480ef7885362da249f089ec7076c9441d6608d Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Thu, 5 Feb 2026 20:12:16 -0800 Subject: [PATCH 12/16] fix --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index a8c330696..46dc4e9bd 100644 --- a/Makefile +++ b/Makefile @@ -429,4 +429,5 @@ setup/ginkgo: build-installer: manifests generate kustomize mkdir -p dist cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default > dist/install.yaml From dc08f44a2000cf93daa5c9d15cb3c04dcdf11e63 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Fri, 6 Feb 2026 15:12:14 -0800 Subject: [PATCH 13/16] Address comment for renaming sok app and fix --- .github/workflows/pre-release-workflow.yml | 8 ---- .../manager/controller_manager_telemetry.yaml | 4 +- internal/controller/telemetry_controller.go | 11 +++++- pkg/splunk/enterprise/names.go | 8 ++-- pkg/splunk/enterprise/telemetry.go | 37 +++++++++++++++++-- 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/.github/workflows/pre-release-workflow.yml b/.github/workflows/pre-release-workflow.yml index 608c165b2..a78820763 100644 --- a/.github/workflows/pre-release-workflow.yml +++ b/.github/workflows/pre-release-workflow.yml @@ -240,14 +240,6 @@ jobs: isRegexp: true include: 'config/manager/controller_manager_telemetry.yaml' - - name: Update sokVersion in controller_manager_telemetry.yaml - uses: jacobtomlinson/gha-find-replace@v3 - with: - find: '"sokVersion"\s*:\s*"[^"]*"' - replace: '"sokVersion": "${{ github.event.inputs.release_version }}"' - isRegexp: true - include: 'config/manager/controller_manager_telemetry.yaml' - - name: Reset files before creating Pull Request run: | git checkout go.sum diff --git a/config/manager/controller_manager_telemetry.yaml b/config/manager/controller_manager_telemetry.yaml index b026c2c66..f3752dbd0 100644 --- a/config/manager/controller_manager_telemetry.yaml +++ b/config/manager/controller_manager_telemetry.yaml @@ -5,7 +5,7 @@ metadata: data: status: | { - "lastTransmission": "" - "test": "true" + "lastTransmission": "", + "test": "true", "sokVersion": "3.0.0" } \ No newline at end of file diff --git a/internal/controller/telemetry_controller.go b/internal/controller/telemetry_controller.go index 572b88a2e..cd23194a3 100644 --- a/internal/controller/telemetry_controller.go +++ b/internal/controller/telemetry_controller.go @@ -17,11 +17,11 @@ package controller import ( "context" + "fmt" enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise" ctrl "sigs.k8s.io/controller-runtime" "time" - "github.com/pkg/errors" metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics" corev1 "k8s.io/api/core/v1" @@ -58,6 +58,12 @@ func (r *TelemetryReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( reqLogger.Info("Reconciling telemetry") + defer func() { + if rec := recover(); rec != nil { + reqLogger.Error(fmt.Errorf("panic: %v", rec), "Recovered from panic in TelemetryReconciler.Reconcile") + } + }() + // Fetch the ConfigMap cm := &corev1.ConfigMap{} err := r.Get(ctx, req.NamespacedName, cm) @@ -66,7 +72,8 @@ func (r *TelemetryReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( reqLogger.Info("telemetry configmap not found; requeueing", "period(seconds)", int(telemetryRetryDelay/time.Second)) return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil } - return ctrl.Result{}, errors.Wrap(err, "could not load telemetry configmap") + reqLogger.Error(err, "could not load telemetry configmap; requeueing", "period(seconds)", int(telemetryRetryDelay/time.Second)) + return ctrl.Result{Requeue: true, RequeueAfter: telemetryRetryDelay}, nil } if len(cm.Data) == 0 { diff --git a/pkg/splunk/enterprise/names.go b/pkg/splunk/enterprise/names.go index f788edfe1..623f361f8 100644 --- a/pkg/splunk/enterprise/names.go +++ b/pkg/splunk/enterprise/names.go @@ -201,10 +201,10 @@ access = read : [ * ], write : [ admin ] ` // Command to create telemetry app on non SHC scenarios - createTelAppNonShcString = "mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s/default/; mkdir -p /opt/splunk/etc/apps/app_tel_for_sok8s/metadata/; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s/default/app.conf; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok8s/metadata/default.meta" + createTelAppNonShcString = "mkdir -p /opt/splunk/etc/apps/app_tel_for_sok/default/; mkdir -p /opt/splunk/etc/apps/app_tel_for_sok/metadata/; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok/default/app.conf; echo -e \"%s\" > /opt/splunk/etc/apps/app_tel_for_sok/metadata/default.meta" // Command to create telemetry app on SHC scenarios - createTelAppShcString = "mkdir -p %s/app_tel_for_sok8s/default/; mkdir -p %s/app_tel_for_sok8s/metadata/; echo -e \"%s\" > %s/app_tel_for_sok8s/default/app.conf; echo -e \"%s\" > %s/app_tel_for_sok8s/metadata/default.meta" + createTelAppShcString = "mkdir -p %s/app_tel_for_sok/default/; mkdir -p %s/app_tel_for_sok/metadata/; echo -e \"%s\" > %s/app_tel_for_sok/default/app.conf; echo -e \"%s\" > %s/app_tel_for_sok/metadata/default.meta" // Command to reload app configuration telAppReloadString = "curl -k -u admin:`cat /mnt/splunk-secrets/password` https://localhost:8089/services/apps/local/_reload" @@ -212,8 +212,8 @@ access = read : [ * ], write : [ admin ] // Name of the telemetry configmap: -manager-telemetry telConfigMapTemplateStr = "%smanager-telemetry" - // Name of the telemetry app: app_tel_for_sok8s - telAppNameStr = "app_tel_for_sok8s" + // Name of the telemetry app: app_tel_for_sok + telAppNameStr = "app_tel_for_sok" telSOKVersionKey = "version" telLicenseInfoKey = "license_info" diff --git a/pkg/splunk/enterprise/telemetry.go b/pkg/splunk/enterprise/telemetry.go index 19afad1e3..4b6082abf 100644 --- a/pkg/splunk/enterprise/telemetry.go +++ b/pkg/splunk/enterprise/telemetry.go @@ -58,8 +58,8 @@ func ApplyTelemetry(ctx context.Context, client splcommon.ControllerClient, cm * reqLogger := log.FromContext(ctx) scopedLog := reqLogger.WithName("ApplyTelemetry") - for k, v := range cm.Data { - scopedLog.Info("Retrieved telemetry keys", "key", k, "value", v) + for k, _ := range cm.Data { + scopedLog.Info("Retrieved telemetry keys", "key", k) } var data map[string]interface{} @@ -175,6 +175,7 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl var crWithTelAppList map[string][]splcommon.MetaObject crWithTelAppList = make(map[string][]splcommon.MetaObject) + scopedLog.Info("Start collecting deployment telemetry data") var err error var standaloneList enterpriseApi.StandaloneList err = client.List(ctx, &standaloneList) @@ -185,12 +186,15 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[standaloneList.Items[0].Kind] = perKindData for _, cr := range standaloneList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[standaloneList.Items[0].Kind] = append(crWithTelAppList[standaloneList.Items[0].Kind], &cr) + } else { + scopedLog.Info("Telemetry app is not installed for this CR.", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) } } } @@ -204,12 +208,15 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[lmanagerList.Items[0].Kind] = perKindData for _, cr := range lmanagerList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[lmanagerList.Items[0].Kind] = append(crWithTelAppList[lmanagerList.Items[0].Kind], &cr) + } else { + scopedLog.Info("Telemetry app is not installed for this CR.", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) } } } @@ -223,12 +230,16 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[lmasterList.Items[0].Kind] = perKindData for _, cr := range lmasterList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) + var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[lmasterList.Items[0].Kind] = append(crWithTelAppList[lmasterList.Items[0].Kind], &cr) + } else { + scopedLog.Info("Telemetry app is not installed for this CR.", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) } } } @@ -242,12 +253,16 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[shcList.Items[0].Kind] = perKindData for _, cr := range shcList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) + var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[shcList.Items[0].Kind] = append(crWithTelAppList[shcList.Items[0].Kind], &cr) + } else { + scopedLog.Info("Telemetry app is not installed for this CR.", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) } } } @@ -261,6 +276,8 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[idxList.Items[0].Kind] = perKindData for _, cr := range idxList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) + var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData @@ -277,12 +294,16 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[cmanagerList.Items[0].Kind] = perKindData for _, cr := range cmanagerList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) + var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[cmanagerList.Items[0].Kind] = append(crWithTelAppList[cmanagerList.Items[0].Kind], &cr) + } else { + scopedLog.Info("Telemetry app is not installed for this CR.", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) } } } @@ -296,12 +317,16 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[cmasterList.Items[0].Kind] = perKindData for _, cr := range cmasterList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) + var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData collectResourceTelData(cr.Spec.CommonSplunkSpec.Resources, crResourceData) if cr.Status.TelAppInstalled { crWithTelAppList[cmasterList.Items[0].Kind] = append(crWithTelAppList[cmasterList.Items[0].Kind], &cr) + } else { + scopedLog.Info("Telemetry app is not installed for this CR.", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) } } } @@ -315,6 +340,8 @@ func collectDeploymentTelData(ctx context.Context, client splcommon.ControllerCl perKindData = make(map[string]interface{}) deploymentData[mconsoleList.Items[0].Kind] = perKindData for _, cr := range mconsoleList.Items { + scopedLog.Info("Collecting data", "kind", cr.Kind, "name", cr.GetName(), "namespace", cr.GetNamespace()) + var crResourceData map[string]string crResourceData = make(map[string]string) perKindData[cr.GetName()] = crResourceData @@ -335,7 +362,7 @@ func CollectCMTelData(ctx context.Context, cm *corev1.ConfigMap, data map[string continue } var compData interface{} - scopedLog.Info("Processing telemetry input from other components", "key", key, "value", val) + scopedLog.Info("Processing telemetry input from other components", "key", key) err := json.Unmarshal([]byte(val), &compData) if err != nil { scopedLog.Info("Not able to unmarshal. Will include the input as string", "key", key, "value", val) @@ -359,9 +386,10 @@ func getCurrentStatus(ctx context.Context, cm *corev1.ConfigMap) *TelemetryStatu var status TelemetryStatus err := json.Unmarshal([]byte(val), &status) if err != nil { - scopedLog.Error(err, "Failed to unmarshal telemetry status") + scopedLog.Error(err, "Failed to unmarshal telemetry status", "value", val) return defaultStatus } else { + scopedLog.Info("Got current telemetry status from configmap", "status", status) return &status } } @@ -393,6 +421,7 @@ func SendTelemetry(ctx context.Context, client splcommon.ControllerClient, cr sp case "ClusterManager": instanceID = SplunkClusterManager default: + scopedLog.Error(fmt.Errorf("unknown CR kind"), "Failed to determine instance type for telemetry") return false } From 381df2c975b50e3ce16fb8573eda80d9a22e27e7 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Sat, 7 Feb 2026 21:11:05 -0800 Subject: [PATCH 14/16] fix int test --- test/testenv/verificationutils.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/testenv/verificationutils.go b/test/testenv/verificationutils.go index c451628ab..e8adc3a39 100644 --- a/test/testenv/verificationutils.go +++ b/test/testenv/verificationutils.go @@ -21,6 +21,7 @@ import ( "encoding/json" "fmt" "os/exec" + "sigs.k8s.io/controller-runtime/pkg/client" "strings" "time" @@ -1222,11 +1223,14 @@ func GetTelemetryLastSubmissionTime(ctx context.Context, deployment *Deployment) type telemetryStatus struct { LastTransmission string `json:"lastTransmission"` } - cm, err := deployment.GetConfigMap(ctx, configMapName) + + cm := &corev1.ConfigMap{} + err := deployment.testenv.GetKubeClient().Get(ctx, client.ObjectKey{Name: configMapName, Namespace: "splunk-operator"}, cm) if err != nil { logf.Log.Error(err, "GetTelemetryLastSubmissionTime: failed to retrieve configmap") return "" } + statusVal, ok := cm.Data[statusKey] if !ok || statusVal == "" { logf.Log.Info("GetTelemetryLastSubmissionTime: failed to retrieve status") From 0009f5078936f7cc64d113bf6801d876bf0a3021 Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Sun, 8 Feb 2026 10:03:43 -0800 Subject: [PATCH 15/16] fix int test --- .../manager/controller_manager_telemetry.yaml | 2 +- .../custom_resource_crud_c3_test.go | 3 +- .../custom_resource_crud_m4_test.go | 3 +- .../custom_resource_crud_s1_test.go | 3 +- test/testenv/verificationutils.go | 38 +++++++++++++++++++ 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/config/manager/controller_manager_telemetry.yaml b/config/manager/controller_manager_telemetry.yaml index f3752dbd0..54cefed83 100644 --- a/config/manager/controller_manager_telemetry.yaml +++ b/config/manager/controller_manager_telemetry.yaml @@ -7,5 +7,5 @@ data: { "lastTransmission": "", "test": "true", - "sokVersion": "3.0.0" + "sokVersion": "" } \ No newline at end of file diff --git a/test/custom_resource_crud/custom_resource_crud_c3_test.go b/test/custom_resource_crud/custom_resource_crud_c3_test.go index 8e62938d1..5d377d8dc 100644 --- a/test/custom_resource_crud/custom_resource_crud_c3_test.go +++ b/test/custom_resource_crud/custom_resource_crud_c3_test.go @@ -82,7 +82,8 @@ var _ = Describe("Crcrud test for SVA C3", func() { // Ensure Indexers go to Ready phase testenv.SingleSiteIndexersReady(ctx, deployment, testcaseEnvInst) - // Verify telemetry is sent successfully + // Verify telemetry + testenv.TriggerTelemetrySubmission(ctx, deployment) testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) // Deploy Monitoring Console CRD diff --git a/test/custom_resource_crud/custom_resource_crud_m4_test.go b/test/custom_resource_crud/custom_resource_crud_m4_test.go index 76938e3dd..887530f94 100644 --- a/test/custom_resource_crud/custom_resource_crud_m4_test.go +++ b/test/custom_resource_crud/custom_resource_crud_m4_test.go @@ -82,7 +82,8 @@ var _ = Describe("Crcrud test for SVA M4", func() { // Ensure search head cluster go to Ready phase testenv.SearchHeadClusterReady(ctx, deployment, testcaseEnvInst) - // Verify telemetry is sent successfully + // Verify telemetry + testenv.TriggerTelemetrySubmission(ctx, deployment) testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) // Deploy Monitoring Console CRD diff --git a/test/custom_resource_crud/custom_resource_crud_s1_test.go b/test/custom_resource_crud/custom_resource_crud_s1_test.go index ef3589171..2b7f1e1e6 100644 --- a/test/custom_resource_crud/custom_resource_crud_s1_test.go +++ b/test/custom_resource_crud/custom_resource_crud_s1_test.go @@ -72,7 +72,8 @@ var _ = Describe("Crcrud test for SVA S1", func() { // Verify Standalone goes to ready state testenv.StandaloneReady(ctx, deployment, deployment.GetName(), standalone, testcaseEnvInst) - // Verify telemetry is sent successfully + // Verify telemetry + testenv.TriggerTelemetrySubmission(ctx, deployment) testenv.VerifyTelemetry(ctx, deployment, prevTelemetrySubmissionTime) // Deploy Monitoring Console CRD diff --git a/test/testenv/verificationutils.go b/test/testenv/verificationutils.go index e8adc3a39..bb3e6c88c 100644 --- a/test/testenv/verificationutils.go +++ b/test/testenv/verificationutils.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "math/rand" "os/exec" "sigs.k8s.io/controller-runtime/pkg/client" "strings" @@ -1258,3 +1259,40 @@ func VerifyTelemetry(ctx context.Context, deployment *Deployment, prevVal string return false }, deployment.GetTimeout(), PollInterval).Should(gomega.Equal(true)) } + +// TriggerTelemetrySubmission updates or adds the 'test_submission' key in the telemetry ConfigMap with a JSON value containing a random number. +func TriggerTelemetrySubmission(ctx context.Context, deployment *Deployment) { + const ( + configMapName = "splunk-operator-manager-telemetry" + testKey = "test_submission" + ) + + // Generate a random number + rand.Seed(time.Now().UnixNano()) + randomNumber := rand.Intn(1000) + + // Create the JSON value + jsonValue, err := json.Marshal(map[string]int{"value": randomNumber}) + if err != nil { + logf.Log.Error(err, "Failed to marshal JSON value") + return + } + + // Update the ConfigMap + cm := &corev1.ConfigMap{} + err = deployment.testenv.GetKubeClient().Get(ctx, client.ObjectKey{Name: configMapName, Namespace: "splunk-operator"}, cm) + if err != nil { + logf.Log.Error(err, "Failed to get ConfigMap") + return + } + + // Update the test_submission key + cm.Data[testKey] = string(jsonValue) + err = deployment.testenv.GetKubeClient().Update(ctx, cm) + if err != nil { + logf.Log.Error(err, "Failed to update ConfigMap") + return + } + + logf.Log.Info("Successfully updated telemetry ConfigMap", "key", testKey, "value", jsonValue) +} From 9c8dcbd7457973391d033266636c90dd45f3f62c Mon Sep 17 00:00:00 2001 From: Minjie Qiu Date: Sun, 8 Feb 2026 16:22:21 -0800 Subject: [PATCH 16/16] Set version in make --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 46dc4e9bd..2a1a741f5 100644 --- a/Makefile +++ b/Makefile @@ -355,6 +355,7 @@ run_clair_scan: # generate artifacts needed to deploy operator, this is current way of doing it, need to fix this generate-artifacts-namespace: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + $(SED) 's/\("sokVersion": \)"[^"]*"/\1"$(VERSION)"/' config/manager/controller_manager_telemetry.yaml mkdir -p release-${VERSION} cp config/default/kustomization-namespace.yaml config/default/kustomization.yaml cp config/rbac/kustomization-namespace.yaml config/rbac/kustomization.yaml @@ -370,6 +371,7 @@ generate-artifacts-namespace: manifests kustomize ## Deploy controller to the K8 # generate artifacts needed to deploy operator, this is current way of doing it, need to fix this generate-artifacts-cluster: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + $(SED) 's/\("sokVersion": \)"[^"]*"/\1"$(VERSION)"/' config/manager/controller_manager_telemetry.yaml mkdir -p release-${VERSION} cp config/default/kustomization-cluster.yaml config/default/kustomization.yaml cp config/rbac/kustomization-cluster.yaml config/rbac/kustomization.yaml