From 7b08e23b00507971c46a7fc7187cbe5a27c851ae Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 12 Dec 2025 13:46:51 -0800 Subject: [PATCH 1/9] no error if no values Signed-off-by: Harper, Jason M --- cmd/telemetry/telemetry.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/telemetry/telemetry.go b/cmd/telemetry/telemetry.go index e3146332..a2f624e2 100644 --- a/cmd/telemetry/telemetry.go +++ b/cmd/telemetry/telemetry.go @@ -396,6 +396,9 @@ func summaryFromTableValues(allTableValues []table.TableValues, _ map[string]scr } func getMetricAverage(tableValues table.TableValues, fieldNames []string, separatorFieldName string) (average string) { + if len(tableValues.Fields) == 0 { + return "" + } sum, seps, err := getSumOfFields(tableValues.Fields, fieldNames, separatorFieldName) if err != nil { slog.Error("failed to get sum of fields for IO metrics", slog.String("error", err.Error())) From 7b3563b2c73e6edf5bad7a327d4b3519e8d0c199 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 12 Dec 2025 13:47:14 -0800 Subject: [PATCH 2/9] fail to set should result in exit error Signed-off-by: Harper, Jason M --- cmd/config/config.go | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/cmd/config/config.go b/cmd/config/config.go index 7508486e..6981b73a 100644 --- a/cmd/config/config.go +++ b/cmd/config/config.go @@ -179,8 +179,9 @@ func runCmd(cmd *cobra.Command, args []string) error { go setOnTarget(cmd, myTarget, flagGroups, localTempDir, channelError, multiSpinner.Status) } // wait for all targets to finish + var setOnTargetErr error for range myTargets { - <-channelError + setOnTargetErr = <-channelError } multiSpinner.Finish() fmt.Println() // blank line @@ -208,6 +209,12 @@ func runCmd(cmd *cobra.Command, args []string) error { return err } } + if setOnTargetErr != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", setOnTargetErr) + slog.Error(setOnTargetErr.Error()) + cmd.SilenceUsage = true + return setOnTargetErr + } return nil } @@ -239,6 +246,7 @@ func setOnTarget(cmd *cobra.Command, myTarget target.Target, flagGroups []flagGr } var statusMessages []string _ = statusUpdate(myTarget.GetName(), "updating configuration") + var setErrs []error // collect errors but continue setting other flags for _, group := range flagGroups { for _, flag := range group.flags { if flag.HasSetFunc() && cmd.Flags().Lookup(flag.GetName()).Changed { @@ -268,6 +276,7 @@ func setOnTarget(cmd *cobra.Command, myTarget target.Target, flagGroups []flagGr } } if setErr != nil { + setErrs = append(setErrs, setErr) slog.Error(setErr.Error()) statusMessages = append(statusMessages, errorMessage) } else { @@ -279,6 +288,15 @@ func setOnTarget(cmd *cobra.Command, myTarget target.Target, flagGroups []flagGr statusMessage := fmt.Sprintf("configuration update complete: %s", strings.Join(statusMessages, ", ")) slog.Info(statusMessage, slog.String("target", myTarget.GetName())) _ = statusUpdate(myTarget.GetName(), statusMessage) + // aggregate setErrs and send to channel + if len(setErrs) > 0 { + aggregateErrMessages := []string{} + for _, setErr := range setErrs { + aggregateErrMessages = append(aggregateErrMessages, setErr.Error()) + } + channelError <- fmt.Errorf("errors setting configuration on target %s: %s", myTarget.GetName(), strings.Join(aggregateErrMessages, "; ")) + return + } channelError <- nil } From 65251a04066fcec9fc57988b34584cdd9dabf1a7 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 12 Dec 2025 13:59:32 -0800 Subject: [PATCH 3/9] warn on unrecognized dimm formats Signed-off-by: Harper, Jason M --- cmd/report/dimm.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/report/dimm.go b/cmd/report/dimm.go index 3371997c..87fa821a 100644 --- a/cmd/report/dimm.go +++ b/cmd/report/dimm.go @@ -67,7 +67,7 @@ func installedMemoryFromOutput(outputs map[string]script.ScriptOutput) string { if match != nil { size, err := strconv.Atoi(match[1]) if err != nil { - slog.Error("Don't recognize DIMM size format.", slog.String("field", fields[1])) + slog.Warn("Don't recognize DIMM size format.", slog.String("field", fields[1])) return "" } sum := count * size @@ -87,7 +87,7 @@ func populatedChannelsFromOutput(outputs map[string]script.ScriptOutput) string dimmInfo := dimmInfoFromDmiDecode(outputs[script.DmidecodeScriptName].Stdout) derivedDimmFields := derivedDimmsFieldFromOutput(outputs) if len(derivedDimmFields) != len(dimmInfo) { - slog.Error("derivedDimmFields and dimmInfo have different lengths", slog.Int("derivedDimmFields", len(derivedDimmFields)), slog.Int("dimmInfo", len(dimmInfo))) + slog.Warn("derivedDimmFields and dimmInfo have different lengths", slog.Int("derivedDimmFields", len(derivedDimmFields)), slog.Int("dimmInfo", len(dimmInfo))) return "" } for i, dimm := range dimmInfo { @@ -126,26 +126,26 @@ func derivedDimmsFieldFromOutput(outputs map[string]script.ScriptOutput) []deriv if strings.Contains(platformVendor, "Dell") { derivedFields, err = deriveDIMMInfoDell(dimmInfo, numChannels) if err != nil { - slog.Info("failed to parse dimm info on Dell platform", slog.String("error", err.Error())) + slog.Warn("failed to parse dimm info on Dell platform", slog.String("error", err.Error())) } success = err == nil } else if platformVendor == "HPE" { derivedFields, err = deriveDIMMInfoHPE(dimmInfo, numSockets, numChannels) if err != nil { - slog.Info("failed to parse dimm info on HPE platform", slog.String("error", err.Error())) + slog.Warn("failed to parse dimm info on HPE platform", slog.String("error", err.Error())) } success = err == nil } else if platformVendor == "Amazon EC2" { derivedFields, err = deriveDIMMInfoEC2(dimmInfo, numChannels) if err != nil { - slog.Info("failed to parse dimm info on Amazon EC2 platform", slog.String("error", err.Error())) + slog.Warn("failed to parse dimm info on Amazon EC2 platform", slog.String("error", err.Error())) } success = err == nil } if !success { derivedFields, err = deriveDIMMInfoOther(dimmInfo, numChannels) if err != nil { - slog.Info("failed to parse dimm info on other platform", slog.String("error", err.Error())) + slog.Warn("failed to parse dimm info on other platform", slog.String("error", err.Error())) } } return derivedFields From e3ab986295d589bd833eba8c997810a8820cebcb Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 12 Dec 2025 14:22:06 -0800 Subject: [PATCH 4/9] warn on expected missing Signed-off-by: Harper, Jason M --- cmd/telemetry/telemetry_tables.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/telemetry/telemetry_tables.go b/cmd/telemetry/telemetry_tables.go index f718485e..18ca6850 100644 --- a/cmd/telemetry/telemetry_tables.go +++ b/cmd/telemetry/telemetry_tables.go @@ -406,7 +406,7 @@ func temperatureTelemetryTableValues(outputs map[string]script.ScriptOutput) []t } platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"CoreTmp"}) if err != nil { - slog.Error(err.Error()) + slog.Warn(err.Error()) // not all systems report core temperature, e.g., cloud VMs return []table.Field{} } packageRows, err := common.TurbostatPackageRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"PkgTmp"}) From 9ed36a12c33002cca236bef72d1aca9c4851937a Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Sat, 13 Dec 2025 07:48:01 -0800 Subject: [PATCH 5/9] warn, not error, on fields that may not be present Signed-off-by: Harper, Jason M --- cmd/telemetry/telemetry_tables.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/telemetry/telemetry_tables.go b/cmd/telemetry/telemetry_tables.go index 18ca6850..97fd1bfe 100644 --- a/cmd/telemetry/telemetry_tables.go +++ b/cmd/telemetry/telemetry_tables.go @@ -373,7 +373,7 @@ func powerTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.F } packageRows, err := common.TurbostatPackageRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"PkgWatt", "RAMWatt"}) if err != nil { - slog.Error(err.Error()) + slog.Warn(err.Error()) return []table.Field{} } for i := range packageRows { @@ -511,7 +511,7 @@ func c6TelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Fiel } platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"C6%", "CPU%c6"}) if err != nil { - slog.Error(err.Error()) + slog.Warn(err.Error()) return []table.Field{} } if len(platformRows) == 0 { From afd4602296bd7db89ac152f3aca62556c4b0aa08 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Sat, 13 Dec 2025 08:09:03 -0800 Subject: [PATCH 6/9] improve log message Signed-off-by: Harper, Jason M --- internal/common/turbostat.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/common/turbostat.go b/internal/common/turbostat.go index 679aebfa..54b6c77a 100644 --- a/internal/common/turbostat.go +++ b/internal/common/turbostat.go @@ -102,8 +102,8 @@ func TurbostatPlatformRows(turboStatScriptOutput string, fieldNames []string) ([ return nil, err } if len(rows) == 0 { - err := fmt.Errorf("turbostat output is empty") - return nil, err + slog.Warn("no platform rows found in turbostat output") + return nil, nil } // filter the rows to the summary rows only var fieldValues [][]string @@ -155,8 +155,8 @@ func TurbostatPackageRows(turboStatScriptOutput string, fieldNames []string) ([] return nil, err } if len(rows) == 0 { - err := fmt.Errorf("turbostat output is empty") - return nil, err + slog.Warn("no package rows found in turbostat output") + return nil, nil } var packageRows [][][]string for _, row := range rows { From 2ae5d6b803de55336e964ad6907e9f1ff2d98fdf Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Sat, 13 Dec 2025 08:29:46 -0800 Subject: [PATCH 7/9] fix turbostat test Signed-off-by: Harper, Jason M --- internal/common/turbostat_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/common/turbostat_test.go b/internal/common/turbostat_test.go index 540097f1..eb12ebca 100644 --- a/internal/common/turbostat_test.go +++ b/internal/common/turbostat_test.go @@ -98,7 +98,7 @@ func TestTurbostatPlatformRows(t *testing.T) { fieldNames: []string{"Avg_MHz", "Busy%"}, wantFirst: nil, wantLen: 0, - expectErr: true, + expectErr: false, }, { name: "No output", @@ -106,7 +106,7 @@ func TestTurbostatPlatformRows(t *testing.T) { fieldNames: []string{"Avg_MHz", "Busy%"}, wantFirst: nil, wantLen: 0, - expectErr: true, + expectErr: false, }, { name: "Only time and interval, no turbostat data", @@ -114,7 +114,7 @@ func TestTurbostatPlatformRows(t *testing.T) { fieldNames: []string{"Avg_MHz", "Busy%"}, wantFirst: nil, wantLen: 0, - expectErr: true, + expectErr: false, }, } @@ -547,7 +547,7 @@ X 0 0 1000 10 turbostatOutput: "", fieldNames: []string{"Avg_MHz"}, want: nil, - wantErr: true, + wantErr: false, }, { name: "Only headers, no data", @@ -558,7 +558,7 @@ Package Core CPU Avg_MHz Busy% `, fieldNames: []string{"Avg_MHz"}, want: nil, - wantErr: true, + wantErr: false, }, } From df372b7fc7032542dfa67072f9f1b7c3cc015fe1 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Sun, 14 Dec 2025 07:28:00 -0800 Subject: [PATCH 8/9] replace error logs with warnings for better clarity in frequency and telemetry outputs Signed-off-by: Harper, Jason M --- cmd/config/config_tables.go | 4 ++-- cmd/report/report_tables.go | 2 +- cmd/telemetry/telemetry_tables.go | 4 ++-- internal/common/cache.go | 6 +++--- internal/common/frequency.go | 2 +- internal/script/script.go | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/config/config_tables.go b/cmd/config/config_tables.go index f3b73b84..12157b09 100644 --- a/cmd/config/config_tables.go +++ b/cmd/config/config_tables.go @@ -144,10 +144,10 @@ func configurationTableValues(outputs map[string]script.ScriptOutput) []table.Fi func l3InstanceFromOutput(outputs map[string]script.ScriptOutput) string { l3InstanceMB, _, err := common.GetL3MSRMB(outputs) if err != nil { - slog.Info("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error())) + slog.Debug("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error())) l3InstanceMB, _, err = common.GetL3LscpuMB(outputs) if err != nil { - slog.Error("Could not get L3 size from lscpu", slog.String("error", err.Error())) + slog.Warn("Could not get L3 size from lscpu", slog.String("error", err.Error())) return "" } } diff --git a/cmd/report/report_tables.go b/cmd/report/report_tables.go index 4081b86d..6e83e1fc 100644 --- a/cmd/report/report_tables.go +++ b/cmd/report/report_tables.go @@ -1797,7 +1797,7 @@ func frequencyBenchmarkTableValues(outputs map[string]script.ScriptOutput) []tab // get the sse, avx256, and avx512 frequencies from the avx-turbo output instructionFreqs, err := avxTurboFrequenciesFromOutput(outputs[script.FrequencyBenchmarkScriptName].Stdout) if err != nil { - slog.Error("unable to get avx turbo frequencies", slog.String("error", err.Error())) + slog.Warn("unable to get avx turbo frequencies", slog.String("error", err.Error())) return []table.Field{} } // we're expecting scalar_iadd, avx256_fma, avx512_fma diff --git a/cmd/telemetry/telemetry_tables.go b/cmd/telemetry/telemetry_tables.go index 97fd1bfe..03f6ce0e 100644 --- a/cmd/telemetry/telemetry_tables.go +++ b/cmd/telemetry/telemetry_tables.go @@ -446,7 +446,7 @@ func frequencyTelemetryTableValues(outputs map[string]script.ScriptOutput) []tab } platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"Bzy_MHz"}) if err != nil { - slog.Error(err.Error()) + slog.Warn(err.Error()) return []table.Field{} } packageRows, err := common.TurbostatPackageRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"UncMHz"}) @@ -486,7 +486,7 @@ func ipcTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Fie } platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"IPC"}) if err != nil { - slog.Error(err.Error()) + slog.Warn(err.Error()) return []table.Field{} } if len(platformRows) == 0 { diff --git a/internal/common/cache.go b/internal/common/cache.go index adf5a63f..fb7ccd6d 100644 --- a/internal/common/cache.go +++ b/internal/common/cache.go @@ -95,10 +95,10 @@ func GetL3LscpuMB(outputs map[string]script.ScriptOutput) (instance float64, tot func L3FromOutput(outputs map[string]script.ScriptOutput) string { l3InstanceMB, l3TotalMB, err := GetL3MSRMB(outputs) if err != nil { - slog.Info("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error())) + slog.Debug("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error())) l3InstanceMB, l3TotalMB, err = GetL3LscpuMB(outputs) if err != nil { - slog.Error("Could not get L3 size from lscpu", slog.String("error", err.Error())) + slog.Warn("Could not get L3 size from lscpu", slog.String("error", err.Error())) return "" } } @@ -143,7 +143,7 @@ func L3PerCoreFromOutput(outputs map[string]script.ScriptOutput) string { slog.Debug("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error())) _, l3TotalMB, err = GetL3LscpuMB(outputs) if err != nil { - slog.Error("Could not get L3 size from lscpu", slog.String("error", err.Error())) + slog.Warn("Could not get L3 size from lscpu", slog.String("error", err.Error())) return "" } } diff --git a/internal/common/frequency.go b/internal/common/frequency.go index 07b91012..54bd9ac5 100644 --- a/internal/common/frequency.go +++ b/internal/common/frequency.go @@ -359,7 +359,7 @@ func UncoreMinMaxDieFrequencyFromOutput(maxFreq bool, computeDie bool, outputs m } } if !found { - slog.Error("failed to find uncore die type in TPMI output", slog.String("output", outputs[script.UncoreDieTypesFromTPMIScriptName].Stdout)) + slog.Warn("failed to find uncore die type in TPMI output", slog.String("output", outputs[script.UncoreDieTypesFromTPMIScriptName].Stdout)) return "" } // get the frequency for the found die diff --git a/internal/script/script.go b/internal/script/script.go index 368b7a5f..f753fa65 100644 --- a/internal/script/script.go +++ b/internal/script/script.go @@ -161,7 +161,7 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScript } stdout, stderr, exitcode, err := myTarget.RunCommand(cmd, 0, false) if err != nil { - slog.Error("error running script on target", slog.String("script", script.ScriptTemplate), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) + slog.Warn("error running script on target", slog.String("name", script.Name), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) } scriptOutputs[script.Name] = ScriptOutput{ScriptDefinition: script, Stdout: stdout, Stderr: stderr, Exitcode: exitcode} if !ignoreScriptErrors { From bb5092d3fc3b7d91f55f9bb560fc13ec8398c2ee Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Sun, 14 Dec 2025 08:29:39 -0800 Subject: [PATCH 9/9] replace error logs with warnings for improved clarity in event parsing and benchmark reporting Signed-off-by: Harper, Jason M --- cmd/metrics/event_frame.go | 2 +- cmd/report/benchmarking.go | 7 +++---- cmd/report/report_tables.go | 12 ++---------- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/cmd/metrics/event_frame.go b/cmd/metrics/event_frame.go index b5d2cc38..d756afe5 100644 --- a/cmd/metrics/event_frame.go +++ b/cmd/metrics/event_frame.go @@ -132,7 +132,7 @@ func parseEvents(rawEvents [][]byte) ([]Event, error) { var event Event if err := json.Unmarshal(rawEvent, &event); err != nil { err = fmt.Errorf("unrecognized event format: %w", err) - slog.Error(err.Error(), slog.String("event", string(rawEvent))) + slog.Warn(err.Error(), slog.String("event", string(rawEvent))) return nil, err } // sometimes perf will prepend "cpu/" to the topdown event names, e.g., cpu/topdown-retiring/ to x86 events, and diff --git a/cmd/report/benchmarking.go b/cmd/report/benchmarking.go index 41fa303a..10a8327e 100644 --- a/cmd/report/benchmarking.go +++ b/cmd/report/benchmarking.go @@ -122,7 +122,9 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string { func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) { output := outputs[script.StorageBenchmarkScriptName].Stdout - + if output == "" { + return fioOutput{}, fmt.Errorf("no output from storage benchmark") + } if strings.Contains(output, "ERROR:") { return fioOutput{}, fmt.Errorf("failed to run storage benchmark: %s", output) } @@ -134,8 +136,6 @@ func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, e slog.Info("fio output snip", "output", output[:outputLen], "stderr", outputs[script.StorageBenchmarkScriptName].Stderr) return fioOutput{}, fmt.Errorf("unable to find fio output") } - - slog.Debug("parsing storage benchmark output") var fioData fioOutput if err := json.Unmarshal([]byte(output), &fioData); err != nil { return fioOutput{}, fmt.Errorf("error unmarshalling JSON: %w", err) @@ -143,7 +143,6 @@ func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, e if len(fioData.Jobs) == 0 { return fioOutput{}, fmt.Errorf("no jobs found in storage benchmark output") } - return fioData, nil } diff --git a/cmd/report/report_tables.go b/cmd/report/report_tables.go index 6e83e1fc..fc3dceaa 100644 --- a/cmd/report/report_tables.go +++ b/cmd/report/report_tables.go @@ -1806,7 +1806,7 @@ func frequencyBenchmarkTableValues(outputs map[string]script.ScriptOutput) []tab avx512FmaFreqs := instructionFreqs["avx512_fma"] // stop if we don't have any scalar_iadd frequencies if len(scalarIaddFreqs) == 0 { - slog.Error("no scalar_iadd frequencies found") + slog.Warn("no scalar_iadd frequencies found") return []table.Field{} } // get the spec core frequencies from the spec output @@ -1976,14 +1976,9 @@ func formatOrEmpty(format string, value any) string { func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []table.Field { fioData, err := storagePerfFromOutput(outputs) if err != nil { - slog.Error("failed to get storage benchmark data", slog.String("error", err.Error())) + slog.Warn("failed to get storage benchmark data", slog.String("error", err.Error())) return []table.Field{} } - - if len(fioData.Jobs) == 0 { - return []table.Field{} - } - // Initialize the fields for metrics (column headers) fields := []table.Field{ {Name: "Job"}, @@ -1994,9 +1989,7 @@ func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []table {Name: "Write IOPs"}, {Name: "Write Bandwidth (MiB/s)"}, } - // For each FIO job, create a new row and populate its values - slog.Debug("fioData", slog.Any("jobs", fioData.Jobs)) for _, job := range fioData.Jobs { fields[0].Values = append(fields[0].Values, job.Jobname) fields[1].Values = append(fields[1].Values, formatOrEmpty("%.0f", job.Read.LatNs.Mean/1000)) @@ -2006,7 +1999,6 @@ func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []table fields[5].Values = append(fields[5].Values, formatOrEmpty("%.0f", job.Write.IopsMean)) fields[6].Values = append(fields[6].Values, formatOrEmpty("%d", job.Write.Bw/1024)) } - return fields }