Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion cmd/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,9 @@ func runCmd(cmd *cobra.Command, args []string) error {
go setOnTarget(cmd, myTarget, flagGroups, localTempDir, channelError, multiSpinner.Status)
}
// wait for all targets to finish
var setOnTargetErr error
for range myTargets {
<-channelError
setOnTargetErr = <-channelError
}
multiSpinner.Finish()
fmt.Println() // blank line
Expand Down Expand Up @@ -208,6 +209,12 @@ func runCmd(cmd *cobra.Command, args []string) error {
return err
}
}
if setOnTargetErr != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", setOnTargetErr)
slog.Error(setOnTargetErr.Error())
cmd.SilenceUsage = true
return setOnTargetErr
}
return nil
}

Expand Down Expand Up @@ -239,6 +246,7 @@ func setOnTarget(cmd *cobra.Command, myTarget target.Target, flagGroups []flagGr
}
var statusMessages []string
_ = statusUpdate(myTarget.GetName(), "updating configuration")
var setErrs []error // collect errors but continue setting other flags
for _, group := range flagGroups {
for _, flag := range group.flags {
if flag.HasSetFunc() && cmd.Flags().Lookup(flag.GetName()).Changed {
Expand Down Expand Up @@ -268,6 +276,7 @@ func setOnTarget(cmd *cobra.Command, myTarget target.Target, flagGroups []flagGr
}
}
if setErr != nil {
setErrs = append(setErrs, setErr)
slog.Error(setErr.Error())
statusMessages = append(statusMessages, errorMessage)
} else {
Expand All @@ -279,6 +288,15 @@ func setOnTarget(cmd *cobra.Command, myTarget target.Target, flagGroups []flagGr
statusMessage := fmt.Sprintf("configuration update complete: %s", strings.Join(statusMessages, ", "))
slog.Info(statusMessage, slog.String("target", myTarget.GetName()))
_ = statusUpdate(myTarget.GetName(), statusMessage)
// aggregate setErrs and send to channel
if len(setErrs) > 0 {
aggregateErrMessages := []string{}
for _, setErr := range setErrs {
aggregateErrMessages = append(aggregateErrMessages, setErr.Error())
}
channelError <- fmt.Errorf("errors setting configuration on target %s: %s", myTarget.GetName(), strings.Join(aggregateErrMessages, "; "))
return
}
channelError <- nil
}

Expand Down
4 changes: 2 additions & 2 deletions cmd/config/config_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,10 @@ func configurationTableValues(outputs map[string]script.ScriptOutput) []table.Fi
func l3InstanceFromOutput(outputs map[string]script.ScriptOutput) string {
l3InstanceMB, _, err := common.GetL3MSRMB(outputs)
if err != nil {
slog.Info("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error()))
slog.Debug("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error()))
l3InstanceMB, _, err = common.GetL3LscpuMB(outputs)
if err != nil {
slog.Error("Could not get L3 size from lscpu", slog.String("error", err.Error()))
slog.Warn("Could not get L3 size from lscpu", slog.String("error", err.Error()))
return ""
}
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/metrics/event_frame.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func parseEvents(rawEvents [][]byte) ([]Event, error) {
var event Event
if err := json.Unmarshal(rawEvent, &event); err != nil {
err = fmt.Errorf("unrecognized event format: %w", err)
slog.Error(err.Error(), slog.String("event", string(rawEvent)))
slog.Warn(err.Error(), slog.String("event", string(rawEvent)))
return nil, err
}
// sometimes perf will prepend "cpu/" to the topdown event names, e.g., cpu/topdown-retiring/ to x86 events, and
Expand Down
7 changes: 3 additions & 4 deletions cmd/report/benchmarking.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ func cpuSpeedFromOutput(outputs map[string]script.ScriptOutput) string {

func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, error) {
output := outputs[script.StorageBenchmarkScriptName].Stdout

if output == "" {
return fioOutput{}, fmt.Errorf("no output from storage benchmark")
}
if strings.Contains(output, "ERROR:") {
return fioOutput{}, fmt.Errorf("failed to run storage benchmark: %s", output)
}
Expand All @@ -134,16 +136,13 @@ func storagePerfFromOutput(outputs map[string]script.ScriptOutput) (fioOutput, e
slog.Info("fio output snip", "output", output[:outputLen], "stderr", outputs[script.StorageBenchmarkScriptName].Stderr)
return fioOutput{}, fmt.Errorf("unable to find fio output")
}

slog.Debug("parsing storage benchmark output")
var fioData fioOutput
if err := json.Unmarshal([]byte(output), &fioData); err != nil {
return fioOutput{}, fmt.Errorf("error unmarshalling JSON: %w", err)
}
if len(fioData.Jobs) == 0 {
return fioOutput{}, fmt.Errorf("no jobs found in storage benchmark output")
}

return fioData, nil
}

Expand Down
12 changes: 6 additions & 6 deletions cmd/report/dimm.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func installedMemoryFromOutput(outputs map[string]script.ScriptOutput) string {
if match != nil {
size, err := strconv.Atoi(match[1])
if err != nil {
slog.Error("Don't recognize DIMM size format.", slog.String("field", fields[1]))
slog.Warn("Don't recognize DIMM size format.", slog.String("field", fields[1]))
return ""
}
sum := count * size
Expand All @@ -87,7 +87,7 @@ func populatedChannelsFromOutput(outputs map[string]script.ScriptOutput) string
dimmInfo := dimmInfoFromDmiDecode(outputs[script.DmidecodeScriptName].Stdout)
derivedDimmFields := derivedDimmsFieldFromOutput(outputs)
if len(derivedDimmFields) != len(dimmInfo) {
slog.Error("derivedDimmFields and dimmInfo have different lengths", slog.Int("derivedDimmFields", len(derivedDimmFields)), slog.Int("dimmInfo", len(dimmInfo)))
slog.Warn("derivedDimmFields and dimmInfo have different lengths", slog.Int("derivedDimmFields", len(derivedDimmFields)), slog.Int("dimmInfo", len(dimmInfo)))
return ""
}
for i, dimm := range dimmInfo {
Expand Down Expand Up @@ -126,26 +126,26 @@ func derivedDimmsFieldFromOutput(outputs map[string]script.ScriptOutput) []deriv
if strings.Contains(platformVendor, "Dell") {
derivedFields, err = deriveDIMMInfoDell(dimmInfo, numChannels)
if err != nil {
slog.Info("failed to parse dimm info on Dell platform", slog.String("error", err.Error()))
slog.Warn("failed to parse dimm info on Dell platform", slog.String("error", err.Error()))
}
success = err == nil
} else if platformVendor == "HPE" {
derivedFields, err = deriveDIMMInfoHPE(dimmInfo, numSockets, numChannels)
if err != nil {
slog.Info("failed to parse dimm info on HPE platform", slog.String("error", err.Error()))
slog.Warn("failed to parse dimm info on HPE platform", slog.String("error", err.Error()))
}
success = err == nil
} else if platformVendor == "Amazon EC2" {
derivedFields, err = deriveDIMMInfoEC2(dimmInfo, numChannels)
if err != nil {
slog.Info("failed to parse dimm info on Amazon EC2 platform", slog.String("error", err.Error()))
slog.Warn("failed to parse dimm info on Amazon EC2 platform", slog.String("error", err.Error()))
}
success = err == nil
}
if !success {
derivedFields, err = deriveDIMMInfoOther(dimmInfo, numChannels)
if err != nil {
slog.Info("failed to parse dimm info on other platform", slog.String("error", err.Error()))
slog.Warn("failed to parse dimm info on other platform", slog.String("error", err.Error()))
}
}
return derivedFields
Expand Down
14 changes: 3 additions & 11 deletions cmd/report/report_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -1797,7 +1797,7 @@ func frequencyBenchmarkTableValues(outputs map[string]script.ScriptOutput) []tab
// get the sse, avx256, and avx512 frequencies from the avx-turbo output
instructionFreqs, err := avxTurboFrequenciesFromOutput(outputs[script.FrequencyBenchmarkScriptName].Stdout)
if err != nil {
slog.Error("unable to get avx turbo frequencies", slog.String("error", err.Error()))
slog.Warn("unable to get avx turbo frequencies", slog.String("error", err.Error()))
return []table.Field{}
}
// we're expecting scalar_iadd, avx256_fma, avx512_fma
Expand All @@ -1806,7 +1806,7 @@ func frequencyBenchmarkTableValues(outputs map[string]script.ScriptOutput) []tab
avx512FmaFreqs := instructionFreqs["avx512_fma"]
// stop if we don't have any scalar_iadd frequencies
if len(scalarIaddFreqs) == 0 {
slog.Error("no scalar_iadd frequencies found")
slog.Warn("no scalar_iadd frequencies found")
return []table.Field{}
}
// get the spec core frequencies from the spec output
Expand Down Expand Up @@ -1976,14 +1976,9 @@ func formatOrEmpty(format string, value any) string {
func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []table.Field {
fioData, err := storagePerfFromOutput(outputs)
if err != nil {
slog.Error("failed to get storage benchmark data", slog.String("error", err.Error()))
slog.Warn("failed to get storage benchmark data", slog.String("error", err.Error()))
return []table.Field{}
}

if len(fioData.Jobs) == 0 {
return []table.Field{}
}

// Initialize the fields for metrics (column headers)
fields := []table.Field{
{Name: "Job"},
Expand All @@ -1994,9 +1989,7 @@ func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []table
{Name: "Write IOPs"},
{Name: "Write Bandwidth (MiB/s)"},
}

// For each FIO job, create a new row and populate its values
slog.Debug("fioData", slog.Any("jobs", fioData.Jobs))
for _, job := range fioData.Jobs {
fields[0].Values = append(fields[0].Values, job.Jobname)
fields[1].Values = append(fields[1].Values, formatOrEmpty("%.0f", job.Read.LatNs.Mean/1000))
Expand All @@ -2006,7 +1999,6 @@ func storageBenchmarkTableValues(outputs map[string]script.ScriptOutput) []table
fields[5].Values = append(fields[5].Values, formatOrEmpty("%.0f", job.Write.IopsMean))
fields[6].Values = append(fields[6].Values, formatOrEmpty("%d", job.Write.Bw/1024))
}

return fields
}

Expand Down
3 changes: 3 additions & 0 deletions cmd/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,9 @@ func summaryFromTableValues(allTableValues []table.TableValues, _ map[string]scr
}

func getMetricAverage(tableValues table.TableValues, fieldNames []string, separatorFieldName string) (average string) {
if len(tableValues.Fields) == 0 {
return ""
}
sum, seps, err := getSumOfFields(tableValues.Fields, fieldNames, separatorFieldName)
if err != nil {
slog.Error("failed to get sum of fields for IO metrics", slog.String("error", err.Error()))
Expand Down
10 changes: 5 additions & 5 deletions cmd/telemetry/telemetry_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ func powerTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.F
}
packageRows, err := common.TurbostatPackageRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"PkgWatt", "RAMWatt"})
if err != nil {
slog.Error(err.Error())
slog.Warn(err.Error())
return []table.Field{}
}
for i := range packageRows {
Expand Down Expand Up @@ -406,7 +406,7 @@ func temperatureTelemetryTableValues(outputs map[string]script.ScriptOutput) []t
}
platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"CoreTmp"})
if err != nil {
slog.Error(err.Error())
slog.Warn(err.Error()) // not all systems report core temperature, e.g., cloud VMs
return []table.Field{}
}
packageRows, err := common.TurbostatPackageRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"PkgTmp"})
Expand Down Expand Up @@ -446,7 +446,7 @@ func frequencyTelemetryTableValues(outputs map[string]script.ScriptOutput) []tab
}
platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"Bzy_MHz"})
if err != nil {
slog.Error(err.Error())
slog.Warn(err.Error())
return []table.Field{}
}
packageRows, err := common.TurbostatPackageRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"UncMHz"})
Expand Down Expand Up @@ -486,7 +486,7 @@ func ipcTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Fie
}
platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"IPC"})
if err != nil {
slog.Error(err.Error())
slog.Warn(err.Error())
return []table.Field{}
}
if len(platformRows) == 0 {
Expand All @@ -511,7 +511,7 @@ func c6TelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Fiel
}
platformRows, err := common.TurbostatPlatformRows(outputs[script.TurbostatTelemetryScriptName].Stdout, []string{"C6%", "CPU%c6"})
if err != nil {
slog.Error(err.Error())
slog.Warn(err.Error())
return []table.Field{}
}
if len(platformRows) == 0 {
Expand Down
6 changes: 3 additions & 3 deletions internal/common/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ func GetL3LscpuMB(outputs map[string]script.ScriptOutput) (instance float64, tot
func L3FromOutput(outputs map[string]script.ScriptOutput) string {
l3InstanceMB, l3TotalMB, err := GetL3MSRMB(outputs)
if err != nil {
slog.Info("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error()))
slog.Debug("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error()))
l3InstanceMB, l3TotalMB, err = GetL3LscpuMB(outputs)
if err != nil {
slog.Error("Could not get L3 size from lscpu", slog.String("error", err.Error()))
slog.Warn("Could not get L3 size from lscpu", slog.String("error", err.Error()))
return ""
}
}
Expand Down Expand Up @@ -143,7 +143,7 @@ func L3PerCoreFromOutput(outputs map[string]script.ScriptOutput) string {
slog.Debug("Could not get L3 size from MSR, falling back to lscpu", slog.String("error", err.Error()))
_, l3TotalMB, err = GetL3LscpuMB(outputs)
if err != nil {
slog.Error("Could not get L3 size from lscpu", slog.String("error", err.Error()))
slog.Warn("Could not get L3 size from lscpu", slog.String("error", err.Error()))
return ""
}
}
Expand Down
2 changes: 1 addition & 1 deletion internal/common/frequency.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ func UncoreMinMaxDieFrequencyFromOutput(maxFreq bool, computeDie bool, outputs m
}
}
if !found {
slog.Error("failed to find uncore die type in TPMI output", slog.String("output", outputs[script.UncoreDieTypesFromTPMIScriptName].Stdout))
slog.Warn("failed to find uncore die type in TPMI output", slog.String("output", outputs[script.UncoreDieTypesFromTPMIScriptName].Stdout))
return ""
}
// get the frequency for the found die
Expand Down
8 changes: 4 additions & 4 deletions internal/common/turbostat.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ func TurbostatPlatformRows(turboStatScriptOutput string, fieldNames []string) ([
return nil, err
}
if len(rows) == 0 {
err := fmt.Errorf("turbostat output is empty")
return nil, err
slog.Warn("no platform rows found in turbostat output")
return nil, nil
}
// filter the rows to the summary rows only
var fieldValues [][]string
Expand Down Expand Up @@ -155,8 +155,8 @@ func TurbostatPackageRows(turboStatScriptOutput string, fieldNames []string) ([]
return nil, err
}
if len(rows) == 0 {
err := fmt.Errorf("turbostat output is empty")
return nil, err
slog.Warn("no package rows found in turbostat output")
return nil, nil
}
var packageRows [][][]string
for _, row := range rows {
Expand Down
10 changes: 5 additions & 5 deletions internal/common/turbostat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,23 +98,23 @@ func TestTurbostatPlatformRows(t *testing.T) {
fieldNames: []string{"Avg_MHz", "Busy%"},
wantFirst: nil,
wantLen: 0,
expectErr: true,
expectErr: false,
},
{
name: "No output",
turbostatOutput: "",
fieldNames: []string{"Avg_MHz", "Busy%"},
wantFirst: nil,
wantLen: 0,
expectErr: true,
expectErr: false,
},
{
name: "Only time and interval, no turbostat data",
turbostatOutput: strings.Join(strings.Split(turbostatOutput, "\n")[0:2], "\n"), // Only header and no data
fieldNames: []string{"Avg_MHz", "Busy%"},
wantFirst: nil,
wantLen: 0,
expectErr: true,
expectErr: false,
},
}

Expand Down Expand Up @@ -547,7 +547,7 @@ X 0 0 1000 10
turbostatOutput: "",
fieldNames: []string{"Avg_MHz"},
want: nil,
wantErr: true,
wantErr: false,
},
{
name: "Only headers, no data",
Expand All @@ -558,7 +558,7 @@ Package Core CPU Avg_MHz Busy%
`,
fieldNames: []string{"Avg_MHz"},
want: nil,
wantErr: true,
wantErr: false,
},
}

Expand Down
2 changes: 1 addition & 1 deletion internal/script/script.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScript
}
stdout, stderr, exitcode, err := myTarget.RunCommand(cmd, 0, false)
if err != nil {
slog.Error("error running script on target", slog.String("script", script.ScriptTemplate), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error()))
slog.Warn("error running script on target", slog.String("name", script.Name), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error()))
}
scriptOutputs[script.Name] = ScriptOutput{ScriptDefinition: script, Stdout: stdout, Stderr: stderr, Exitcode: exitcode}
if !ignoreScriptErrors {
Expand Down