diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go
index e724929fc40..38f2f743720 100644
--- a/pkg/testing/fixture.go
+++ b/pkg/testing/fixture.go
@@ -860,6 +860,68 @@ func (f *Fixture) ExecStatus(ctx context.Context, opts ...statusOpt) (AgentStatu
 	}
 }
 
+// ExecStatusRaw executes `elastic-agent status --output=json`.
+//
+// Returns the output parsed as map[string]any and the error from the execution. Keep in mind the agent exits with status 1 if it's
+// unhealthy, but it still outputs the status successfully. This call does require that the Elastic Agent is running
+// and communication over the control protocol is working.
+//
+// By default, retry logic is applied. Use WithNoRetry to disable this behavior. WithRetryTimeout and WithRetryInterval
+// can be used to adjust the retry logic timing. The default retry timeout is one minute and the default retry
+// interval is one second.
+//
+// A non-nil error means the status could not be obtained: the output was not valid JSON, was empty, or the
+// context ended first. The returned map is not meaningful in that case. As long as we get some non-empty JSON
+// output, no error is returned, even if the command itself failed. It should work with any 8.6+ agent.
+func (f *Fixture) ExecStatusRaw(ctx context.Context, opts ...statusOpt) (map[string]any, error) {
+	var opt statusOpts
+	opt.retryTimeout = 1 * time.Minute
+	opt.retryInterval = 1 * time.Second
+	for _, o := range opts {
+		o(&opt)
+	}
+
+	var cancel context.CancelFunc
+	if opt.noRetry || opt.retryTimeout == 0 {
+		ctx, cancel = context.WithCancel(ctx)
+	} else {
+		ctx, cancel = context.WithTimeout(ctx, opt.retryTimeout)
+	}
+	defer cancel()
+
+	var lastErr error
+	for {
+		if ctx.Err() != nil {
+			if errors.Is(ctx.Err(), context.DeadlineExceeded) && lastErr != nil {
+				// return the last observed error
+				return nil, fmt.Errorf("agent status returned an error: %w", lastErr)
+			}
+			return nil, fmt.Errorf("agent status failed: %w", ctx.Err())
+		}
+		out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opt.cmdOptions...)
+		status := map[string]any{}
+		if uerr := json.Unmarshal(out, &status); uerr != nil {
+			// unmarshal error means that json was not outputted due to a communication error
+			lastErr = fmt.Errorf("could not unmarshal agent status output: %w:\n%s", errors.Join(uerr, err), out)
+		} else if len(status) == 0 {
+			// still not correct try again for a successful status
+			// wrap the exec error only when there is one: %w with a nil error renders as %!w(<nil>)
+			lastErr = errors.New("agent status output is empty")
+			if err != nil {
+				lastErr = fmt.Errorf("agent status output is empty: %w", err)
+			}
+		} else {
+			return status, nil
+		}
+
+		if opt.noRetry {
+			return status, lastErr
+		}
+
+		sleepFor(ctx, opt.retryInterval)
+	}
+}
+
 // ExecInspect executes to inspect subcommand on the prepared Elastic Agent binary.
 // It returns the parsed output and the error from the execution or an empty
 // AgentInspectOutput and the unmarshalling error if it cannot unmarshal the
diff --git a/testing/integration/ess/proxy_url_test.go b/testing/integration/ess/proxy_url_test.go
index 3af49bb4ec2..43006ee2855 100644
--- a/testing/integration/ess/proxy_url_test.go
+++ b/testing/integration/ess/proxy_url_test.go
@@ -824,8 +824,6 @@ func TestFleetDownloadProxyURL(t *testing.T) {
 		Sudo: true,
 	})
 
-	t.Skip("Flaky test, see https://github.com/elastic/elastic-agent/issues/11796#issuecomment-3667342065")
-
 	ctx := t.Context()
 	kibClient := info.KibanaClient
 	fleetServerURL, err := fleettools.DefaultURL(ctx, kibClient)
@@ -948,10 +946,12 @@ func TestFleetDownloadProxyURL(t *testing.T) {
 
 	t.Log("Ensure upgrade has failed")
 	require.EventuallyWithT(t, func(c *assert.CollectT) {
-		agent, err := kibClient.GetAgent(ctx, kibana.GetAgentRequest{ID: agentID})
+		status, err := startFixture.ExecStatusRaw(ctx)
 		require.NoError(c, err)
-		require.NotNil(c, agent.UpgradeDetails)
-		require.Equal(c, "UPG_FAILED", agent.UpgradeDetails.State)
+		require.NotNil(c, status["upgrade_details"], "Agent status does not contain upgrade_details.")
+		upgradeDetails, ok := status["upgrade_details"].(map[string]any)
+		require.True(c, ok, "expected upgrade_details to be an object, got type: %T", status["upgrade_details"])
+		require.Equal(c, "UPG_FAILED", upgradeDetails["state"])
 	}, time.Minute*5, time.Second, "Unable to verify that upgrade has failed.")
 
 	proxy := proxytest.New(t,
@@ -997,9 +997,9 @@ func TestFleetDownloadProxyURL(t *testing.T) {
 
 	t.Log("Ensure upgrade starts")
 	require.EventuallyWithT(t, func(c *assert.CollectT) {
-		agent, err := kibClient.GetAgent(ctx, kibana.GetAgentRequest{ID: agentID})
+		status, err := startFixture.ExecStatusRaw(ctx)
 		require.NoError(c, err)
-		require.NotNil(c, agent.UpgradeDetails)
+		require.NotNil(c, status["upgrade_details"], "Agent status does not contain upgrade_details.")
 	}, time.Minute*5, time.Second, "Unable to verify that upgrade details appear.")
 
 	t.Log("Waiting for upgrade watcher to start...")