diff --git a/internal/workflow/load.go b/internal/workflow/load.go index a36abfc..5132d24 100644 --- a/internal/workflow/load.go +++ b/internal/workflow/load.go @@ -8,77 +8,21 @@ import ( ) // LoadWorkflowFile loads and parses a GitHub Actions workflow file from the given path. +// The parsing is optimized to extract both the workflow structure and job order +// in a single pass through the YAML node tree. func LoadWorkflowFile(path string) (*WorkflowFile, error) { data, err := os.ReadFile(path) if err != nil { return nil, fmt.Errorf("failed to read workflow file: %w", err) } - // Parse YAML into yaml.Node once (instead of parsing twice) - var root yaml.Node - if err := yaml.Unmarshal(data, &root); err != nil { - return nil, fmt.Errorf("failed to parse workflow YAML: %w", err) - } - - // Extract both WorkflowFile and JobOrder from the single parsed Node - wf, jobOrder, err := parseWorkflowFromNode(&root) - if err != nil { - return nil, fmt.Errorf("failed to parse workflow: %w", err) - } - wf.JobOrder = jobOrder - - return wf, nil -} - -// parseWorkflowFromNode extracts WorkflowFile and job order from a parsed yaml.Node. -// This avoids parsing the YAML twice by reusing the same Node for both operations. -func parseWorkflowFromNode(root *yaml.Node) (*WorkflowFile, []string, error) { + // Parse YAML and decode in a single operation. + // WorkflowFile.UnmarshalYAML handles both struct decoding and JobOrder extraction + // in a single node traversal, avoiding the overhead of double traversal. var wf WorkflowFile - // Decode the Node directly into the struct (no re-parsing needed) - if err := root.Decode(&wf); err != nil { - return nil, nil, fmt.Errorf("failed to decode workflow: %w", err) - } - - // Extract job order from the same Node - jobOrder := extractJobOrderFromNode(root) - - return &wf, jobOrder, nil -} - -// extractJobOrderFromNode extracts job IDs in definition order from a parsed yaml.Node. -func extractJobOrderFromNode(root *yaml.Node) []string { - if root == nil || len(root.Content) == 0 { - return nil - } - - // root.Content[0] is the document node - doc := root.Content[0] - if doc == nil || doc.Kind != yaml.MappingNode { - return nil - } - - // Find the "jobs" key in the mapping - for i := 0; i < len(doc.Content)-1; i += 2 { - keyNode := doc.Content[i] - valueNode := doc.Content[i+1] - - if keyNode == nil || valueNode == nil { - continue - } - - if keyNode.Value == "jobs" && valueNode.Kind == yaml.MappingNode { - // Extract job IDs in order - var jobOrder []string - for j := 0; j < len(valueNode.Content)-1; j += 2 { - jobKeyNode := valueNode.Content[j] - if jobKeyNode == nil { - continue - } - jobOrder = append(jobOrder, jobKeyNode.Value) - } - return jobOrder - } + if err := yaml.Unmarshal(data, &wf); err != nil { + return nil, fmt.Errorf("failed to parse workflow YAML: %w", err) } - return nil + return &wf, nil } diff --git a/internal/workflow/load_test.go b/internal/workflow/load_test.go index 63361ed..3b2b8f3 100644 --- a/internal/workflow/load_test.go +++ b/internal/workflow/load_test.go @@ -775,14 +775,13 @@ func TestLoadWorkflowFile_YAMLUnmarshalError(t *testing.T) { } } -// TestExtractJobOrderFromNode_SequenceRoot tests extractJobOrderFromNode with sequence at root -// This specifically covers the doc.Kind != yaml.MappingNode branch (line 56-58) -func TestExtractJobOrderFromNode_SequenceRoot(t *testing.T) { +// TestUnmarshalYAML_SequenceRoot tests UnmarshalYAML with sequence at root +// This specifically covers the node.Kind != yaml.MappingNode branch +func TestUnmarshalYAML_SequenceRoot(t *testing.T) { t.Parallel() tmpDir := t.TempDir() // Create YAML with sequence at root - this will parse but decode will fail - // However, we need to test the extractJobOrderFromNode function path yamlContent := `--- - item1 - item2` @@ -792,12 +791,9 @@ func TestExtractJobOrderFromNode_SequenceRoot(t *testing.T) { } _, err := LoadWorkflowFile(workflowPath) - // This should error or return empty JobOrder - if err != nil { - // Error is expected since root is not a mapping - if !strings.Contains(err.Error(), "decode workflow") { - t.Logf("Got error: %v", err) - } + // This should error since root is not a mapping + if err == nil { + t.Error("LoadWorkflowFile() expected error for sequence root") } } @@ -955,147 +951,101 @@ func TestDiscoverWorkflows_SymlinkToDirectory(t *testing.T) { }) } -// TestExtractJobOrderFromNode_DirectCall tests extractJobOrderFromNode directly -// to cover the doc.Kind != yaml.MappingNode branch -func TestExtractJobOrderFromNode_DirectCall(t *testing.T) { +// TestUnmarshalYAML_DirectCall tests WorkflowFile.UnmarshalYAML with various node types +func TestUnmarshalYAML_DirectCall(t *testing.T) { t.Parallel() // Test with sequence node (not mapping) - t.Run("sequence node returns nil", func(t *testing.T) { - var root yaml.Node + t.Run("sequence node returns error", func(t *testing.T) { yamlContent := `- item1 - item2` - if err := yaml.Unmarshal([]byte(yamlContent), &root); err != nil { - t.Fatalf("failed to unmarshal YAML: %v", err) - } - - result := extractJobOrderFromNode(&root) - if result != nil { - t.Errorf("extractJobOrderFromNode() = %v, want nil for sequence node", result) + var wf WorkflowFile + err := yaml.Unmarshal([]byte(yamlContent), &wf) + if err == nil { + t.Error("UnmarshalYAML should return error for sequence node") } }) // Test with scalar node - t.Run("scalar node returns nil", func(t *testing.T) { - var root yaml.Node + t.Run("scalar node returns error", func(t *testing.T) { yamlContent := `just a string` - if err := yaml.Unmarshal([]byte(yamlContent), &root); err != nil { - t.Fatalf("failed to unmarshal YAML: %v", err) - } - - result := extractJobOrderFromNode(&root) - if result != nil { - t.Errorf("extractJobOrderFromNode() = %v, want nil for scalar node", result) + var wf WorkflowFile + err := yaml.Unmarshal([]byte(yamlContent), &wf) + if err == nil { + t.Error("UnmarshalYAML should return error for scalar node") } }) - // Test with empty node - t.Run("empty node returns nil", func(t *testing.T) { - root := &yaml.Node{} - result := extractJobOrderFromNode(root) - if result != nil { - t.Errorf("extractJobOrderFromNode() = %v, want nil for empty node", result) + // Test with empty document + t.Run("empty document returns empty workflow", func(t *testing.T) { + yamlContent := `` + var wf WorkflowFile + err := yaml.Unmarshal([]byte(yamlContent), &wf) + if err != nil { + t.Errorf("UnmarshalYAML error = %v for empty document", err) } - }) - - // Test with nil key node in doc.Content (line 65) - t.Run("nil key node in content is skipped", func(t *testing.T) { - root := &yaml.Node{ - Kind: yaml.DocumentNode, - Content: []*yaml.Node{ - { - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - nil, // nil key node - {Kind: yaml.ScalarNode, Value: "value"}, - {Kind: yaml.ScalarNode, Value: "jobs"}, - { - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "job1"}, - {Kind: yaml.MappingNode}, - }, - }, - }, - }, - }, - } - result := extractJobOrderFromNode(root) - // Should skip the nil key and find "jobs" - if len(result) != 1 || result[0] != "job1" { - t.Errorf("extractJobOrderFromNode() = %v, want [job1]", result) + if wf.Name != "" || len(wf.Jobs) != 0 || len(wf.JobOrder) != 0 { + t.Errorf("UnmarshalYAML should return empty workflow for empty document") } }) - // Test with nil value node in doc.Content (line 65) - t.Run("nil value node in content is skipped", func(t *testing.T) { - root := &yaml.Node{ - Kind: yaml.DocumentNode, - Content: []*yaml.Node{ - { - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "name"}, - nil, // nil value node - {Kind: yaml.ScalarNode, Value: "jobs"}, - { - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "job1"}, - {Kind: yaml.MappingNode}, - }, - }, - }, - }, - }, - } - result := extractJobOrderFromNode(root) - // Should skip the nil value and find "jobs" - if len(result) != 1 || result[0] != "job1" { - t.Errorf("extractJobOrderFromNode() = %v, want [job1]", result) + // Test valid workflow with jobs + t.Run("valid workflow extracts job order correctly", func(t *testing.T) { + yamlContent := `name: Test +jobs: + first: + runs-on: ubuntu-latest + steps: + - run: echo first + second: + runs-on: ubuntu-latest + steps: + - run: echo second` + var wf WorkflowFile + err := yaml.Unmarshal([]byte(yamlContent), &wf) + if err != nil { + t.Fatalf("UnmarshalYAML error = %v", err) + } + if wf.Name != "Test" { + t.Errorf("Name = %q, want %q", wf.Name, "Test") + } + if len(wf.JobOrder) != 2 || wf.JobOrder[0] != "first" || wf.JobOrder[1] != "second" { + t.Errorf("JobOrder = %v, want [first, second]", wf.JobOrder) } }) - // Test with nil job key node in jobs.Content (line 74) - t.Run("nil job key node is skipped", func(t *testing.T) { - root := &yaml.Node{ - Kind: yaml.DocumentNode, - Content: []*yaml.Node{ - { - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - {Kind: yaml.ScalarNode, Value: "jobs"}, - { - Kind: yaml.MappingNode, - Content: []*yaml.Node{ - nil, // nil job key node - {Kind: yaml.MappingNode}, - {Kind: yaml.ScalarNode, Value: "job2"}, - {Kind: yaml.MappingNode}, - }, - }, - }, - }, - }, - } - result := extractJobOrderFromNode(root) - // Should skip the nil job key and find "job2" - if len(result) != 1 || result[0] != "job2" { - t.Errorf("extractJobOrderFromNode() = %v, want [job2]", result) + // Test workflow with env + t.Run("workflow with env extracts correctly", func(t *testing.T) { + yamlContent := `name: Test +env: + KEY1: value1 + KEY2: value2` + var wf WorkflowFile + err := yaml.Unmarshal([]byte(yamlContent), &wf) + if err != nil { + t.Fatalf("UnmarshalYAML error = %v", err) + } + if wf.Env["KEY1"] != "value1" || wf.Env["KEY2"] != "value2" { + t.Errorf("Env = %v, want {KEY1:value1, KEY2:value2}", wf.Env) } }) - // Test with nil doc node (line 56) - t.Run("nil doc node returns nil", func(t *testing.T) { - root := &yaml.Node{ - Kind: yaml.DocumentNode, - Content: []*yaml.Node{ - nil, // nil doc node - }, - } - result := extractJobOrderFromNode(root) - if result != nil { - t.Errorf("extractJobOrderFromNode() = %v, want nil for nil doc node", result) + // Test document node wrapper handling + t.Run("document node wrapper is handled", func(t *testing.T) { + yamlContent := `--- +name: DocTest +jobs: + test: + runs-on: ubuntu-latest + steps: + - run: echo test` + var wf WorkflowFile + err := yaml.Unmarshal([]byte(yamlContent), &wf) + if err != nil { + t.Fatalf("UnmarshalYAML error = %v", err) + } + if wf.Name != "DocTest" { + t.Errorf("Name = %q, want %q", wf.Name, "DocTest") } }) } diff --git a/internal/workflow/model.go b/internal/workflow/model.go index 1cf7e99..53ee1e1 100644 --- a/internal/workflow/model.go +++ b/internal/workflow/model.go @@ -1,6 +1,10 @@ package workflow -import "gopkg.in/yaml.v3" +import ( + "fmt" + + "gopkg.in/yaml.v3" +) // StringOrSlice handles both single string and string array YAML values. // This is used for fields like "needs" which can be either: @@ -17,6 +21,18 @@ func (s *StringOrSlice) UnmarshalYAML(value *yaml.Node) error { } // Array case: needs: [build, test] + if value.Kind == yaml.SequenceNode { + result := make([]string, 0, len(value.Content)) + for _, item := range value.Content { + if item != nil && item.Kind == yaml.ScalarNode { + result = append(result, item.Value) + } + } + *s = result + return nil + } + + // Fallback for unexpected types var slice []string if err := value.Decode(&slice); err != nil { return err @@ -37,6 +53,86 @@ type WorkflowFile struct { JobOrder []string `yaml:"-"` } +// UnmarshalYAML implements custom YAML unmarshaling for WorkflowFile. +// This extracts both the struct fields and JobOrder in a single node traversal, +// avoiding the overhead of traversing the YAML node tree twice. +func (w *WorkflowFile) UnmarshalYAML(node *yaml.Node) error { + // Handle document node wrapper + if node.Kind == yaml.DocumentNode { + if len(node.Content) == 0 { + return nil + } + node = node.Content[0] + } + + if node.Kind != yaml.MappingNode { + // Use default decoder for non-mapping nodes (will likely error) + type rawWorkflowFile WorkflowFile + return node.Decode((*rawWorkflowFile)(w)) + } + + // Single-pass extraction: decode fields and extract job order simultaneously + for i := 0; i < len(node.Content)-1; i += 2 { + keyNode := node.Content[i] + valueNode := node.Content[i+1] + + if keyNode == nil || valueNode == nil { + continue + } + + switch keyNode.Value { + case "name": + w.Name = valueNode.Value + case "env": + if valueNode.Kind == yaml.MappingNode { + w.Env = make(map[string]string, len(valueNode.Content)/2) + for j := 0; j < len(valueNode.Content)-1; j += 2 { + envKey := valueNode.Content[j] + envVal := valueNode.Content[j+1] + if envKey != nil && envVal != nil { + w.Env[envKey.Value] = envVal.Value + } + } + } + case "jobs": + // Jobs must be a mapping node (not sequence or scalar) + if valueNode.Kind == yaml.SequenceNode { + // Return error for invalid jobs format (sequence instead of mapping) + return fmt.Errorf("cannot unmarshal !!seq into map[string]workflow.Job") + } + if valueNode.Kind == yaml.MappingNode { + // Pre-allocate maps with exact capacity + jobCount := len(valueNode.Content) / 2 + w.Jobs = make(map[string]Job, jobCount) + w.JobOrder = make([]string, 0, jobCount) + + // Extract jobs and job order in single pass + for j := 0; j < len(valueNode.Content)-1; j += 2 { + jobKeyNode := valueNode.Content[j] + jobValueNode := valueNode.Content[j+1] + + if jobKeyNode == nil { + continue + } + + jobID := jobKeyNode.Value + w.JobOrder = append(w.JobOrder, jobID) + + if jobValueNode != nil { + var job Job + if err := job.UnmarshalYAML(jobValueNode); err != nil { + return err + } + w.Jobs[jobID] = job + } + } + } + } + } + + return nil +} + // Job represents a job in a workflow. type Job struct { // Name is the display name of the job. @@ -52,6 +148,73 @@ type Job struct { Steps []Step `yaml:"steps"` } +// UnmarshalYAML implements custom YAML unmarshaling for Job. +// This avoids reflection overhead by directly parsing YAML nodes. +func (j *Job) UnmarshalYAML(node *yaml.Node) error { + if node.Kind != yaml.MappingNode { + type rawJob Job + return node.Decode((*rawJob)(j)) + } + + for i := 0; i < len(node.Content)-1; i += 2 { + keyNode := node.Content[i] + valueNode := node.Content[i+1] + + if keyNode == nil || valueNode == nil { + continue + } + + switch keyNode.Value { + case "name": + j.Name = valueNode.Value + case "runs-on": + j.RunsOn = valueNode.Value + case "needs": + // Handle needs directly to avoid Decode overhead + if valueNode.Kind == yaml.ScalarNode { + j.Needs = []string{valueNode.Value} + } else if valueNode.Kind == yaml.SequenceNode { + j.Needs = make([]string, 0, len(valueNode.Content)) + for _, item := range valueNode.Content { + if item != nil && item.Kind == yaml.ScalarNode { + j.Needs = append(j.Needs, item.Value) + } + } + } else if valueNode.Kind == yaml.MappingNode { + // needs must be a string or array, not a mapping + return fmt.Errorf("cannot unmarshal !!map into []string") + } + case "env": + if valueNode.Kind == yaml.MappingNode { + j.Env = make(map[string]string, len(valueNode.Content)/2) + for k := 0; k < len(valueNode.Content)-1; k += 2 { + envKey := valueNode.Content[k] + envVal := valueNode.Content[k+1] + if envKey != nil && envVal != nil { + j.Env[envKey.Value] = envVal.Value + } + } + } + case "steps": + if valueNode.Kind == yaml.SequenceNode { + j.Steps = make([]Step, 0, len(valueNode.Content)) + for _, stepNode := range valueNode.Content { + if stepNode == nil { + continue + } + var step Step + if err := step.UnmarshalYAML(stepNode); err != nil { + return err + } + j.Steps = append(j.Steps, step) + } + } + } + } + + return nil +} + // Step represents a step in a job. type Step struct { // ID is an optional unique identifier for the step. @@ -76,6 +239,63 @@ type Step struct { WorkingDirectory string `yaml:"working-directory"` } +// UnmarshalYAML implements custom YAML unmarshaling for Step. +// This avoids reflection overhead by directly parsing YAML nodes. +func (s *Step) UnmarshalYAML(node *yaml.Node) error { + if node.Kind != yaml.MappingNode { + type rawStep Step + return node.Decode((*rawStep)(s)) + } + + for i := 0; i < len(node.Content)-1; i += 2 { + keyNode := node.Content[i] + valueNode := node.Content[i+1] + + if keyNode == nil || valueNode == nil { + continue + } + + switch keyNode.Value { + case "id": + s.ID = valueNode.Value + case "name": + s.Name = valueNode.Value + case "if": + s.If = valueNode.Value + case "run": + s.Run = valueNode.Value + case "uses": + s.Uses = valueNode.Value + case "with": + if valueNode.Kind == yaml.MappingNode { + s.With = make(map[string]string, len(valueNode.Content)/2) + for k := 0; k < len(valueNode.Content)-1; k += 2 { + withKey := valueNode.Content[k] + withVal := valueNode.Content[k+1] + if withKey != nil && withVal != nil { + s.With[withKey.Value] = withVal.Value + } + } + } + case "env": + if valueNode.Kind == yaml.MappingNode { + s.Env = make(map[string]string, len(valueNode.Content)/2) + for k := 0; k < len(valueNode.Content)-1; k += 2 { + envKey := valueNode.Content[k] + envVal := valueNode.Content[k+1] + if envKey != nil && envVal != nil { + s.Env[envKey.Value] = envVal.Value + } + } + } + case "working-directory": + s.WorkingDirectory = valueNode.Value + } + } + + return nil +} + // IsAction returns true if this step uses a GitHub Action (has uses: field). func (s *Step) IsAction() bool { return s.Uses != ""