diff --git a/README.md b/README.md index b5532df..9439b3b 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,16 @@ * [x] Improve CLI output with colors (fatih/color) šŸŽØ * [x] Create a --output flag to save analyses in .md files * [] Create a repo-review command to analyze an entire repository + * [] Allow filtering by file type? (--extensions=.go,.js) + * [] Implement parallel processing for faster analysis? + * [] Integrate repository history analysis? (git blame) + * [x] Use all files to train the AI, for code review and also for giving a general repository summary and description + * [] Optimize large repositories? (Chunking source code before sending to AI) + * [] Implement --skip-tests flag? (Exclude test files from AI training) + * [] Parallelize file fetching? (Speed up repository scanning) +* [] Run tests with coverage reports? (go test -cover) +* [] Integrate GitHub Actions for CI/CD testing? +* [] Add benchmark tests for performance? (go test -bench) * [] Create a commit-review command to analyze individual commits * [] Create a branch-review command to review an entire branch * [] Add support for multiple programming languages @@ -12,7 +22,6 @@ * [] Implement support for third-party plugins * [] Create an improved help command with examples * [] Create a --verbose mode to display detailed logs -* [] Create a web version of the tool with Next.js * [] Add support to run the CLI inside Docker * [] Create an offline mode that works without OpenAI * [] Improve error handling and exception support diff --git a/cmd/pr_review_test.go b/cmd/pr_review_test.go index ad9d233..1df9108 100644 --- a/cmd/pr_review_test.go +++ b/cmd/pr_review_test.go @@ -2,8 +2,7 @@ package cmd import ( "github.com/google/go-github/v49/github" - "github.com/gsoares85/code-guardian/internal/github_internal" - "github.com/gsoares85/code-guardian/internal/openai" + "github.com/gsoares85/code-guardian/mocks" "github.com/stretchr/testify/assert" "os" "path/filepath" @@ -15,10 +14,10 @@ func 
TestGenerateMarkdownReport(t *testing.T) { prTitle := "Fix memory leak" prOwner := "testUser" prNumber := 23 - pr, err := github_internal.MockGetPullRequest(prOwner, prTitle, prNumber) - files, err := github_internal.MockGetPullRequestFiles(prOwner, prTitle, prNumber) - prDiff, err := github_internal.MockGetPullRequestDiff(prOwner, prTitle, prNumber) - aiFeedback, err := openai.MockAnalyzePRWithAI(prDiff) + pr, err := mocks.MockGetPullRequest(prOwner, prTitle, prNumber) + files, err := mocks.MockGetPullRequestFiles(prOwner, prTitle, prNumber) + prDiff, err := mocks.MockGetPullRequestDiff(prOwner, prTitle, prNumber) + aiFeedback, err := mocks.MockAnalyzePRWithAI(prDiff) report := generateMarkdownReport(pr, files, prDiff, aiFeedback) @@ -49,16 +48,16 @@ func TestSaveAnalysisToFile(t *testing.T) { func TestAnalyzePullRequest(t *testing.T) { // Use local variables in the test function to "mock" external functionality. mockGetPullRequest := func(owner, title string, number int) (*github.PullRequest, error) { - return github_internal.MockGetPullRequest(owner, title, number) + return mocks.MockGetPullRequest(owner, title, number) } mockGetPullRequestFiles := func(owner, title string, number int) ([]string, error) { - return github_internal.MockGetPullRequestFiles(owner, title, number) + return mocks.MockGetPullRequestFiles(owner, title, number) } mockGetPullRequestDiff := func(owner, title string, number int) (string, error) { - return github_internal.MockGetPullRequestDiff(owner, title, number) + return mocks.MockGetPullRequestDiff(owner, title, number) } mockAnalyzePRWithAI := func(prDiff string) (string, error) { - return openai.MockAnalyzePRWithAI(prDiff) + return mocks.MockAnalyzePRWithAI(prDiff) } // Use the mocks for testing by calling them explicitly. 
diff --git a/cmd/repo_review.go b/cmd/repo_review.go
new file mode 100644
index 0000000..8ae30d7
--- /dev/null
+++ b/cmd/repo_review.go
@@ -0,0 +1,186 @@
+package cmd
+
+import (
+	"fmt"
+	"github.com/fatih/color"
+	"github.com/gsoares85/code-guardian/internal/github_internal"
+	"github.com/gsoares85/code-guardian/internal/openai"
+	"github.com/spf13/cobra"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// repoReviewCmd implements the repo-review command: it lists every file in a
+// GitHub repository, concatenates their contents, asks the AI for five report
+// sections, prints them, and saves them as Markdown when --output is set.
+var repoReviewCmd = &cobra.Command{
+	Use: "repo-review [owner] [repo] [flags]",
+	Short: "Analyse an entire github repository",
+	Long: `This command scans all source files recursively in a repository
+	to train the AI about the application and provide:
+	- A summary of what the application does
+	- Key use cases
+	- A high-level code quality review (only critical issues)
+	- A high-level security review (only critical issues)
+	- Key improvement areas`,
+	Args: cobra.ExactArgs(2),
+	Run: func(cmd *cobra.Command, args []string) {
+		owner, repo := args[0], args[1]
+		saveOutput, err := cmd.Flags().GetBool("output")
+		if err != nil {
+			color.Red("Error: %v", err)
+			return
+		}
+
+		color.Blue("\nšŸ” Fetching all source code files recursively...\n")
+		files, err := github_internal.GetRepositoryFilesRecursive(owner, repo)
+		if err != nil {
+			color.Red("āŒ ERROR: Fetching repository files: %s\n", err)
+			return
+		}
+
+		color.Green("šŸ“‚ Repository contains %d files\n", len(files))
+
+		sourceCode := fetchAllSourceCode(owner, repo, files)
+		if len(sourceCode) == 0 {
+			color.Red("āŒ ERROR: No valid source code found for analysis")
+			return
+		}
+
+		color.Blue("\nšŸ¤– Training AI with full source code...\n")
+		summary, useCases, codeReview, securityReview, improvements := analyzeRepositoryWithAI(sourceCode)
+
+		displayRepoAnalysis(repo, summary, useCases, codeReview, securityReview, improvements)
+
+		if saveOutput {
+			saveRepoAnalysis(repo, owner, summary, useCases, codeReview, securityReview, improvements)
+		}
+	},
+}
+
+// fetchAllSourceCode downloads each path in files from owner/repo and returns
+// the contents concatenated, each preceded by a "// File: <path>" line.
+// Paths ending in ".md" or containing "LICENSE" are skipped. A file that
+// cannot be fetched is reported on the console and skipped.
+func fetchAllSourceCode(owner, repo string, files []string) string {
+	var allCode strings.Builder
+
+	for _, file := range files {
+		if strings.HasSuffix(file, ".md") || strings.Contains(file, "LICENSE") {
+			continue
+		}
+
+		color.Cyan("\nšŸ“„ Reading file: %s", file)
+
+		content, err := github_internal.GetFileContent(owner, repo, file)
+		if err != nil {
+			color.Red("āŒ ERROR fetching file content: %s\n", err)
+			continue
+		}
+
+		fmt.Fprintf(&allCode, "\n// File: %s\n%s\n", file, content)
+	}
+
+	return allCode.String()
+}
+
+// analyzeRepositoryWithAI queries the AI once per report section and returns,
+// in order: summary, use cases, code quality review, security review, and
+// improvement suggestions. A section whose request fails is reported on the
+// console and replaced by a short "Analysis unavailable" note, so a failure
+// neither aborts the report nor leaves an unexplained blank section.
+func analyzeRepositoryWithAI(sourceCode string) (string, string, string, string, string) {
+	summaryPrompt := "Analyze this entire source codebase and provide a concise summary of what the application does."
+	useCasesPrompt := "Extract the most important use cases from the source code."
+	codeQualityPrompt := "Identify the most critical code quality issues found in the source code. Provide a brief list."
+	securityPrompt := "Identify the most critical security vulnerabilities in the source code. Provide a brief list."
+	improvementPrompt := "Suggest the most important areas to improve in the application."
+
+	// ask runs one prompt against sourceCode and turns a failure into a
+	// visible placeholder instead of silently yielding an empty section.
+	ask := func(section, prompt string) string {
+		answer, err := openai.AnalyzeCodeWithAI(sourceCode, prompt)
+		if err != nil {
+			color.Red("ERROR generating %s: %s\n", section, err)
+			return fmt.Sprintf("_Analysis unavailable: %s_", err)
+		}
+		return answer
+	}
+
+	summary := ask("application summary", summaryPrompt)
+	useCases := ask("use cases", useCasesPrompt)
+	codeReview := ask("code quality review", codeQualityPrompt)
+	securityReview := ask("security review", securityPrompt)
+	improvements := ask("improvement suggestions", improvementPrompt)
+
+	return summary, useCases, codeReview, securityReview, improvements
+}
+
+// displayRepoAnalysis prints the five report sections for repo to the
+// console, each under its own colored heading.
+func displayRepoAnalysis(repo, summary, useCases, codeReview, securityReview, improvements string) {
+	color.Magenta("\nšŸ“Œ Repository Analysis Summary for %s\n", repo)
+	color.Cyan("\nšŸ“– Application Summary:\n")
+	fmt.Println(summary)
+
+	color.Green("\nāœ… Key Use Cases:\n")
+	fmt.Println(useCases)
+
+	color.Red("\n🚨 Code Quality Issues (Critical Only):\n")
+	fmt.Println(codeReview)
+
+	color.Yellow("\nšŸ”’ Security Issues (Critical Only):\n")
+	fmt.Println(securityReview)
+
+	color.Blue("\nšŸ“ˆ Key Areas for Improvement:\n")
+	fmt.Println(improvements)
+}
+
+// saveRepoAnalysis renders the report as Markdown and writes it to
+// reports/repo/<timestamp>-<repo>_<owner>.md via saveAnalysisToFile,
+// printing either the saved path or the error.
+func saveRepoAnalysis(repo, owner, summary, useCases, codeReview, securityReview, improvements string) {
+	timestamp := time.Now().Format("20060102-150405")
+	outputFile := fmt.Sprintf("%s-%s_%s.md", timestamp, repo, owner)
+	outputPath := filepath.Join("reports", "repo", outputFile)
+
+	content := generateRepoMarkdown(repo, summary, useCases, codeReview, securityReview, improvements)
+
+	if err := saveAnalysisToFile(outputPath, content); err != nil {
+		color.Red("āŒ ERROR saving analysis: %s\n", err)
+		return
+	}
+	color.Green("āœ… Repository analysis saved to file: %s\n", outputPath)
+}
+
+// generateRepoMarkdown formats the five report sections for repo as a
+// Markdown document with one second-level heading per section.
+func generateRepoMarkdown(repo, summary, useCases, codeReview, securityReview, improvements string) string {
+	return fmt.Sprintf(`# Repository Analysis Report
+
+## šŸ“‚ Repository: %s
+
+## šŸ“– Application Summary:
+%s
+
+## āœ… Key Use Cases:
+%s
+
+## 🚨 Code Quality Issues (Critical Only):
+%s
+
+## šŸ”’ Security Issues (Critical Only):
+%s
+
+## šŸ“ˆ Key Areas for Improvement:
+%s
+`, repo, summary, useCases, codeReview, securityReview, improvements)
+}
+
+// init registers repo-review on the root command and declares its
+// --output/-o flag.
+func init() {
+	rootCmd.AddCommand(repoReviewCmd)
+	repoReviewCmd.Flags().BoolP("output", "o", false, "Save analysis to a Markdown file in ./reports/repo/")
+}
diff --git a/internal/github_internal/github.go b/internal/github_internal/github.go
index 16dda86..03c281a 100644
--- a/internal/github_internal/github.go
+++ b/internal/github_internal/github.go
@@ -57,3 +57,70 @@ func GetPullRequestFiles(owner string, repo string, prNumber int) ([]string, err
 	}
 	return fileNames, nil
 }
+
+// GetRepositoryFilesRecursive returns the path of every file in owner/repo,
+// walking directories recursively from the repository root.
+func GetRepositoryFilesRecursive(owner, repo string) ([]string, error) {
+	client := NewGithubClient()
+	var files []string
+
+	err := fetchFilesRecursive(client, owner, repo, "", &files)
+	if err != nil {
+		return nil, fmt.Errorf("error fetching repository files: %w", err)
+	}
+
+	return files, nil
+}
+
+// fetchFilesRecursive appends to files the path of every file found at or
+// below path. A single-file result is appended directly; a directory listing
+// is walked entry by entry. An error from any subdirectory stops the walk and
+// is returned, so callers never receive a silently truncated list.
+func fetchFilesRecursive(client *github.Client, owner, repo, path string, files *[]string) error {
+	contents, dirContents, _, err := client.Repositories.GetContents(context.Background(), owner, repo, path, nil)
+	if err != nil {
+		return err
+	}
+
+	if contents != nil {
+		*files = append(*files, contents.GetPath())
+		return nil
+	}
+
+	for _, item := range dirContents {
+		switch item.GetType() {
+		case "file":
+			*files = append(*files, item.GetPath())
+		case "dir":
+			if err := fetchFilesRecursive(client, owner, repo, item.GetPath(), files); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// GetFileContent returns the decoded content of the file at path in
+// owner/repo. It returns an error when the request fails, when the result
+// holds no single-file entry, or when the content cannot be decoded.
+func GetFileContent(owner, repo, path string) (string, error) {
+	client := NewGithubClient()
+	fileContent, _, _, err := client.Repositories.GetContents(context.Background(), owner, repo, path, nil)
+	if err != nil {
+		return "", fmt.Errorf("error fetching file content: %w", err)
+	}
+
+	// fileContent is nil when the result is a directory listing; calling
+	// GetContent on it would dereference a nil pointer.
+	if fileContent == nil {
+		return "", fmt.Errorf("error fetching file content: %q is not a file", path)
+	}
+
+	content, err := fileContent.GetContent()
+	if err != nil {
+		return "", fmt.Errorf("error decoding file content: %w", err)
+	}
+
+	return content, nil
+}
diff --git a/internal/openai/openai.go b/internal/openai/openai.go index
44c99fd..8657799 100644
--- a/internal/openai/openai.go
+++ b/internal/openai/openai.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"github.com/gsoares85/code-guardian/config"
 	"github.com/sashabaranov/go-openai"
+	"strings"
 )
 
 func AnalyzePRWithAI(diff string) (string, error) {
@@ -43,3 +44,78 @@ func AnalyzePRWithAI(diff string) (string, error) {
 
 	return resp.Choices[0].Message.Content, nil
 }
+
+// AnalyzeCodeWithAI asks the OpenAI chat API to answer prompt about code.
+// The code is split with SplitLargeCode, every chunk is sent as its own
+// request prefixed with prompt, and the answers are concatenated in order,
+// each followed by a blank line. Code without any non-whitespace content
+// produces no request and an empty result.
+// It returns an error when OPENAI_API_KEY is unset, when a request fails, or
+// when a response carries no choices; partial answers are then discarded.
+func AnalyzeCodeWithAI(code string, prompt string) (string, error) {
+	apiKey := config.GetEnv("OPENAI_API_KEY")
+	if apiKey == "" {
+		return "", fmt.Errorf("missing OpenAI API key")
+	}
+
+	client := openai.NewClient(apiKey)
+
+	// Upper bound on words (not model tokens) sent per request.
+	const maxWordsPerRequest = 3000
+	codeChunks := SplitLargeCode(code, maxWordsPerRequest)
+
+	var fullResponse strings.Builder
+
+	for i, chunk := range codeChunks {
+		requestPrompt := fmt.Sprintf("%s\n\n%s", prompt, chunk)
+
+		resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
+			Model: openai.GPT4,
+			Messages: []openai.ChatCompletionMessage{
+				{Role: "system", Content: "You are a senior software engineer reviewing code."},
+				{Role: "user", Content: requestPrompt},
+			},
+		})
+
+		if err != nil {
+			return "", fmt.Errorf("analyzing chunk %d of %d: %w", i+1, len(codeChunks), err)
+		}
+
+		if len(resp.Choices) == 0 {
+			return "", fmt.Errorf("empty response from OpenAI")
+		}
+
+		fullResponse.WriteString(resp.Choices[0].Message.Content + "\n\n")
+	}
+
+	return fullResponse.String(), nil
+}
+
+// SplitLargeCode breaks code into chunks of at most maxTokens
+// whitespace-separated words. Despite the parameter name, the limit counts
+// words, not model tokens. Words are re-joined with single spaces, so line
+// breaks and indentation of the input are not preserved.
+// It returns nil when code contains no words. When maxTokens is not positive
+// or already covers every word, the whole input is returned as one chunk.
+func SplitLargeCode(code string, maxTokens int) []string {
+	words := strings.Fields(code)
+	if len(words) == 0 {
+		return nil
+	}
+	// A non-positive limit would never advance the loop (0) or would slice out
+	// of range (negative); a limit >= len(words) needs no splitting at all.
+	if maxTokens <= 0 || maxTokens >= len(words) {
+		return []string{strings.Join(words, " ")}
+	}
+
+	chunks := make([]string, 0, (len(words)+maxTokens-1)/maxTokens)
+	for start := 0; start < len(words); start += maxTokens {
+		end := start + maxTokens
+		if end > len(words) {
+			end = len(words)
+		}
+		chunks = append(chunks, strings.Join(words[start:end], " "))
+	}
+
+	return chunks
+}
diff --git a/internal/github_internal/github_mock.go b/mocks/github_mock.go
similarity index 62%
rename from internal/github_internal/github_mock.go
rename to mocks/github_mock.go
index 9219ee3..b304713 100644
--- a/internal/github_internal/github_mock.go
+++ b/mocks/github_mock.go
@@ -1,8 +1,8 @@
-package github_internal
+package mocks
 
 import "github.com/google/go-github/v49/github"
 
-// Mock function to simulate fetching a PR
+// MockGetPullRequest returns a canned pull request whose Number is prNumber.
 func MockGetPullRequest(owner string, repo string, prNumber int) (*github.PullRequest, error) {
 	return &github.PullRequest{
 		Number: github.Int(prNumber),
@@ -12,12 +12,12 @@ func MockGetPullRequest(owner string, repo string, prNumber int) (*github.PullRe
 	}, nil
 }
 
-// Mock function to simulate fetching changed files
+// MockGetPullRequestFiles returns a fixed list of two changed file paths.
 func MockGetPullRequestFiles(_ string, _ string, _ int) ([]string, error) {
 	return []string{"src/main.c", "include/utils.h"}, nil
 }
 
-// Mock function to simulate fetching PR diff
+// MockGetPullRequestDiff returns a fixed diff snippet that adds free(ptr).
 func MockGetPullRequestDiff(_ string, _ string, _ int) (string, error) {
 	return `@@ -23,6 +23,7 @@ void fixMemory() {
 
@@ -26,3 +26,30 @@ func MockGetPullRequestDiff(_ string, _ string, _ int) (string, error) {
 +    free(ptr);
 }`, nil
 }
+
+// MockGetRepositoryFilesRecursive returns a fixed list of four repository
+// paths and never fails; its arguments are ignored.
+func MockGetRepositoryFilesRecursive(_, _ string) ([]string, error) {
+	return []string{
+		"src/main.go",
+		"src/utils/helper.go",
+		"config/config.yaml",
+		"README.md",
+	}, nil
+}
+
+// MockGetFileContent returns a small Go program for "src/main.go" and an
+// empty string for every other path; it never fails.
+func MockGetFileContent(_, _, path string) (string, error) {
+	if path == "src/main.go" {
+		return `package main
+
+import "fmt"
+
+func main() {
+	fmt.Println("Hello, world!")
+}`, nil
+	}
+
+	return "", nil
+}
diff --git a/internal/openai/openai_mock.go b/mocks/openai_mock.go
similarity index 54%
rename from internal/openai/openai_mock.go
rename to mocks/openai_mock.go
index 26e6e23..d62bca1 100644
--- a/internal/openai/openai_mock.go
+++ b/mocks/openai_mock.go
@@ -1,6 +1,12 @@
-package openai
+package mocks
 
-// Mock function to simulate AI-generated feedback
+// MockAnalyzePRWithAI returns canned review feedback; diff is ignored.
 func MockAnalyzePRWithAI(diff string) (string, error) {
 	return "- āœ… Correctly added memory release (`free(ptr);`).\n- āš ļø Consider checking if `malloc` returned a valid pointer before using it.", nil
 }
+
+// MockAnalyzeCodeWithAI returns a canned application summary; code and prompt
+// are ignored.
+func MockAnalyzeCodeWithAI(code string, prompt string) (string, error) {
+	return "This application is a simple web server that handles HTTP requests.", nil
+}
diff --git a/tests/github_internal_test.go
b/tests/github_internal_test.go
new file mode 100644
index 0000000..c72fc58
--- /dev/null
+++ b/tests/github_internal_test.go
@@ -0,0 +1,25 @@
+package tests
+
+import (
+	"github.com/gsoares85/code-guardian/mocks"
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+// TestGetRepositoryFilesRecursive checks the file list returned by the mock;
+// it does not call the real GitHub-backed implementation.
+func TestGetRepositoryFilesRecursive(t *testing.T) {
+	files, err := mocks.MockGetRepositoryFilesRecursive("example", "repo")
+
+	assert.NoError(t, err)
+	assert.NotEmpty(t, files, "Expected to fetch files")
+	assert.Contains(t, files, "src/main.go")
+}
+
+// TestGetFileContent checks that the mock returns Go source for src/main.go.
+func TestGetFileContent(t *testing.T) {
+	content, err := mocks.MockGetFileContent("example", "repo", "src/main.go")
+
+	assert.NoError(t, err)
+	assert.Contains(t, content, "package main", "Expected Go package definition")
+}
diff --git a/tests/openai_test.go b/tests/openai_test.go
new file mode 100644
index 0000000..a60d50e
--- /dev/null
+++ b/tests/openai_test.go
@@ -0,0 +1,25 @@
+package tests
+
+import (
+	"github.com/gsoares85/code-guardian/internal/openai"
+	"github.com/gsoares85/code-guardian/mocks"
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+// TestAnalyzeCodeWithAI checks that the mock analysis is returned without
+// error; it exercises only the mock, not the real OpenAI-backed function.
+func TestAnalyzeCodeWithAI(t *testing.T) {
+	response, err := mocks.MockAnalyzeCodeWithAI("package main\nfunc main() {}", "Analyze this code")
+
+	assert.NoError(t, err)
+	assert.NotEmpty(t, response, "Expected AI analysis response")
+}
+
+// TestSplitLargeCode pins the exact chunks, including the shorter trailing
+// chunk and the collapsing of newlines and tabs into single spaces.
+func TestSplitLargeCode(t *testing.T) {
+	chunks := openai.SplitLargeCode("a b\nc\td e", 2)
+
+	assert.Equal(t, []string{"a b", "c d", "e"}, chunks)
+}