From 98962e4e4b5be0f8b42b5b1bc1a2d8dc01e9c32c Mon Sep 17 00:00:00 2001 From: luciferrr665 Date: Thu, 21 Nov 2024 23:59:03 -0500 Subject: [PATCH] Additon of python parser --- package-lock.json | 80 +++++++++++++++++ package.json | 2 + src/constants.ts | 16 +++- src/context/language/python-parser.ts | 118 +++++++++++++++++++++++++- tsconfig.json | 18 ++-- 5 files changed, 220 insertions(+), 14 deletions(-) diff --git a/package-lock.json b/package-lock.json index f60579a..2d942c5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,6 +20,8 @@ "gpt-tokenizer": "^2.1.2", "groq-sdk": "^0.8.0", "octokit": "^3.1.1", + "tree-sitter": "^0.22.1", + "tree-sitter-python": "^0.23.4", "xml2js": "^0.6.2" }, "devDependencies": { @@ -1816,6 +1818,15 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, + "node_modules/node-addon-api": { + "version": "8.2.2", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.2.2.tgz", + "integrity": "sha512-9emqXAKhVoNrQ792nLI/wpzPpJ/bj/YXxW0CvAau1+RdGBcCRF1Dmz7719zgVsQNrzHl9Tzn3ImZ4qWFarWL0A==", + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, "node_modules/node-domexception": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", @@ -1853,6 +1864,17 @@ } } }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "license": "MIT", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, "node_modules/object-inspect": { "version": "1.12.3", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz", @@ -2113,6 
+2135,36 @@ "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, + "node_modules/tree-sitter": { + "version": "0.22.1", + "resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.22.1.tgz", + "integrity": "sha512-gRO+jk2ljxZlIn20QRskIvpLCMtzuLl5T0BY6L9uvPYD17uUrxlxWkvYCiVqED2q2q7CVtY52Uex4WcYo2FEXw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.1", + "node-gyp-build": "^4.8.2" + } + }, + "node_modules/tree-sitter-python": { + "version": "0.23.4", + "resolved": "https://registry.npmjs.org/tree-sitter-python/-/tree-sitter-python-0.23.4.tgz", + "integrity": "sha512-MbmUAl7y5UCUWqHscHke7DdRDwQnVNMNKQYQc4Gq2p09j+fgPxaU8JVsuOI/0HD3BSEEe5k9j3xmdtIWbDtDgw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.1", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tsx": { "version": "4.19.2", "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.2.tgz", @@ -3482,6 +3534,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, + "node-addon-api": { + "version": "8.2.2", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.2.2.tgz", + "integrity": "sha512-9emqXAKhVoNrQ792nLI/wpzPpJ/bj/YXxW0CvAau1+RdGBcCRF1Dmz7719zgVsQNrzHl9Tzn3ImZ4qWFarWL0A==" + }, "node-domexception": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", @@ -3495,6 +3552,11 @@ "whatwg-url": "^5.0.0" } }, + "node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": 
"sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==" + }, "object-inspect": { "version": "1.12.3", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz", @@ -3687,6 +3749,24 @@ "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, + "tree-sitter": { + "version": "0.22.1", + "resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.22.1.tgz", + "integrity": "sha512-gRO+jk2ljxZlIn20QRskIvpLCMtzuLl5T0BY6L9uvPYD17uUrxlxWkvYCiVqED2q2q7CVtY52Uex4WcYo2FEXw==", + "requires": { + "node-addon-api": "^8.2.1", + "node-gyp-build": "^4.8.2" + } + }, + "tree-sitter-python": { + "version": "0.23.4", + "resolved": "https://registry.npmjs.org/tree-sitter-python/-/tree-sitter-python-0.23.4.tgz", + "integrity": "sha512-MbmUAl7y5UCUWqHscHke7DdRDwQnVNMNKQYQc4Gq2p09j+fgPxaU8JVsuOI/0HD3BSEEe5k9j3xmdtIWbDtDgw==", + "requires": { + "node-addon-api": "^8.2.1", + "node-gyp-build": "^4.8.2" + } + }, "tsx": { "version": "4.19.2", "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.2.tgz", diff --git a/package.json b/package.json index fb08515..baea346 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,8 @@ "gpt-tokenizer": "^2.1.2", "groq-sdk": "^0.8.0", "octokit": "^3.1.1", + "tree-sitter": "^0.22.1", + "tree-sitter-python": "^0.23.4", "xml2js": "^0.6.2" }, "devDependencies": { diff --git a/src/constants.ts b/src/constants.ts index 14c7de1..4336f99 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -90,9 +90,21 @@ export const processGitFilepath = (filepath: string) => { }; export interface EnclosingContext { - enclosingContext: Node | null; + enclosingContext: Node | TreeSitterNode | null; +} +export interface TreeSitterNode { + type: string; + loc: { + start: { + line: number; + column: number; + }; + end: { + line: number; + column: number; + }; + }; } - export interface 
AbstractParser { findEnclosingContext( file: string, diff --git a/src/context/language/python-parser.ts b/src/context/language/python-parser.ts index 845e90b..c568bba 100644 --- a/src/context/language/python-parser.ts +++ b/src/context/language/python-parser.ts @@ -1,15 +1,127 @@ -import { AbstractParser, EnclosingContext } from "../../constants"; +import { + AbstractParser, + EnclosingContext, + TreeSitterNode, +} from "../../constants"; +import Parser from "tree-sitter"; +import Python from "tree-sitter-python"; +import * as fs from "fs"; + +/** + * Utility function to process an AST node and check whether it fully encloses a given line range. + */ +const processNode = ( + node: Parser.SyntaxNode, + lineStart: number, + lineEnd: number, + largestSize: number, + largestEnclosingContext: Parser.SyntaxNode | null +) => { + const { startPosition, endPosition } = node; + + // Check if the node fully encloses the target line range + if (startPosition.row + 1 <= lineStart && lineEnd <= endPosition.row + 1) { + const size = endPosition.row - startPosition.row; + if (size > largestSize) { + largestSize = size; + largestEnclosingContext = node; + } + } + return { largestSize, largestEnclosingContext }; +}; + +/** + * Convert a Tree-sitter syntax node to a TreeSitterNode + */ +const convertSyntaxNodeToTreeSitterNode = ( + syntaxNode: Parser.SyntaxNode +): TreeSitterNode => { + return { + type: syntaxNode.type, + loc: { + start: { + line: syntaxNode.startPosition.row + 1, + column: syntaxNode.startPosition.column, + }, + end: { + line: syntaxNode.endPosition.row + 1, + column: syntaxNode.endPosition.column, + }, + }, + }; +}; + +/** + * Python parser: A parser for Python code that identifies syntactic contexts + * and validates code as part of the AI agent's code review system.
+ */ export class PythonParser implements AbstractParser { + private parser: Parser; + + constructor() { + this.parser = new Parser(); + this.parser.setLanguage(Python); + } + /** + * + * @param file - Content of the Python file + * @param lineStart - Starting line number (1-based) + * @param lineEnd - Ending line number (1-based) + * @returns An EnclosingContext object with the type and location of the enclosing node + */ findEnclosingContext( file: string, lineStart: number, lineEnd: number ): EnclosingContext { // TODO: Implement this method for Python - return null; + const tree = this.parser.parse(file); + + let largestEnclosingContext: Parser.SyntaxNode | null = null; + let largestSize = 0; + /** + * Recursive function to traverse AST nodes + */ + const traverseNodes = (node: Parser.SyntaxNode) => { + ({ largestSize, largestEnclosingContext } = processNode( + node, + lineStart, + lineEnd, + largestSize, + largestEnclosingContext + )); + // Recursively inspect child nodes + for (let i = 0; i < node.childCount; i++) { + traverseNodes(node.child(i)); + } + }; + + // Start the traversal from the root node + traverseNodes(tree.rootNode); + + return { + enclosingContext: largestEnclosingContext + ?
convertSyntaxNodeToTreeSitterNode(largestEnclosingContext) + : null, + }; } dryRun(file: string): { valid: boolean; error: string } { // TODO: Implement this method for Python - return { valid: false, error: "Not implemented yet" }; + try { + const tree = this.parser.parse(file); + + if (tree.rootNode.hasError) { + return { + valid: false, + error: "Syntax error in Python code", + }; + } + return { valid: true, error: "" }; + } catch (err) { + return { + valid: false, + error: `Error parsing pyhton code: ${err.message}`, + }; + } } } diff --git a/tsconfig.json b/tsconfig.json index 9f08957..ee994d7 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,12 +1,12 @@ { "compilerOptions": { - "outDir": "./dist", - "sourceMap": true, - "noImplicitAny": true, - "module": "commonjs", - "target": "es6" + "allowSyntheticDefaultImports": true, // Addition 1: necessary for tree-sitter + "esModuleInterop": true, // Addition 2: for tree-sitter and the implementation of the Python parser + "outDir": "./dist", + "sourceMap": true, + "noImplicitAny": true, + "module": "commonjs", + "target": "es6" }, - "include": [ - "./src/**/*" - ] -} \ No newline at end of file + "include": ["./src/**/*"] +}