Skip to content

Commit 1591a4b

Browse files
committed
Initial Python support
This seems to work. Signed-off-by: Chris Mason <clm@fb.com>
1 parent 3d961e3 commit 1591a4b

File tree

3 files changed

+109
-1
lines changed

3 files changed

+109
-1
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ num_cpus = "1.16"
3131
tree-sitter = "0.24"
3232
tree-sitter-c = "0.23"
3333
tree-sitter-rust = "0.23"
34+
tree-sitter-python = "0.23"
3435
streaming-iterator = "0.1"
3536
once_cell = "1.20"
3637
crossbeam-channel = "0.5"

src/file_extensions.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
1717
"hxx", // C++ header files
1818
"h++", // C++ header files
1919
"rs", // Rust source files
20+
"py", // Python source files
2021
];
2122

2223
/// Default extensions for indexing (subset of SUPPORTED_EXTENSIONS)
@@ -63,6 +64,11 @@ pub fn is_c_cpp_file(file_path: &str) -> bool {
6364
|| file_path.ends_with(".h++")
6465
}
6566

67+
/// Check if a file path is a Python file
///
/// Returns `true` only when `file_path` ends with the literal suffix `.py`.
/// The comparison is a plain string suffix test, so it is case-sensitive and
/// no other extension is accepted by this function.
68+
pub fn is_python_file(file_path: &str) -> bool {
69+
file_path.ends_with(".py")
70+
}
71+
6672
#[cfg(test)]
6773
mod tests {
6874
use super::*;
@@ -92,7 +98,7 @@ mod tests {
9298
assert!(is_supported_for_analysis("test.rs"));
9399
assert!(is_supported_for_analysis("test.cpp"));
94100
assert!(is_supported_for_analysis("test.hpp"));
95-
assert!(!is_supported_for_analysis("test.py"));
101+
assert!(is_supported_for_analysis("test.py"));
96102
assert!(!is_supported_for_analysis("test.txt"));
97103
}
98104

@@ -107,4 +113,12 @@ mod tests {
107113
assert!(!is_c_cpp_file("test.rs"));
108114
assert!(!is_c_cpp_file("test.py"));
109115
}
116+
117+
// Verifies that `is_python_file` accepts a `.py` path and rejects paths
// ending in `.c`, `.rs` and `.txt`.
#[test]
118+
fn test_is_python_file() {
119+
assert!(is_python_file("test.py"));
120+
assert!(!is_python_file("test.c"));
121+
assert!(!is_python_file("test.rs"));
122+
assert!(!is_python_file("test.txt"));
123+
}
110124
}

src/treesitter_analyzer.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::hash::compute_file_hash;
1616
pub enum Language {
1717
C,
1818
Rust,
19+
Python,
1920
}
2021

2122
impl Language {
@@ -26,6 +27,7 @@ impl Language {
2627
.and_then(|ext| match ext {
2728
"c" | "h" | "cpp" | "cc" | "cxx" | "hpp" => Some(Language::C),
2829
"rs" => Some(Language::Rust),
30+
"py" => Some(Language::Python),
2931
_ => None,
3032
})
3133
}
@@ -53,8 +55,10 @@ struct LanguageQueries {
5355
pub struct TreeSitterAnalyzer {
5456
c_parser: Parser,
5557
rust_parser: Parser,
58+
python_parser: Parser,
5659
c_queries: LanguageQueries,
5760
rust_queries: LanguageQueries,
61+
python_queries: LanguageQueries,
5862
}
5963

6064
impl TreeSitterAnalyzer {
@@ -69,17 +73,27 @@ impl TreeSitterAnalyzer {
6973
let mut rust_parser = Parser::new();
7074
rust_parser.set_language(&rust_language)?;
7175

76+
// Initialize Python parser and queries
77+
let python_language = tree_sitter_python::LANGUAGE.into();
78+
let mut python_parser = Parser::new();
79+
python_parser.set_language(&python_language)?;
80+
7281
// Create C queries
7382
let c_queries = Self::create_c_queries(&c_language)?;
7483

7584
// Create Rust queries
7685
let rust_queries = Self::create_rust_queries(&rust_language)?;
7786

87+
// Create Python queries
88+
let python_queries = Self::create_python_queries(&python_language)?;
89+
7890
Ok(TreeSitterAnalyzer {
7991
c_parser,
8092
rust_parser,
93+
python_parser,
8194
c_queries,
8295
rust_queries,
96+
python_queries,
8397
})
8498
}
8599

@@ -293,6 +307,75 @@ impl TreeSitterAnalyzer {
293307
})
294308
}
295309

310+
/// Build the set of tree-sitter queries used for Python sources.
///
/// Compiles five queries against `language` (functions, comments, classes,
/// decorators and calls) and returns them bundled in a `LanguageQueries`
/// with `typedef_query` set to `None`.
///
/// # Errors
///
/// Any failure returned by `Query::new` is propagated to the caller via `?`.
fn create_python_queries(language: &tree_sitter::Language) -> Result<LanguageQueries> {
311+
// Query for function definitions (including methods)
// Captures the name, parameter list, body and the whole definition; the
// `return_type` field is marked optional with `?` in the pattern.
312+
let function_query = Query::new(
313+
language,
314+
r#"
315+
(function_definition
316+
name: (identifier) @function_name
317+
parameters: (parameters) @parameters
318+
return_type: (_)? @return_type
319+
body: (block) @body
320+
) @function
321+
"#,
322+
)?;
323+
324+
// Query for comments
// Matches every `comment` node.
// NOTE(review): docstrings are presumably string nodes rather than
// `comment` nodes and so would not be captured here — confirm.
325+
let comment_query = Query::new(
326+
language,
327+
r#"
328+
(comment) @comment
329+
"#,
330+
)?;
331+
332+
// Query for class definitions
// Stored as `type_query`; the whole definition is captured as `@class`
// and the class name as `@type_name`.
333+
let type_query = Query::new(
334+
language,
335+
r#"
336+
(class_definition
337+
name: (identifier) @type_name
338+
body: (block) @body
339+
) @class
340+
"#,
341+
)?;
342+
343+
// Python doesn't have traditional macros, but we can track decorators
// NOTE(review): the pattern requires an `identifier` directly under
// `decorator`, so decorators written as attribute access or as calls
// presumably do not match — confirm against the tree-sitter-python grammar.
344+
let macro_query = Query::new(
345+
language,
346+
r#"
347+
(decorator
348+
(identifier) @macro_name
349+
) @decorator
350+
"#,
351+
)?;
352+
353+
// Query for function calls
// Two patterns: a call whose callee is a bare identifier (`@call`), and a
// call whose callee is an attribute expression (`@method_call`), where only
// the trailing attribute identifier is captured as `@function_name`.
354+
let call_query = Query::new(
355+
language,
356+
r#"
357+
(call
358+
function: (identifier) @function_name
359+
) @call
360+
361+
(call
362+
function: (attribute
363+
attribute: (identifier) @function_name
364+
)
365+
) @method_call
366+
"#,
367+
)?;
368+
369+
Ok(LanguageQueries {
370+
function_query,
371+
comment_query,
372+
type_query,
373+
typedef_query: None, // Python doesn't have typedefs
374+
macro_query,
375+
call_query,
376+
})
377+
}
378+
296379
/// Helper method to convert absolute path to relative path based on source root
297380
fn make_relative_path(&self, file_path: &Path, source_root: Option<&Path>) -> String {
298381
if let Some(root) = source_root {
@@ -310,6 +393,7 @@ impl TreeSitterAnalyzer {
310393
match language {
311394
Language::C => &mut self.c_parser,
312395
Language::Rust => &mut self.rust_parser,
396+
Language::Python => &mut self.python_parser,
313397
}
314398
}
315399

@@ -318,6 +402,7 @@ impl TreeSitterAnalyzer {
318402
match language {
319403
Language::C => &self.c_queries,
320404
Language::Rust => &self.rust_queries,
405+
Language::Python => &self.python_queries,
321406
}
322407
}
323408

@@ -945,6 +1030,14 @@ impl TreeSitterAnalyzer {
9451030
line_start = node.start_position().row as u32 + 1;
9461031
}
9471032
}
1033+
"class" => {
1034+
kind = "class".to_string();
1035+
type_start_byte = node.start_byte();
1036+
type_end_byte = node.end_byte();
1037+
if line_start == 0 {
1038+
line_start = node.start_position().row as u32 + 1;
1039+
}
1040+
}
9481041
_ => {}
9491042
}
9501043
}

0 commit comments

Comments
 (0)