5 changes: 4 additions & 1 deletion .gitignore
@@ -67,4 +67,7 @@ standalone/coverage_reports/*
out.txt

#Local AI files
.github/copilot-*
.github/copilot-*

# Profiling data
cargo-flamegraph.trace
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
@@ -2,3 +2,6 @@

members = ["ast", "lsp", "shared", "skill", "standalone"]
resolver = "2"

[profile.release]
debug = true
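
A note on the release-profile change above: `debug = true` keeps debug info in optimized builds, so samples in flamegraphs and traces (such as the `cargo-flamegraph.trace` file ignored earlier in this diff) resolve to readable function names instead of bare addresses. A minimal annotated sketch of the setting; the `line-tables-only` alternative is mentioned only as an option available on recent toolchains, not something this PR adopts:

# Cargo.toml (workspace root) — sketch mirroring the change in this PR
[profile.release]
# Full debug info: larger binaries and slower builds, but profilers can symbolicate.
debug = true
# Lighter alternative that keeps only file/line tables:
# debug = "line-tables-only"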
1 change: 1 addition & 0 deletions ast/Cargo.toml
@@ -67,6 +67,7 @@ futures = "0.3.31"
tiktoken-rs = "0.7.0"
ignore = "0.4.23"
fastembed = { version = "5.2.0", optional = true }
dashmap = "6.1.0"


[[example]]
95 changes: 75 additions & 20 deletions ast/src/builder/core.rs
@@ -1,6 +1,6 @@
#[cfg(feature = "neo4j")]
use super::streaming::{nodes_to_bolt_format, StreamingUploadContext};
use super::utils::*;
use super::utils::{timed_stage, timed_stage_async, *};
#[cfg(feature = "neo4j")]
use crate::lang::graphs::Neo4jGraph;
use crate::lang::{
@@ -20,7 +20,7 @@ use shared::error::Result;
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::fs;
use tracing::{debug, info, trace};
use tracing::{debug, info, instrument, trace};

#[derive(Debug, Clone)]
pub struct ImplementsRelationship {
@@ -48,6 +48,7 @@ impl Repo {
let streaming = std::env::var("STREAM_UPLOAD").is_ok();
self.build_graph_inner_with_streaming(streaming).await
}

pub async fn build_graph_inner_with_streaming<G: Graph>(&self, streaming: bool) -> Result<G> {
let graph_root = strip_tmp(&self.root).display().to_string();
let mut graph = G::new(graph_root, self.lang.kind.clone());
@@ -65,6 +66,7 @@
};

self.send_status_update("initialization", 1);
info!("Starting parse stage: initialization");
self.add_repository_and_language_nodes(&mut graph).await?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
@@ -74,9 +76,11 @@
.flush_stage(&ctx.neo, "repository_language", &bolt_nodes)
.await?;
}

let files = self.collect_and_add_directories(&mut graph)?;
stats.insert("directories".to_string(), files.len());

info!("Starting parse stage: files");
let filez = self.process_and_add_files(&mut graph, &files).await?;
stats.insert("files".to_string(), filez.len());
self.send_status_with_stats(stats.clone());
@@ -101,7 +105,11 @@
.filter(|(f, _)| is_allowed_file(&std::path::PathBuf::from(f), &self.lang.kind))
.cloned()
.collect::<Vec<_>>();
self.process_libraries(&mut graph, &allowed_files)?;

info!("Starting parse stage: libraries");
timed_stage("libraries", || {
self.process_libraries(&mut graph, &allowed_files)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -114,7 +122,11 @@
.flush_edges_stage(&ctx.neo, "libraries", &edges)
.await?;
}
self.process_import_sections(&mut graph, &filez)?;

info!("Starting parse stage: imports");
timed_stage("imports", || {
self.process_import_sections(&mut graph, &filez)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -127,7 +139,11 @@
.flush_edges_stage(&ctx.neo, "imports", &edges)
.await?;
}
self.process_variables(&mut graph, &allowed_files)?;

info!("Starting parse stage: variables");
timed_stage("variables", || {
self.process_variables(&mut graph, &allowed_files)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -140,7 +156,11 @@
.flush_edges_stage(&ctx.neo, "variables", &edges)
.await?;
}
let impl_relationships = self.process_classes(&mut graph, &allowed_files)?;

info!("Starting parse stage: classes");
let impl_relationships = timed_stage("classes", || {
self.process_classes(&mut graph, &allowed_files)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -153,7 +173,11 @@
.flush_edges_stage(&ctx.neo, "classes", &edges)
.await?;
}
self.process_instances_and_traits(&mut graph, &allowed_files)?;

info!("Starting parse stage: instances_traits");
timed_stage("instances_traits", || {
self.process_instances_and_traits(&mut graph, &allowed_files)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -166,7 +190,10 @@
.flush_edges_stage(&ctx.neo, "instances_traits", &edges)
.await?;
}
self.resolve_implements_edges(&mut graph, impl_relationships)?;
info!("Starting parse stage: implements");
timed_stage("implements", || {
self.resolve_implements_edges(&mut graph, impl_relationships)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -179,7 +206,11 @@
.flush_edges_stage(&ctx.neo, "implements", &edges)
.await?;
}
self.process_data_models(&mut graph, &allowed_files)?;

info!("Starting parse stage: data_models");
timed_stage("data_models", || {
self.process_data_models(&mut graph, &allowed_files)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -192,8 +223,13 @@
.flush_edges_stage(&ctx.neo, "data_models", &edges)
.await?;
}
self.process_functions_and_tests(&mut graph, &allowed_files)
.await?;

info!("Starting parse stage: functions_tests");
timed_stage_async(
"functions_tests",
self.process_functions_and_tests(&mut graph, &allowed_files),
)
.await?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -206,7 +242,11 @@
.flush_edges_stage(&ctx.neo, "functions_tests", &edges)
.await?;
}
self.process_pages_and_templates(&mut graph, &filez)?;

info!("Starting parse stage: pages_templates");
timed_stage("pages_templates", || {
self.process_pages_and_templates(&mut graph, &filez)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -219,7 +259,11 @@
.flush_edges_stage(&ctx.neo, "pages_templates", &edges)
.await?;
}
self.process_endpoints(&mut graph, &allowed_files)?;

info!("Starting parse stage: endpoints");
timed_stage("endpoints", || {
self.process_endpoints(&mut graph, &allowed_files)
})?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -232,8 +276,13 @@
.flush_edges_stage(&ctx.neo, "endpoints", &edges)
.await?;
}
self.finalize_graph(&mut graph, &allowed_files, &mut stats)
.await?;

info!("Starting parse stage: finalize");
timed_stage_async(
"finalize",
self.finalize_graph(&mut graph, &allowed_files, &mut stats),
)
.await?;
#[cfg(feature = "neo4j")]
if let Some(ctx) = &mut streaming_ctx {
let all_nodes = graph.get_all_nodes();
@@ -691,7 +740,7 @@ impl Repo {

let classes = graph.find_nodes_by_type(NodeType::Class);
let traits = graph.find_nodes_by_type(NodeType::Trait);

let mut classes_by_file: HashMap<&str, Vec<&NodeData>> = HashMap::new();
for class in &classes {
classes_by_file
@@ -728,9 +777,7 @@ impl Repo {
let trait_node = traits_by_file
.get(rel.file_path.as_str())
.and_then(|traits| traits.iter().find(|t| t.name == rel.trait_name).copied())
.or_else(|| {
traits.iter().find(|t| t.name == rel.trait_name)
});
.or_else(|| traits.iter().find(|t| t.name == rel.trait_name));

if let (Some(class), Some(trait_)) = (class_node, trait_node) {
graph.add_edge(Edge::implements(class, trait_));
@@ -799,6 +846,8 @@ impl Repo {
info!("=> got {} data models", datamodel_count);
Ok(())
}

#[instrument(skip(self, graph, filez), fields(files=filez.len()))]
async fn process_functions_and_tests<G: Graph>(
&self,
graph: &mut G,
@@ -826,7 +875,7 @@ impl Repo {
function_count += funcs.len();

graph.add_functions(funcs);

test_count += tests.len();
graph.add_tests(tests);
}
@@ -961,6 +1010,8 @@ impl Repo {

Ok(())
}

#[instrument(skip(self, graph, filez), fields(files=filez.len()))]
fn process_endpoints<G: Graph>(&self, graph: &mut G, filez: &[(String, String)]) -> Result<()> {
self.send_status_update("process_endpoints", 11);
let mut _i = 0;
@@ -1021,6 +1072,7 @@
Ok(())
}

#[instrument(skip(self, graph, stats, filez), fields(files=filez.len()))]
async fn finalize_graph<G: Graph>(
&self,
graph: &mut G,
@@ -1113,6 +1165,9 @@
graph.filter_out_nodes_without_children(parent_type, child_type, child_meta_key);
});

crate::utils::log_and_reset_call_finder_stats();
crate::utils::log_and_reset_import_stats();

Ok(())
}
}
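
A note on the `#[instrument(...)]` attributes added above and in streaming.rs below: tracing's macro wraps each call in a span, `skip(...)` keeps large arguments (graphs, file lists, query batches) out of the recorded fields, and `fields(files = filez.len())` records a cheap summary instead. A minimal standalone sketch of that pattern under those assumptions — the function and field names here are illustrative, not taken from this PR, and `tracing-subscriber` is assumed only to make the output visible:

use tracing::{info, instrument};

// `skip(items)` keeps the full vector out of the span; `count` records its length instead.
#[instrument(skip(items), fields(count = items.len()))]
fn process(items: &[String]) {
    info!("processing"); // emitted inside the span, so it carries the `count` field
}

fn main() {
    tracing_subscriber::fmt::init();
    let items: Vec<String> = vec!["a".to_string(), "b".to_string()];
    process(&items);
}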
4 changes: 2 additions & 2 deletions ast/src/builder/streaming.rs
@@ -5,7 +5,7 @@ use crate::lang::graphs::{neo4j::*, Neo4jGraph};
use crate::lang::{EdgeType, NodeData, NodeType};
use neo4rs::BoltMap;
use shared::Result;
use tracing::{debug, info};
use tracing::{debug, info, instrument};
use uuid::Uuid;

pub struct GraphStreamingUploader {}
@@ -14,7 +14,7 @@ impl GraphStreamingUploader {
pub fn new() -> Self {
Self {}
}

#[instrument(skip(self, neo, delta_node_queries), fields(stage, node_count = delta_node_queries.len()))]
pub async fn flush_stage(
&mut self,
neo: &Neo4jGraph,
17 changes: 17 additions & 0 deletions ast/src/builder/utils.rs
@@ -5,9 +5,26 @@ use crate::utils::create_node_key;
use lsp::{strip_tmp, Language};
use std::collections::HashSet;
use std::path::PathBuf;
use tracing::info;

pub const MAX_FILE_SIZE: u64 = 500_000;

// Usage: `timed_stage("classes", || self.process_classes(&mut graph, &files))?;`
pub fn timed_stage<T, F: FnOnce() -> T>(stage_name: &str, f: F) -> T {
let start = std::time::Instant::now();
let result = f();
info!("[perf][stage] {} took {}ms", stage_name, start.elapsed().as_millis());
result
}

// Usage: `timed_stage_async("finalize", self.finalize_graph(&mut graph, &files, &mut stats)).await?;`
pub async fn timed_stage_async<T, F: std::future::Future<Output = T>>(stage_name: &str, f: F) -> T {
let start = std::time::Instant::now();
let result = f.await;
info!("[perf][stage] {} took {}ms", stage_name, start.elapsed().as_millis());
result
}

#[cfg(feature = "openssl")]
pub fn filter_by_revs<G: Graph>(root: &str, revs: Vec<String>, graph: G, lang_kind: Language) -> G {
if revs.is_empty() {
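
A usage note on the `timed_stage` / `timed_stage_async` helpers added to utils.rs above: both return whatever the wrapped work returns, so `Result`s and the `?` operator pass straight through at the call site, and because Rust futures are lazy the async variant measures only the awaited work, not the call that builds the future. A minimal, self-contained sketch under those assumptions — the stage names, workloads, and the Tokio/`tracing-subscriber` setup are illustrative, not part of this PR:

use std::time::{Duration, Instant};
use tracing::info;

// Mirrors the sync helper above: time a closure and log the elapsed milliseconds.
fn timed_stage<T, F: FnOnce() -> T>(stage_name: &str, f: F) -> T {
    let start = Instant::now();
    let result = f();
    info!("[perf][stage] {} took {}ms", stage_name, start.elapsed().as_millis());
    result
}

// Mirrors the async helper above: the future only starts running once awaited here.
async fn timed_stage_async<T, F: std::future::Future<Output = T>>(stage_name: &str, f: F) -> T {
    let start = Instant::now();
    let result = f.await;
    info!("[perf][stage] {} took {}ms", stage_name, start.elapsed().as_millis());
    result
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();

    // A fallible synchronous stage: the Result passes through unchanged, so `?`
    // still works at the call site and the elapsed time is logged either way.
    let parsed: Result<usize, String> = timed_stage("parse", || Ok(42));
    assert_eq!(parsed.unwrap(), 42);

    // An async stage: only the awaited sleep falls inside the measured interval.
    let label = timed_stage_async("sleep", async {
        tokio::time::sleep(Duration::from_millis(10)).await;
        "done"
    })
    .await;
    assert_eq!(label, "done");
}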