2 changes: 1 addition & 1 deletion src/authorship/post_commit.rs
@@ -92,7 +92,7 @@ pub fn post_commit(
);
}

- working_log.write_all_checkpoints(&parent_working_log)?;
+ working_log.write_all_checkpoints(&mut parent_working_log)?;

// Create VirtualAttributions from working log (fast path - no blame)
// We don't need to run blame because we only care about the working log data
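Note on the one-line change above: write_all_checkpoints now prunes char-level attributions in place before persisting (see src/git/repo_storage.rs below), so its parameter became &mut [Checkpoint] and every caller must hold a mutable borrow. A minimal sketch of why the mutable signature is needed, using a stand-in Checkpoint struct (the real type has more fields):

use std::collections::HashSet;

// Stand-in for the real Checkpoint; fields are illustrative only.
struct Checkpoint {
    file: String,
    char_level: Option<Vec<(usize, usize)>>, // hypothetical char-range data
}

// Pruning mutates the slice in place, hence &mut.
fn write_all_checkpoints(checkpoints: &mut [Checkpoint]) {
    let mut seen = HashSet::new();
    // Walk newest-to-oldest: only the most recent checkpoint per file
    // keeps char-level data; older entries are pruned in place.
    for cp in checkpoints.iter_mut().rev() {
        if !seen.insert(cp.file.clone()) {
            cp.char_level = None;
        }
    }
    // ...serialize and write the pruned checkpoints to disk...
}

fn main() {
    let mut log = vec![
        Checkpoint { file: "a.rs".into(), char_level: Some(vec![(0, 10)]) },
        Checkpoint { file: "a.rs".into(), char_level: Some(vec![(5, 20)]) },
    ];
    write_all_checkpoints(&mut log); // an immutable &log would no longer compile
    assert!(log[0].char_level.is_none()); // older duplicate pruned
    assert!(log[1].char_level.is_some()); // newest entry keeps precision
}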
105 changes: 59 additions & 46 deletions src/commands/checkpoint.rs
@@ -156,12 +156,29 @@ pub fn run(
storage_start.elapsed()
));

+ // Read checkpoints once and reuse throughout the function.
+ // This eliminates multiple redundant read_all_checkpoints() calls that were
+ // previously the #1 cause of memory overflow (each read deserializes the entire
+ // JSONL file into memory).
+ let read_checkpoints_start = Instant::now();
+ let mut checkpoints = if reset {
+     working_log.reset_working_log()?;
+     Vec::new()
+ } else {
+     working_log.read_all_checkpoints()?
+ };
+ debug_log(&format!(
+     "[BENCHMARK] Reading {} checkpoints took {:?}",
+     checkpoints.len(),
+     read_checkpoints_start.elapsed()
+ ));
+
// Early exit for human only
if is_pre_commit {
- let has_no_ai_edits = working_log
-     .all_ai_touched_files()
-     .map(|files| files.is_empty())
-     .unwrap_or(true);
+ let has_no_ai_edits = checkpoints.iter().all(|cp| {
+     cp.entries.is_empty()
+         || (cp.kind != CheckpointKind::AiAgent && cp.kind != CheckpointKind::AiTab)
+ });

// Also check for INITIAL attributions - these are AI attributions from previous
// commits that weren't staged (e.g., after an amend). We must process these.
@@ -262,6 +279,7 @@ pub fn run(
pathspec_start.elapsed()
));

+ // Pass pre-loaded checkpoints to avoid redundant reads inside get_all_tracked_files
let files_start = Instant::now();
let files = get_all_tracked_files(
repo,
@@ -270,27 +288,14 @@
pathspec_filter,
is_pre_commit,
&ignore_matcher,
+ Some(&checkpoints),
)?;
debug_log(&format!(
"[BENCHMARK] get_all_tracked_files found {} files, took {:?}",
files.len(),
files_start.elapsed()
));

- let read_checkpoints_start = Instant::now();
- let mut checkpoints = if reset {
-     // If reset flag is set, start with an empty working log
-     working_log.reset_working_log()?;
-     Vec::new()
- } else {
-     working_log.read_all_checkpoints()?
- };
- debug_log(&format!(
-     "[BENCHMARK] Reading {} checkpoints took {:?}",
-     checkpoints.len(),
-     read_checkpoints_start.elapsed()
- ));
-
if show_working_log {
if checkpoints.is_empty() {
eprintln!("No working log entries found.");
@@ -607,6 +612,7 @@ fn get_all_tracked_files(
edited_filepaths: Option<&Vec<String>>,
is_pre_commit: bool,
ignore_matcher: &IgnoreMatcher,
+ preloaded_checkpoints: Option<&[Checkpoint]>,
) -> Result<Vec<String>, GitAiError> {
let mut files: HashSet<String> = edited_filepaths
.map(|paths| {
@@ -659,28 +665,38 @@
initial_read_start.elapsed()
));

+ // Use pre-loaded checkpoints if available, otherwise read from disk.
+ // This eliminates redundant read_all_checkpoints() calls when the caller
+ // already has the data loaded (e.g., checkpoint::run reads once and passes it through).
+ let owned_checkpoints;
+ let checkpoint_data: &[Checkpoint] = match preloaded_checkpoints {
+     Some(data) => data,
+     None => {
+         owned_checkpoints = working_log.read_all_checkpoints().unwrap_or_default();
+         &owned_checkpoints
+     }
+ };

let checkpoints_read_start = Instant::now();
- if let Ok(working_log_data) = working_log.read_all_checkpoints() {
-     for checkpoint in &working_log_data {
-         for entry in &checkpoint.entries {
-             // Normalize path separators to forward slashes
-             let normalized_path = normalize_to_posix(&entry.file);
-             // Filter out paths outside the repository to prevent git command failures
-             if !is_path_in_repo(&normalized_path) {
-                 debug_log(&format!(
-                     "Skipping checkpoint file outside repository: {}",
-                     normalized_path
-                 ));
-                 continue;
-             }
-             if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) {
-                 continue;
-             }
-             if !files.contains(&normalized_path) {
-                 // Check if it's a text file before adding
-                 if is_text_file(working_log, &normalized_path) {
-                     files.insert(normalized_path);
-                 }
+ for checkpoint in checkpoint_data {
+     for entry in &checkpoint.entries {
+         // Normalize path separators to forward slashes
+         let normalized_path = normalize_to_posix(&entry.file);
+         // Filter out paths outside the repository to prevent git command failures
+         if !is_path_in_repo(&normalized_path) {
+             debug_log(&format!(
+                 "Skipping checkpoint file outside repository: {}",
+                 normalized_path
+             ));
+             continue;
+         }
+         if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) {
+             continue;
+         }
+         if !files.contains(&normalized_path) {
+             // Check if it's a text file before adding
+             if is_text_file(working_log, &normalized_path) {
+                 files.insert(normalized_path);
+             }
}
}
@@ -690,13 +706,10 @@
checkpoints_read_start.elapsed()
));

- let has_ai_checkpoints = if let Ok(working_log_data) = working_log.read_all_checkpoints() {
-     working_log_data.iter().any(|checkpoint| {
-         checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab
-     })
- } else {
-     false
- };
+ // Use same checkpoint data to check for AI checkpoints (no extra read)
+ let has_ai_checkpoints = checkpoint_data.iter().any(|checkpoint| {
+     checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab
+ });

let status_files_start = Instant::now();
let mut results_for_tracked_files = if is_pre_commit && !has_ai_checkpoints {
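The checkpoint.rs changes above rest on one borrow idiom: read the working log once at the top of run(), then thread the data down as Option<&[Checkpoint]>, with a deferred `let` keeping an owned fallback alive for callers that pass None. The early exit likewise scans the in-memory checkpoints instead of re-reading via all_ai_touched_files(). A self-contained sketch of the idiom under illustrative names (read_from_disk and process are not the real API):

fn read_from_disk() -> Vec<String> {
    // Stand-in for working_log.read_all_checkpoints()
    vec!["checkpoint-1".to_string(), "checkpoint-2".to_string()]
}

fn process(preloaded: Option<&[String]>) {
    // Deferred initialization: `owned` is only assigned when no preloaded
    // slice was supplied, and the borrow stays valid to the end of scope.
    let owned;
    let data: &[String] = match preloaded {
        Some(slice) => slice,
        None => {
            owned = read_from_disk();
            &owned
        }
    };
    for item in data {
        println!("{item}");
    }
}

fn main() {
    let cached = read_from_disk();    // read once at the top...
    process(Some(cached.as_slice())); // ...then pass the same data through
    process(Some(cached.as_slice())); // repeated calls cost no extra reads
    process(None);                    // callers without a cache still work
}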
4 changes: 2 additions & 2 deletions src/commands/hooks/checkout_hooks.rs
@@ -166,7 +166,7 @@ fn remove_attributions_for_pathspecs(repository: &Repository, head: &str, pathsp

// Filter checkpoints
if let Ok(checkpoints) = working_log.read_all_checkpoints() {
- let filtered: Vec<_> = checkpoints
+ let mut filtered: Vec<_> = checkpoints
.into_iter()
.map(|mut cp| {
cp.entries
@@ -175,7 +175,7 @@ fn remove_attributions_for_pathspecs(repository: &Repository, head: &str, pathsp
})
.filter(|cp| !cp.entries.is_empty())
.collect();
- let _ = working_log.write_all_checkpoints(&filtered);
+ let _ = working_log.write_all_checkpoints(&mut filtered);
}
}

63 changes: 35 additions & 28 deletions src/git/repo_storage.rs
@@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::fs;
+ use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};

/// Initial attributions data structure stored in the INITIAL file
@@ -321,8 +322,7 @@ impl PersistedWorkingLog {

/* append checkpoint */
pub fn append_checkpoint(&self, checkpoint: &Checkpoint) -> Result<(), GitAiError> {
- // Read existing checkpoints
- let mut checkpoints = self.read_all_checkpoints().unwrap_or_default();
+ let checkpoints_file = self.dir.join("checkpoints.jsonl");

// Create a copy, potentially without transcript to reduce storage size.
// Transcripts are refetched in update_prompts_to_latest() before post-commit
Expand Down Expand Up @@ -366,15 +366,18 @@ impl PersistedWorkingLog {
storage_checkpoint.transcript = None;
}

- // Add the new checkpoint
- checkpoints.push(storage_checkpoint);
-
- // Prune char-level attributions from older checkpoints for the same files
- // Only the most recent checkpoint per file needs char-level precision
- self.prune_old_char_attributions(&mut checkpoints);
+ // Append only the new checkpoint to the file (O(1) instead of O(N))
+ // This avoids reading all existing checkpoints just to add one.
+ // Char-level attribution pruning is deferred to write_all_checkpoints(),
+ // which is called by post_commit after reading all checkpoints anyway.
+ let json_line = serde_json::to_string(&storage_checkpoint)?;
+ let mut file = fs::OpenOptions::new()
+     .create(true)
+     .append(true)
+     .open(&checkpoints_file)?;
+ writeln!(file, "{}", json_line)?;

- // Write all checkpoints back
- self.write_all_checkpoints(&checkpoints)
+ Ok(())
}

pub fn read_all_checkpoints(&self) -> Result<Vec<Checkpoint>, GitAiError> {
@@ -384,16 +387,19 @@
return Ok(Vec::new());
}

- let content = fs::read_to_string(&checkpoints_file)?;
+ // Use BufReader to stream line-by-line instead of loading entire file into memory.
+ // This avoids holding both the full file string AND the parsed structs simultaneously.
+ let file = fs::File::open(&checkpoints_file)?;
+ let reader = BufReader::new(file);
let mut checkpoints = Vec::new();

- // Parse JSONL file - each line is a separate JSON object
- for line in content.lines() {
+ for line_result in reader.lines() {
+     let line = line_result?;
if line.trim().is_empty() {
continue;
}

- let checkpoint: Checkpoint = serde_json::from_str(line)
+ let checkpoint: Checkpoint = serde_json::from_str(&line)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;

if checkpoint.api_version != CHECKPOINT_API_VERSION {
@@ -482,27 +488,28 @@
}
}

- /// Write all checkpoints to the JSONL file, replacing any existing content
+ /// Write all checkpoints to the JSONL file, replacing any existing content.
+ /// Also prunes char-level attributions from older checkpoints (deferred from append_checkpoint).
+ /// Note: Unlike append_checkpoint(), this preserves transcripts: it runs during
+ /// post-commit, after transcripts have been refetched, and they must remain
+ /// available for from_just_working_log() to read.
- pub fn write_all_checkpoints(&self, checkpoints: &[Checkpoint]) -> Result<(), GitAiError> {
+ pub fn write_all_checkpoints(&self, checkpoints: &mut [Checkpoint]) -> Result<(), GitAiError> {
let checkpoints_file = self.dir.join("checkpoints.jsonl");

- // Serialize all checkpoints to JSONL
- let mut lines = Vec::new();
- for checkpoint in checkpoints {
-     let json_line = serde_json::to_string(checkpoint)?;
-     lines.push(json_line);
- }
+ // Prune char-level attributions from older checkpoints for the same files.
+ // Only the most recent checkpoint per file needs char-level precision.
+ // This was previously done in append_checkpoint but is now deferred here
+ // to avoid the read-all-then-write-all pattern on every append.
+ self.prune_old_char_attributions(checkpoints);

- // Write all lines to file
- let content = lines.join("\n");
- if !content.is_empty() {
-     fs::write(&checkpoints_file, format!("{}\n", content))?;
- } else {
-     fs::write(&checkpoints_file, "")?;
- }
+ // Use BufWriter for efficient serialization directly to file
+ let file = fs::File::create(&checkpoints_file)?;
+ let mut writer = std::io::BufWriter::new(file);
+ for checkpoint in checkpoints.iter() {
+     let json_line = serde_json::to_string(checkpoint)?;
+     writeln!(writer, "{}", json_line)?;
+ }
+ writer.flush()?;

Ok(())
}
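Taken together, the repo_storage.rs changes replace a read-all/rewrite-all cycle on every checkpoint with an O(1) append, a streaming read, and a buffered rewrite that owns the deferred pruning. A self-contained sketch of that JSONL access pattern, assuming serde_json and serde's derive feature; Record is a stand-in for Checkpoint, and to_io is a hypothetical helper, not part of the real codebase:

use serde::{Deserialize, Serialize};
use std::fs;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;

// Stand-in record type; the real Checkpoint has many more fields.
#[derive(Serialize, Deserialize)]
struct Record {
    id: u64,
    note: String,
}

fn to_io(e: serde_json::Error) -> std::io::Error {
    std::io::Error::new(std::io::ErrorKind::InvalidData, e)
}

// O(1) append: serialize one line; no read-modify-write of the whole file.
fn append(path: &Path, record: &Record) -> std::io::Result<()> {
    let mut file = fs::OpenOptions::new().create(true).append(true).open(path)?;
    writeln!(file, "{}", serde_json::to_string(record).map_err(to_io)?)
}

// Streaming read: hold one line in memory at a time, not the whole file.
fn read_all(path: &Path) -> std::io::Result<Vec<Record>> {
    if !path.exists() {
        return Ok(Vec::new());
    }
    let reader = BufReader::new(fs::File::open(path)?);
    let mut records = Vec::new();
    for line in reader.lines() {
        let line = line?;
        if line.trim().is_empty() {
            continue;
        }
        records.push(serde_json::from_str(&line).map_err(to_io)?);
    }
    Ok(records)
}

// Full rewrite through a BufWriter; this is where a deferred compaction
// pass (like prune_old_char_attributions) would mutate `records` first.
fn write_all(path: &Path, records: &mut [Record]) -> std::io::Result<()> {
    let mut writer = BufWriter::new(fs::File::create(path)?);
    for record in records.iter() {
        writeln!(writer, "{}", serde_json::to_string(record).map_err(to_io)?)?;
    }
    writer.flush()
}

The trade-off is that appends no longer prune, so the file can carry stale char-level data until the next full rewrite; the diff accepts this because post_commit always performs one via write_all_checkpoints.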