Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions reference/trace-hook.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
#!/usr/bin/env bun

/**
* Agent Trace Hook Handler
*
* This script processes hook events from AI coding tools (Cursor, Claude Code)
* and generates trace records for attribution tracking. It reads JSON input
* from stdin and dispatches to the appropriate handler based on hook_event_name.
*
* Supported tools:
* - Cursor: afterFileEdit, afterTabFileEdit, afterShellExecution, sessionStart, sessionEnd
* - Claude Code: PostToolUse, SessionStart, SessionEnd
*/

import {
createTrace,
appendTrace,
computeRangePositions,
tryReadFile,
type ContributorType,
extractModelFromTranscript,
type FileEdit,
} from "./trace-store";

Expand All @@ -32,6 +44,25 @@ interface HookInput {
cwd?: string;
}

/**
 * Picks the model identifier to record for a hook event.
 *
 * Lookup order:
 * 1. `input.model` — used as-is when the payload carries a non-empty model
 *    (Cursor sends it directly in the hook payload).
 * 2. `input.transcript_path` — when no model is in the payload (Claude Code),
 *    the model is read from the transcript via `extractModelFromTranscript`.
 *
 * @param input - The parsed hook payload.
 * @returns The model identifier, or `undefined` when neither source yields one.
 */
function resolveModel(input: HookInput): string | undefined {
  const { model, transcript_path: transcriptPath } = input;

  // A model in the payload always wins over the transcript lookup.
  if (model) return model;

  return transcriptPath ? extractModelFromTranscript(transcriptPath) : undefined;
}

const handlers: Record<string, (input: HookInput) => void> = {
afterFileEdit: (input) => {
const rangePositions = computeRangePositions(input.edits ?? [], tryReadFile(input.file_path!));
Expand Down Expand Up @@ -108,7 +139,7 @@ const handlers: Record<string, (input: HookInput) => void> = {
: undefined;

appendTrace(createTrace("ai", file, {
model: input.model,
model: resolveModel(input),
rangePositions,
transcript: input.transcript_path,
metadata: {
Expand All @@ -122,14 +153,14 @@ const handlers: Record<string, (input: HookInput) => void> = {

SessionStart: (input) => {
appendTrace(createTrace("ai", ".sessions", {
model: input.model,
model: resolveModel(input),
metadata: { event: "session_start", session_id: input.session_id, source: input.source },
}));
},

SessionEnd: (input) => {
appendTrace(createTrace("ai", ".sessions", {
model: input.model,
model: resolveModel(input),
metadata: { event: "session_end", session_id: input.session_id, reason: input.reason },
}));
},
Expand Down
169 changes: 163 additions & 6 deletions reference/trace-store.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { execFileSync } from "child_process";
import { existsSync, mkdirSync, appendFileSync, readFileSync } from "fs";
import { existsSync, mkdirSync, appendFileSync, readFileSync, openSync, fstatSync, readSync, closeSync } from "fs";
import { join, relative } from "path";

export interface Range {
Expand Down Expand Up @@ -94,30 +94,187 @@ export function normalizeModelId(model?: string): string | undefined {
return model;
}

/**
 * Extracts the most recent model identifier from a Claude Code transcript file.
 *
 * The transcript is read as JSONL: each non-empty line is parsed as JSON and the
 * model is taken from `entry.message.model`. Lines are scanned from the end of the
 * file backwards so the latest model wins if it changed during the session.
 *
 * To avoid loading large transcripts, only the tail of the file is read: the
 * window starts at 4KB and doubles until a model is found or the whole file has
 * been scanned. Lines that fail to parse (including the partial first line of a
 * window) are skipped, as are entries whose `model` is not a non-empty string.
 *
 * The file descriptor is always closed, including when a read fails part-way.
 *
 * @param transcriptPath - Path to the transcript JSONL file
 * @returns The model identifier (e.g., "claude-opus-4-5-20251101"), or `undefined`
 *          if the file cannot be read or contains no usable model
 *
 * @example
 * ```typescript
 * const model = extractModelFromTranscript("/path/to/transcript.jsonl");
 * // Returns: "claude-opus-4-5-20251101"
 * ```
 */
export function extractModelFromTranscript(transcriptPath: string): string | undefined {
  let fd: number | undefined;

  try {
    fd = openSync(transcriptPath, "r");
    const { size } = fstatSync(fd);

    // Start with 4KB, expand if needed (balances syscall overhead vs read size)
    let readSize = Math.min(size, 4 * 1024);

    while (readSize <= size) {
      const buffer = Buffer.alloc(readSize);
      const bytesRead = readSync(fd, buffer, 0, readSize, size - readSize);

      // Decode only what was actually read so zero padding never reaches the parser.
      const lines = buffer.toString("utf-8", 0, bytesRead).split("\n");

      // Iterate from end to get the most recent model
      for (let i = lines.length - 1; i >= 0; i--) {
        const line = lines[i].trim();
        if (!line) continue;

        try {
          const entry = JSON.parse(line) as { message?: { model?: unknown } } | null;
          const model = entry?.message?.model;
          // Honour the declared return type: ignore numeric/object/empty models.
          if (typeof model === "string" && model) {
            return model;
          }
        } catch {
          // Skip malformed/partial JSON lines
          continue;
        }
      }

      // No model found, try larger chunk
      if (readSize >= size) break;
      readSize = Math.min(size, readSize * 2);
    }

    return undefined;
  } catch {
    // File doesn't exist or isn't readable
    return undefined;
  } finally {
    if (fd !== undefined) {
      try {
        closeSync(fd);
      } catch {
        // Nothing useful to do if close fails; the lookup result still stands.
      }
    }
  }
}

/**
 * An inclusive span of lines produced by `computeRangePositions`.
 *
 * When the span is located inside file content, both bounds are 1-based line
 * numbers (the first line of the file is 1). When an edit carries an explicit
 * tool-provided range, that range's start/end line numbers are copied through
 * unchanged — NOTE(review): confirm the tool's numbering base matches.
 */
export interface RangePosition {
// First line of the span.
start_line: number;
// Last line of the span (inclusive; equals start_line for a single-line span).
end_line: number;
}

/**
 * Computes which lines in `newStr` are actually new or modified compared to `oldStr`.
 *
 * Lines are classified with a longest-common-subsequence (LCS) line diff:
 * - Context lines: lines of `newStr` that are part of the LCS with `oldStr` (not attributed)
 * - Changed lines: every other line of `newStr` (attributed to AI)
 *
 * This is necessary because some tools (like Claude Code's Edit tool) include
 * surrounding context lines in both `old_string` and `new_string`. Without this
 * diff, unchanged context lines would be attributed to the AI.
 *
 * An LCS is used rather than a greedy "first later match" scan because the greedy
 * scan skips past all intermediate old lines when an inserted line happens to equal
 * a later context line (e.g. a lone `}`), which marks the real context as changed.
 *
 * The shared prefix and suffix are trimmed first, so the quadratic LCS table only
 * covers the differing middle. If that middle is still very large, every new line
 * in it is reported as changed instead of allocating an oversized table.
 *
 * @param oldStr - The original string before the edit
 * @param newStr - The new string after the edit
 * @returns Ascending 0-indexed line offsets within `newStr` that are new or modified
 *
 * @example
 * ```typescript
 * // old: "line1\nline2\nline3"
 * // new: "line1\nNEW LINE\nline3"
 * diffToFindChangedLines(old, new); // Returns [1] - only the middle line changed
 * ```
 */
function diffToFindChangedLines(oldStr: string, newStr: string): number[] {
  // Upper bound on LCS table cells (~64MB as Uint32) before falling back.
  const maxTableCells = 16_000_000;

  const oldLines = oldStr.split("\n");
  const newLines = newStr.split("\n");
  const changedOffsets: number[] = [];

  // Trim the common prefix: these lines are context by definition.
  const sharedLimit = Math.min(oldLines.length, newLines.length);
  let prefix = 0;
  while (prefix < sharedLimit && oldLines[prefix] === newLines[prefix]) {
    prefix++;
  }

  // Trim the common suffix without overlapping the prefix.
  let oldEnd = oldLines.length;
  let newEnd = newLines.length;
  while (oldEnd > prefix && newEnd > prefix && oldLines[oldEnd - 1] === newLines[newEnd - 1]) {
    oldEnd--;
    newEnd--;
  }

  const oldCount = oldEnd - prefix;
  const newCount = newEnd - prefix;

  if (newCount === 0) {
    // Nothing but context (and possibly deletions) remains.
    return changedOffsets;
  }

  if (oldCount === 0 || oldCount * newCount > maxTableCells) {
    // Pure insertion, or a middle too large to diff: attribute the whole middle.
    for (let offset = prefix; offset < newEnd; offset++) {
      changedOffsets.push(offset);
    }
    return changedOffsets;
  }

  // lcs[i * width + j] = LCS length of oldLines[prefix+i..oldEnd) and newLines[prefix+j..newEnd)
  const width = newCount + 1;
  const lcs = new Uint32Array((oldCount + 1) * width);
  for (let i = oldCount - 1; i >= 0; i--) {
    for (let j = newCount - 1; j >= 0; j--) {
      lcs[i * width + j] =
        oldLines[prefix + i] === newLines[prefix + j]
          ? lcs[(i + 1) * width + j + 1] + 1
          : Math.max(lcs[(i + 1) * width + j], lcs[i * width + j + 1]);
    }
  }

  // Walk the table forward; a new line that is not matched by the LCS is a change.
  let i = 0;
  let j = 0;
  while (j < newCount) {
    if (i < oldCount && oldLines[prefix + i] === newLines[prefix + j]) {
      // Matching line - this is context, not a change
      i++;
      j++;
    } else if (i < oldCount && lcs[(i + 1) * width + j] >= lcs[i * width + j + 1]) {
      // Dropping this old line loses nothing: treat it as a deletion.
      i++;
    } else {
      // Line is genuinely new or modified - attribute to AI
      changedOffsets.push(prefix + j);
      j++;
    }
  }

  return changedOffsets;
}

export function computeRangePositions(edits: FileEdit[], fileContent?: string): RangePosition[] {
return edits
.filter((e) => e.new_string)
.map((edit) => {
.flatMap((edit) => {
// Case 1: Has explicit range from tool → use it
if (edit.range) {
return {
return [{
start_line: edit.range.start_line_number,
end_line: edit.range.end_line_number,
};
}];
}

// Case 2: Has both old_string and new_string → diff them to find actual changes
if (edit.old_string && edit.new_string && fileContent) {
const idx = fileContent.indexOf(edit.new_string);
if (idx !== -1) {
const startLine = fileContent.substring(0, idx).split("\n").length;
const changedOffsets = diffToFindChangedLines(edit.old_string, edit.new_string);

if (changedOffsets.length === 0) {
return [];
}

// Convert offsets to line ranges, merging adjacent lines
const ranges: RangePosition[] = [];
let rangeStart = changedOffsets[0];
let rangeEnd = changedOffsets[0];

for (let i = 1; i < changedOffsets.length; i++) {
if (changedOffsets[i] === rangeEnd + 1) {
rangeEnd = changedOffsets[i];
} else {
ranges.push({
start_line: startLine + rangeStart,
end_line: startLine + rangeEnd,
});
rangeStart = changedOffsets[i];
rangeEnd = changedOffsets[i];
}
}

ranges.push({
start_line: startLine + rangeStart,
end_line: startLine + rangeEnd,
});

return ranges;
}
}

// Case 3: Fallback - attribute entire new_string (original behavior)
const lineCount = edit.new_string.split("\n").length;
if (fileContent) {
const idx = fileContent.indexOf(edit.new_string);
if (idx !== -1) {
const startLine = fileContent.substring(0, idx).split("\n").length;
return { start_line: startLine, end_line: startLine + lineCount - 1 };
return [{ start_line: startLine, end_line: startLine + lineCount - 1 }];
}
}
return { start_line: 1, end_line: lineCount };
return [{ start_line: 1, end_line: lineCount }];
});
}

Expand Down