fix(reference): Fix line attribution accuracy and add Claude Code model extraction #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

muraalee wants to merge 1 commit into cursor:main from muraalee:fix/claude-code-attribution

+198 −10

reference/trace-hook.ts

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,11 +1,23 @@
  
    #!/usr/bin/env bun

    /**

     * Agent Trace Hook Handler

     *

     * This script processes hook events from AI coding tools (Cursor, Claude Code)

     * and generates trace records for attribution tracking. It reads JSON input

     * from stdin and dispatches to the appropriate handler based on hook_event_name.

     *

     * Supported tools:

     * - Cursor: afterFileEdit, afterTabFileEdit, afterShellExecution, sessionStart, sessionEnd

     * - Claude Code: PostToolUse, SessionStart, SessionEnd

     */

    import {

      createTrace,

      appendTrace,

      computeRangePositions,

      tryReadFile,

      type ContributorType,

      extractModelFromTranscript,

      type FileEdit,

    } from "./trace-store";

    @@ -32,6 +44,25 @@ interface HookInput {
  
      cwd?: string;

    }

    /**
     * Determines which model produced the event described by `input`.
     *
     * The model can come from two places:
     * - the hook payload itself (`input.model`), which is how Cursor reports it;
     * - the transcript file referenced by `input.transcript_path`, which is the
     *   only source for Claude Code since its payload carries no model field.
     *
     * The payload value always wins; the transcript is consulted only when the
     * payload has no (truthy) model.
     *
     * @param input - Parsed hook payload read from stdin
     * @returns The model identifier, or `undefined` when neither source yields one
     */
    function resolveModel(input: HookInput): string | undefined {
      const { model, transcript_path: transcriptPath } = input;

      // Prefer the explicit payload value when present.
      if (model) return model;

      // Otherwise fall back to the transcript, if the tool told us where it is.
      return transcriptPath ? extractModelFromTranscript(transcriptPath) : undefined;
    }

    const handlers: Record<string, (input: HookInput) => void> = {

      afterFileEdit: (input) => {

        const rangePositions = computeRangePositions(input.edits ?? [], tryReadFile(input.file_path!));

    @@ -108,7 +139,7 @@ const handlers: Record<string, (input: HookInput) => void> = {
  
          : undefined;

        appendTrace(createTrace("ai", file, {

          model: input.model,

          model: resolveModel(input),

          rangePositions,

          transcript: input.transcript_path,

          metadata: {

    @@ -122,14 +153,14 @@ const handlers: Record<string, (input: HookInput) => void> = {
  
      SessionStart: (input) => {

        appendTrace(createTrace("ai", ".sessions", {

          model: input.model,

          model: resolveModel(input),

          metadata: { event: "session_start", session_id: input.session_id, source: input.source },

        }));

      },

      SessionEnd: (input) => {

        appendTrace(createTrace("ai", ".sessions", {

          model: input.model,

          model: resolveModel(input),

          metadata: { event: "session_end", session_id: input.session_id, reason: input.reason },

        }));

      },

reference/trace-store.ts

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,5 +1,5 @@
  
    import { execFileSync } from "child_process";

    import { existsSync, mkdirSync, appendFileSync, readFileSync } from "fs";

    import { existsSync, mkdirSync, appendFileSync, readFileSync, openSync, fstatSync, readSync, closeSync } from "fs";

    import { join, relative } from "path";

    export interface Range {

    @@ -94,30 +94,187 @@ export function normalizeModelId(model?: string): string | undefined {
  
      return model;

    }

    /**
     * Extracts the most recent model identifier from a Claude Code transcript file.
     *
     * The transcript is read as JSONL (one JSON document per line) and the model is
     * taken from `entry.message.model`. Only the tail of the file is read at first
     * (4KB); if no model is found the window is doubled until it covers the whole
     * file. Lines are scanned from last to first, so the latest model wins when the
     * model changed during a session.
     *
     * Lines that are not valid JSON (including the partial first line of a tail
     * window) are skipped, as are entries whose `message.model` is not a non-empty
     * string.
     *
     * The file descriptor is always closed, including when a read fails part-way.
     *
     * @param transcriptPath - Path to the transcript JSONL file
     * @returns The model identifier (e.g., "claude-opus-4-5-20251101"), or
     *   `undefined` if the file cannot be read or contains no model
     *
     * @example
     * ```typescript
     * const model = extractModelFromTranscript("/path/to/transcript.jsonl");
     * // Returns: "claude-opus-4-5-20251101"
     * ```
     */
    export function extractModelFromTranscript(transcriptPath: string): string | undefined {
      let fd: number | undefined;
      try {
        fd = openSync(transcriptPath, "r");
        const { size } = fstatSync(fd);

        // Start with 4KB, expand if needed (balances syscall overhead vs read size)
        let readSize = Math.min(size, 4 * 1024);

        while (readSize <= size) {
          const buffer = Buffer.alloc(readSize);
          // Decode only the bytes actually read so a short read does not leave
          // NUL padding at the end of the last line.
          const bytesRead = readSync(fd, buffer, 0, readSize, size - readSize);
          const lines = buffer.toString("utf-8", 0, bytesRead).split("\n");

          // Iterate from the end to get the most recent model
          for (let i = lines.length - 1; i >= 0; i--) {
            const line = lines[i].trim();
            if (!line) continue;

            let entry: unknown;
            try {
              entry = JSON.parse(line);
            } catch {
              // Skip malformed/partial JSON lines
              continue;
            }

            const model = (entry as { message?: { model?: unknown } } | null)?.message?.model;
            if (typeof model === "string" && model) {
              return model;
            }
          }

          // No model found in this window; stop once the whole file was covered
          if (readSize >= size) break;
          readSize = Math.min(size, readSize * 2);
        }

        return undefined;
      } catch {
        // File doesn't exist or isn't readable
        return undefined;
      } finally {
        if (fd !== undefined) {
          try {
            closeSync(fd);
          } catch {
            // Nothing useful to do if close fails; the result is already decided
          }
        }
      }
    }

    /**
     * A contiguous span of lines in a file, as produced by `computeRangePositions`.
     * Both bounds are 1-based line numbers and the span is inclusive of `end_line`.
     */
    export interface RangePosition {

      // First line of the span (1-based).
      start_line: number;

      // Last line of the span (1-based, inclusive).
      end_line: number;

    }

    /**
     * Computes which lines in `newStr` are new or modified compared to `oldStr`.
     *
     * Both strings are split on "\n" and compared line by line using a longest
     * common subsequence (LCS) diff:
     * - Context lines: lines of `newStr` that are part of the LCS (not attributed)
     * - Changed lines: every other line of `newStr` (attributed to AI)
     *
     * This is necessary because some tools (like Claude Code's Edit tool) include
     * surrounding context lines in both `old_string` and `new_string`. Without this
     * diff, unchanged context lines would be attributed to the AI.
     *
     * An LCS is used rather than a greedy forward scan because a greedy scan
     * loses sync when an inserted line (e.g. a blank line or a lone "}") also
     * appears later in `oldStr`, which makes the unchanged lines that follow look
     * modified.
     *
     * The common prefix and suffix are stripped first, so the quadratic table is
     * only built for the region that actually differs.
     *
     * @param oldStr - The original string before the edit
     * @param newStr - The new string after the edit
     * @returns Ascending 0-indexed line offsets within `newStr` that are new or modified
     *
     * @example
     * ```typescript
     * // old: "line1\nline2\nline3"
     * // new: "line1\nNEW LINE\nline3"
     * diffToFindChangedLines(old, new); // Returns [1] - only the middle line changed
     * ```
     */
    function diffToFindChangedLines(oldStr: string, newStr: string): number[] {
      const oldLines = oldStr.split("\n");
      const newLines = newStr.split("\n");

      // Strip the shared leading lines; they are context by definition.
      let prefix = 0;
      const maxPrefix = Math.min(oldLines.length, newLines.length);
      while (prefix < maxPrefix && oldLines[prefix] === newLines[prefix]) {
        prefix++;
      }

      // Strip the shared trailing lines, never crossing into the prefix.
      let oldEnd = oldLines.length;
      let newEnd = newLines.length;
      while (oldEnd > prefix && newEnd > prefix && oldLines[oldEnd - 1] === newLines[newEnd - 1]) {
        oldEnd--;
        newEnd--;
      }

      const oldCount = oldEnd - prefix;
      const newCount = newEnd - prefix;

      // lcs[i][j] = LCS length of oldLines[prefix+i, oldEnd) and newLines[prefix+j, newEnd)
      const lcs: number[][] = Array.from({ length: oldCount + 1 }, () =>
        new Array<number>(newCount + 1).fill(0),
      );
      for (let i = oldCount - 1; i >= 0; i--) {
        for (let j = newCount - 1; j >= 0; j--) {
          lcs[i][j] =
            oldLines[prefix + i] === newLines[prefix + j]
              ? lcs[i + 1][j + 1] + 1
              : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
        }
      }

      // Walk both sequences forward; any new line that is not matched is a change.
      const changedOffsets: number[] = [];
      let i = 0;
      let j = 0;
      while (i < oldCount && j < newCount) {
        if (oldLines[prefix + i] === newLines[prefix + j]) {
          // Matching line - this is context, not a change
          i++;
          j++;
        } else if (lcs[i + 1][j] >= lcs[i][j + 1]) {
          // Dropping the old line loses nothing: treat it as a deletion
          i++;
        } else {
          // Line is genuinely new or modified - attribute to AI
          changedOffsets.push(prefix + j);
          j++;
        }
      }

      // Anything left in the new text after the old text is exhausted is new.
      for (; j < newCount; j++) {
        changedOffsets.push(prefix + j);
      }

      return changedOffsets;
    }

    /**
     * Computes the line ranges in the edited file that should be attributed to
     * each edit.
     *
     * Edits with an empty `new_string` are ignored. For every remaining edit:
     * 1. If the tool supplied an explicit `range`, that range is used as-is.
     * 2. Otherwise, if `new_string` can be located in `fileContent` and the edit
     *    also has an `old_string`, the two strings are diffed and only the lines
     *    that actually changed are reported (adjacent lines merged into ranges).
     *    An edit whose diff is empty contributes no ranges.
     * 3. Otherwise, if `new_string` can be located in `fileContent`, the whole
     *    `new_string` span is reported.
     * 4. Otherwise (no file content, or `new_string` not found), the range
     *    `1..lineCount(new_string)` is reported as a fallback.
     *
     * `new_string` is located with `indexOf`, i.e. at its first occurrence in
     * `fileContent`.
     *
     * @param edits - Edits reported by the tool
     * @param fileContent - Current content of the edited file, if it could be read
     * @returns 1-based, inclusive line ranges; an edit may yield zero or more ranges
     */
    export function computeRangePositions(edits: FileEdit[], fileContent?: string): RangePosition[] {
      return edits
        .filter((e) => e.new_string)
        .flatMap((edit) => {
          // Case 1: Has explicit range from tool → use it
          if (edit.range) {
            return [{
              start_line: edit.range.start_line_number,
              end_line: edit.range.end_line_number,
            }];
          }

          const lineCount = edit.new_string.split("\n").length;
          const idx = fileContent ? fileContent.indexOf(edit.new_string) : -1;

          // Fallback: cannot locate the edit in the file → attribute from line 1
          if (!fileContent || idx === -1) {
            return [{ start_line: 1, end_line: lineCount }];
          }

          // 1-based line on which new_string starts
          const startLine = fileContent.substring(0, idx).split("\n").length;

          // Case 2: Has both old_string and new_string → diff them to find actual changes
          if (edit.old_string) {
            const changedOffsets = diffToFindChangedLines(edit.old_string, edit.new_string);
            return mergeOffsetsIntoRanges(changedOffsets, startLine);
          }

          // Case 3: No old_string to diff against → attribute entire new_string
          return [{ start_line: startLine, end_line: startLine + lineCount - 1 }];
        });
    }

    /**
     * Converts ascending 0-indexed line offsets into 1-based inclusive ranges
     * anchored at `startLine`, merging runs of consecutive offsets into one range.
     * Returns an empty array when there are no offsets.
     */
    function mergeOffsetsIntoRanges(changedOffsets: number[], startLine: number): RangePosition[] {
      if (changedOffsets.length === 0) {
        return [];
      }

      const ranges: RangePosition[] = [];
      let rangeStart = changedOffsets[0];
      let rangeEnd = changedOffsets[0];

      for (let i = 1; i < changedOffsets.length; i++) {
        if (changedOffsets[i] === rangeEnd + 1) {
          // Still contiguous: extend the current range
          rangeEnd = changedOffsets[i];
        } else {
          // Gap found: close the current range and start a new one
          ranges.push({ start_line: startLine + rangeStart, end_line: startLine + rangeEnd });
          rangeStart = changedOffsets[i];
          rangeEnd = changedOffsets[i];
        }
      }
      ranges.push({ start_line: startLine + rangeStart, end_line: startLine + rangeEnd });

      return ranges;
    }

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix(reference): Fix line attribution accuracy and add Claude Code model extraction #7

Diff view

Diff view

There are no files selected for viewing

fix(reference): Fix line attribution accuracy and add Claude Code model extraction #7

Are you sure you want to change the base?

fix(reference): Fix line attribution accuracy and add Claude Code model extraction #7

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing