From ac4f7f7a707387ffa500b23995e318f8ad2d1012 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 09:03:51 +0000
Subject: [PATCH 1/4] Initial plan


From f87eb986a0944b4c85497629041f208da2aa3bb5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 09:08:17 +0000
Subject: [PATCH 2/4] Fix text reconstruction by using character positions
 instead of joining sentences

Co-authored-by: abirharrasse <81148161+abirharrasse@users.noreply.github.com>
---
 .gitignore         | 24 ++++++++++++++++++++++++
 exploration/rpc.py | 41 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ff76526
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg-info/
+dist/
+build/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Checkpoints
+checkpoints*/
+*.pkl
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/exploration/rpc.py b/exploration/rpc.py
index 60b33c1..91b8346 100644
--- a/exploration/rpc.py
+++ b/exploration/rpc.py
@@ -57,8 +57,32 @@
 }
 
 def split_into_sentences(text):
-    sentences = re.split(r'(?<=[.!?])\s+', text)
-    return [s.strip() for s in sentences if s.strip() and len(s.strip()) > 3]
+    """Split text into sentences and return both sentences and their character positions.
+    
+    Returns:
+        tuple: (sentences, positions) where sentences is a list of sentence strings,
+               and positions is a list of tuples (start, end) indicating character positions
+               in the original text.
+    """
+    sentences = []
+    positions = []
+    
+    # Split on sentence boundaries
+    split_sentences = re.split(r'(?<=[.!?])\s+', text)
+    
+    current_pos = 0
+    for sent in split_sentences:
+        stripped = sent.strip()
+        if stripped and len(stripped) > 3:
+            # Find the actual position in the original text
+            start = text.find(sent, current_pos)
+            if start != -1:
+                end = start + len(sent)
+                sentences.append(stripped)
+                positions.append((start, end))
+                current_pos = end
+    
+    return sentences, positions
 
 def get_sentence_token_positions(text, sentences, tokenizer):
     input_ids = tokenizer.encode(text, return_tensors="pt").to(device)
@@ -180,7 +204,7 @@ def get_hidden_state(text, layer=-1):
             )
         full_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
         cot = full_text[len(problem):].strip() if full_text.startswith(problem) else full_text
-        sentences = split_into_sentences(cot)
+        sentences, sentence_positions = split_into_sentences(cot)
         
         if len(sentences) < 2:
             print(f"  ✗ Only {len(sentences)} sentences, skipping")
@@ -193,6 +217,7 @@ def get_hidden_state(text, layer=-1):
             'problem': problem,
             'cot': cot,
             'sentences': sentences,
+            'sentence_positions': sentence_positions,
             'causal_matrix': causal_matrix
         }
         del output_ids, input_ids
@@ -328,12 +353,18 @@ def classify_sentence(sentence):
     for pid, anchors in tqdm(all_anchors.items(), desc="Extracting features"):
         data = all_data[pid]
         problem = data['problem']
-        sentences = data['sentences']
+        cot = data['cot']
+        sentence_positions = data['sentence_positions']
         causal_matrix = data['causal_matrix']
         
         for anchor in anchors:
             idx = anchor['idx']
-            text_before = problem + " " + " ".join(sentences[:idx])
+            # Use character positions to slice original text instead of reconstructing
+            if idx > 0:
+                end_pos = sentence_positions[idx - 1][1]
+                text_before = problem + " " + cot[:end_pos]
+            else:
+                text_before = problem
             hidden_state = get_hidden_state(text_before)
             outgoing_feature = np.sum(np.abs(causal_matrix[idx, :]))
             all_features.append({

From 2c73c0fe8944d1d304a122611917c5783726bf1b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 09:10:29 +0000
Subject: [PATCH 3/4] Improve sentence position tracking to handle edge cases

Co-authored-by: abirharrasse <81148161+abirharrasse@users.noreply.github.com>
---
 exploration/rpc.py | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/exploration/rpc.py b/exploration/rpc.py
index 91b8346..249a072 100644
--- a/exploration/rpc.py
+++ b/exploration/rpc.py
@@ -62,25 +62,42 @@ def split_into_sentences(text):
     Returns:
         tuple: (sentences, positions) where sentences is a list of sentence strings,
                and positions is a list of tuples (start, end) indicating character positions
-               in the original text.
+               in the original text (before stripping).
     """
     sentences = []
     positions = []
     
-    # Split on sentence boundaries
-    split_sentences = re.split(r'(?<=[.!?])\s+', text)
+    # Split on sentence boundaries, keeping track of positions
+    # Use re.split with capturing groups to preserve separators and calculate positions
+    parts = re.split(r'((?<=[.!?])\s+)', text)
     
     current_pos = 0
-    for sent in split_sentences:
-        stripped = sent.strip()
-        if stripped and len(stripped) > 3:
-            # Find the actual position in the original text
-            start = text.find(sent, current_pos)
-            if start != -1:
-                end = start + len(sent)
+    accumulated_text = ""
+    
+    for i, part in enumerate(parts):
+        # Even indices are sentence content, odd indices are separators
+        if i % 2 == 0:
+            accumulated_text = part
+        else:
+            # We have a complete sentence with its separator
+            stripped = accumulated_text.strip()
+            if stripped and len(stripped) > 3:
+                # Calculate actual positions in original text
+                start = current_pos
+                end = current_pos + len(accumulated_text)
                 sentences.append(stripped)
                 positions.append((start, end))
-                current_pos = end
+            current_pos += len(accumulated_text) + len(part)
+    
+    # Handle the last sentence (no separator after it)
+    if parts and len(parts) % 2 == 1:
+        accumulated_text = parts[-1]
+        stripped = accumulated_text.strip()
+        if stripped and len(stripped) > 3:
+            start = current_pos
+            end = current_pos + len(accumulated_text)
+            sentences.append(stripped)
+            positions.append((start, end))
     
     return sentences, positions
 

From 3e3eef0dfce743c74c236c1288a9f6496cd4f585 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 09:12:15 +0000
Subject: [PATCH 4/4] Clarify docstring about intentional design of position
 tracking

Co-authored-by: abirharrasse <81148161+abirharrasse@users.noreply.github.com>
---
 exploration/rpc.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/exploration/rpc.py b/exploration/rpc.py
index 249a072..723d68f 100644
--- a/exploration/rpc.py
+++ b/exploration/rpc.py
@@ -60,9 +60,16 @@ def split_into_sentences(text):
     """Split text into sentences and return both sentences and their character positions.
     
     Returns:
-        tuple: (sentences, positions) where sentences is a list of sentence strings,
-               and positions is a list of tuples (start, end) indicating character positions
-               in the original text (before stripping).
+        tuple: (sentences, positions) where:
+            - sentences: list of stripped sentence strings (for classification/display)
+            - positions: list of (start, end) tuples indicating character positions
+                        in the original text (unstripped, for accurate slicing that
+                        preserves formatting)
+    
+    Note: Positions track the unstripped sentence boundaries in the original text,
+          allowing us to slice the original text with preserved formatting. This is
+          intentionally different from the stripped sentences which are used for
+          classification and don't need the extra whitespace.
     """
     sentences = []
     positions = []