#!/usr/bin/env python3
import json
import re
from datetime import datetime
import os
import subprocess
# Default regular expressions applied by extract_xml_blocks(), in order.
#
# NOTE(review): every entry reads as a bare lazy wildcard; the opening/closing
# tag text each pattern presumably carried is not present in this file, so the
# entries are indistinguishable and match at every position. Restore the
# intended tag names before relying on the default output.
_DEFAULT_XML_PATTERNS = (
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    r'.*?',
    # This entry used to be an unterminated literal split across two physical
    # lines (a syntax error). The line break is now spelled as an explicit
    # regex newline so the module can be parsed.
    r'\n.*?',
    r'.*?',
    r'.*?',
    r'.*?',
)


def extract_xml_blocks(text, patterns=None):
    """Extract complete XML blocks from text.

    Args:
        text: The string to scan.
        patterns: Optional iterable of regular-expression strings to use
            instead of the module defaults. Every pattern is applied with
            ``re.DOTALL`` so that ``.`` also spans line breaks.

    Returns:
        A list of matched substrings, grouped by pattern (in pattern order)
        and, within one pattern, in order of appearance in ``text``.
    """
    if patterns is None:
        patterns = _DEFAULT_XML_PATTERNS

    blocks = []
    for pattern in patterns:
        # group(0) is always the whole match, so a pattern that happens to
        # contain capture groups still yields the complete block rather than
        # only the captured part (which is what re.findall would return).
        blocks.extend(
            match.group(0) for match in re.finditer(pattern, text, re.DOTALL)
        )
    return blocks
def _content_item_text(item):
    """Return the searchable text of one message content item, or ''."""
    if not isinstance(item, dict):
        return ''
    item_type = item.get('type')
    if item_type == 'text':
        text = item.get('text', '')
        # A non-string value (e.g. null) cannot be scanned by the regex engine.
        return text if isinstance(text, str) else ''
    if item_type == 'tool_use':
        # Also check tool_use input fields.
        tool_input = item.get('input', {})
        if isinstance(tool_input, dict):
            # str() gives the Python repr of the dict, so a line break inside
            # a value shows up as the two characters backslash + 'n'.
            return str(tool_input)
    return ''


def process_transcript_file(filepath):
    """Process a single transcript file and extract XML with timestamps.

    The file is read as JSON Lines: each line is decoded on its own. Lines
    that are not valid JSON, that do not decode to an object, or whose
    'message' / 'content' fields do not have the expected dict / list shape
    are skipped instead of aborting the whole file.

    Args:
        filepath: Path of the transcript file. It is opened as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        A list of ``{'timestamp': ..., 'xml': ...}`` dicts, one per block
        found by ``extract_xml_blocks``, in file order. 'timestamp' is the
        record's 'timestamp' value, or 'unknown' when the key is absent.
    """
    results = []
    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            # Only the decode step is expected to fail, so only it is guarded.
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict):
                continue

            timestamp = data.get('timestamp', 'unknown')
            message = data.get('message', {})
            if not isinstance(message, dict):
                continue
            content = message.get('content', [])
            if not isinstance(content, list):
                continue

            for item in content:
                text = _content_item_text(item)
                if not text:
                    continue
                results.extend(
                    {'timestamp': timestamp, 'xml': block}
                    for block in extract_xml_blocks(text)
                )
    return results
def _recent_transcripts(directory, limit):
    """Return up to *limit* visible '.jsonl' file names in *directory*, newest first."""
    with os.scandir(directory) as entries:
        candidates = [
            (entry.stat(follow_symlinks=False).st_mtime_ns, entry.name)
            for entry in entries
            # Hidden names are skipped to match what a plain `ls` would list.
            if entry.name.endswith('.jsonl')
            and not entry.name.startswith('.')
            and entry.is_file()
        ]
    # Newest first; equal modification times fall back to name order.
    candidates.sort(key=lambda pair: (-pair[0], pair[1]))
    return [name for _, name in candidates[:limit]]


def _format_timestamp(timestamp):
    """Return *timestamp* as 'YYYY-MM-DD HH:MM:SS UTC' when it parses as ISO 8601.

    A missing value or the placeholder 'unknown' yields 'unknown'; a value
    that cannot be parsed is returned unchanged.
    """
    from datetime import timezone

    if not timestamp or timestamp == 'unknown':
        return 'unknown'
    try:
        # fromisoformat() on older Python versions rejects a trailing 'Z'.
        dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        return timestamp
    if dt.tzinfo is not None:
        # Convert before labelling, so a non-UTC offset is not mislabelled.
        dt = dt.astimezone(timezone.utc)
    return dt.strftime('%Y-%m-%d %H:%M:%S UTC')


# Get list of transcript files
transcript_dir = os.path.expanduser('~/.claude/projects/-Users-alexnewman-Scripts-claude-mem/')
# Kept so the process working directory is the same as it has always been
# when this script runs; it also fails early if the directory is missing.
os.chdir(transcript_dir)
# Get the most recently modified transcript files, newest first
MAX_TRANSCRIPTS = 62
files = _recent_transcripts(transcript_dir, MAX_TRANSCRIPTS)
all_results = []
for filename in files:
    filepath = os.path.join(transcript_dir, filename)
    print(f"Processing {filename}...")
    results = process_transcript_file(filepath)
    all_results.extend(results)
    print(f" Found {len(results)} XML blocks")
# Write results with timestamps
output_file = os.path.expanduser('~/Scripts/claude-mem/all_xml_fragments_with_timestamps.xml')
with open(output_file, 'w', encoding='utf-8') as f:
    # NOTE(review): the header and footer writes below emit only line breaks;
    # confirm whether an XML declaration / wrapping element was intended.
    f.write('\n')
    f.write('\n\n')
    for i, item in enumerate(all_results, 1):
        formatted_time = _format_timestamp(item['timestamp'])
        # The index and timestamp used to be computed and then dropped; they
        # are emitted as an XML comment ahead of each block. '--' is not
        # allowed inside an XML comment, so it is broken up in raw fallbacks.
        label = str(formatted_time).replace('--', '- -')
        f.write(f'<!-- Block {i}: {label} -->\n')
        f.write(item['xml'])
        f.write('\n\n')
    f.write('\n')
print(f"\nExtracted {len(all_results)} XML blocks with timestamps to {output_file}")