#!/usr/bin/env python3 import json import re from datetime import datetime import os import subprocess def extract_xml_blocks(text): """Extract complete XML blocks from text""" xml_patterns = [ r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', r'.*?', ] blocks = [] for pattern in xml_patterns: matches = re.findall(pattern, text, re.DOTALL) blocks.extend(matches) return blocks def process_transcript_file(filepath): """Process a single transcript file and extract XML with timestamps""" results = [] with open(filepath, 'r', encoding='utf-8', errors='ignore') as f: for line in f: try: data = json.loads(line) # Get timestamp timestamp = data.get('timestamp', 'unknown') # Extract text content from message message = data.get('message', {}) content = message.get('content', []) if isinstance(content, list): for item in content: if isinstance(item, dict): text = '' if item.get('type') == 'text': text = item.get('text', '') elif item.get('type') == 'tool_use': # Also check tool_use input fields tool_input = item.get('input', {}) if isinstance(tool_input, dict): text = str(tool_input) if text: # Extract XML blocks xml_blocks = extract_xml_blocks(text) for block in xml_blocks: results.append({ 'timestamp': timestamp, 'xml': block }) except json.JSONDecodeError: continue return results # Get list of transcript files transcript_dir = os.path.expanduser('~/.claude/projects/-Users-alexnewman-Scripts-claude-mem/') os.chdir(transcript_dir) # Get all transcript files sorted by modification time result = subprocess.run(['ls', '-t'], capture_output=True, text=True) files = [f for f in result.stdout.strip().split('\n') if f.endswith('.jsonl')][:62] all_results = [] for filename in files: filepath = os.path.join(transcript_dir, filename) print(f"Processing {filename}...") results = process_transcript_file(filepath) all_results.extend(results) print(f" Found {len(results)} XML blocks") # Write results with timestamps output_file = os.path.expanduser('~/Scripts/claude-mem/all_xml_fragments_with_timestamps.xml') with open(output_file, 'w', encoding='utf-8') as f: f.write('\n') f.write('\n\n') for i, item in enumerate(all_results, 1): timestamp = item['timestamp'] xml = item['xml'] # Format timestamp nicely if it's ISO format if timestamp != 'unknown' and timestamp: try: dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00')) formatted_time = dt.strftime('%Y-%m-%d %H:%M:%S UTC') except: formatted_time = timestamp else: formatted_time = 'unknown' f.write(f'\n') f.write(xml) f.write('\n\n') f.write('\n') print(f"\nExtracted {len(all_results)} XML blocks with timestamps to {output_file}")