def save(self, output_path: str):
    """Write ``self.results`` to *output_path* as indented JSON.

    Args:
        output_path: Destination file; created or overwritten.
    """
    # Explicit encoding keeps the output identical across platforms.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(self.results, f, indent=2)


# Field name -> regex; the first capture group is the extracted value.
# Quantifiers need braces (``\d{4}``), and the decimal point is escaped so
# it only matches a literal ".".
schema = {
    "timestamp": r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)",
    "request_id": r"RequestId: ([a-f0-9-]+)",
    "duration_ms": r"Duration: (\d+\.\d+) ms",
    "memory_mb": r"MemorySize: (\d+) MB",
}
def extract_from_text(self, text: str, file_path: "str | None" = None):
    """Extract one record from *text* using the regexes in ``self.schema``.

    Each schema pattern is searched with ``re.IGNORECASE | re.MULTILINE``.
    The first capture group of the first match becomes the field's value;
    fields whose pattern does not match are set to ``None``.

    Args:
        text: Raw text to search.
        file_path: Optional origin of the text, stored under ``"_source"``.

    Returns:
        The extracted record (a dict), which is also appended to
        ``self.results``.
    """
    entry = {"_source": file_path}
    flags = re.IGNORECASE | re.MULTILINE
    for field, pattern in self.schema.items():
        match = re.search(pattern, text, flags)
        entry[field] = match.group(1) if match else None
    self.results.append(entry)
    return entry


# Cursor Extractor
# Run the extractor over every raw log file, one record per file.
extractor = CursorExtractor(schema)
# Path.glob yields files in OS-dependent order; sorting keeps the order of
# extractor.results stable from run to run.
for log_file in sorted(Path("data/raw/logs").glob("*.log")):
    # Decode explicitly and tolerate stray bytes so one malformed log does
    # not abort the whole run.
    content = log_file.read_text(encoding="utf-8", errors="replace")
    extractor.extract_from_text(content, str(log_file))
find data/raw -name "*.log" | entr -r python extractor/run_extractor.py

Then ask Cursor AI: "Show me the diff of extracted errors between the last two runs."

Cursor Extractor can output to: a JSON file, written by save() with json.dump(self.results, f, indent=2).