Initial upload with CI/CD workflow

2026-01-30 22:12:52 +00:00
parent 0f6df4f8c4
commit c6abc05f86


@@ -0,0 +1,99 @@
"""LLM-optimized export module for CodeSnap."""
from pathlib import Path
from typing import Any, Optional
from ..core.extractor import ExtractedCode
class LLMExporter:
"""Exports code summaries optimized for LLM context windows."""
def __init__(self, tokens_per_word: float = 0.75) -> None:
self.tokens_per_word = tokens_per_word
def estimate_tokens(self, text: str) -> int:
"""Estimate token count for text."""
return int(len(text.split()) / self.tokens_per_word)
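
    # Quick sanity check of the heuristic (an editor's illustration, not part
    # of the original commit): int(300 / 0.75) == 400, so a 300-word file is
    # estimated at roughly 400 tokens.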

    def export(
        self,
        extracted_files: list[ExtractedCode],
        file_paths: list[Path],
        dependency_data: dict[str, Any],
        complexity_data: dict[str, str],
        max_tokens: int = 8000,
        output_path: Optional[Path] = None,
    ) -> str:
        """Export code summary optimized for LLM context."""
        lines: list[str] = []
        lines.append("## CODEBASE SUMMARY\n")
        lines.append(f"Files analyzed: {len(extracted_files)}\n")

        language_counts: dict[str, int] = {}
        for extracted in extracted_files:
            language_counts[extracted.language] = language_counts.get(extracted.language, 0) + 1
        lang_summary = ", ".join(f"{k}: {v}" for k, v in sorted(language_counts.items()))
        lines.append(f"Languages: {lang_summary}\n")

        total_funcs = sum(len(f.functions) for f in extracted_files)
        total_classes = sum(len(f.classes) for f in extracted_files)
        lines.append(f"Functions: {total_funcs}, Classes: {total_classes}\n")

        lines.append("\n## KEY STRUCTURE\n")
        for extracted in extracted_files:
            if extracted.classes:
                lines.append(f"\n{extracted.file_path.name}:\n")
                for cls in extracted.classes:
                    method_names = [m.name for m in cls.methods]
                    if method_names:
                        lines.append(f" CLASS {cls.name}: {', '.join(method_names)}\n")
                    else:
                        lines.append(f" CLASS {cls.name}\n")

        lines.append("\n## FUNCTIONS (Global)\n")
        for extracted in extracted_files:
            for func in extracted.functions:
                if not func.is_method:
                    params = ", ".join(func.parameters) if func.parameters else ""
                    lines.append(f"{extracted.file_path.name}:{func.name}({params})\n")

        lines.append("\n## DEPENDENCIES\n")
        dependencies = dependency_data.get("dependencies", [])
        for dep in dependencies[:30]:
            lines.append(f"{dep.source.name} -> {dep.target.name}\n")

        orphaned = dependency_data.get("orphaned", [])
        if orphaned:
            lines.append(f"\nOrphaned files: {', '.join(f.name for f in orphaned)}\n")
        cycles = dependency_data.get("cycles", [])
        if cycles:
            lines.append("\nCircular dependencies detected\n")

        lines.append("\n## FILE LIST\n")
        for path in file_paths:
            complexity = complexity_data.get(str(path), "?")
            try:
                display = path.relative_to(file_paths[0].parent)
            except ValueError:
                # Fall back to the full path for files that are not under
                # the first file's parent directory.
                display = path
            lines.append(f"{display} [{complexity}]\n")

        # Every entry already ends with "\n", so join without a separator
        # (joining on "\n" would double-space the whole summary).
        result = "".join(lines)
        if self.estimate_tokens(result) > max_tokens:
            result = self._compress_output(result, max_tokens)
        if output_path:
            output_path.write_text(result, encoding="utf-8")
        return result

    def _compress_output(self, content: str, max_tokens: int) -> str:
        """Compress output to fit within token limit."""
        while self.estimate_tokens(content) > max_tokens and len(content) > 100:
            lines = content.split("\n")
            if len(lines) > 10:
                # Trim five lines from the end per pass and mark the cut.
                content = "\n".join(lines[:-5]) + "\n...[truncated]..."
            else:
                # Too few lines left to drop whole ones; shave 20% of the
                # characters instead.
                content = content[: int(len(content) * 0.8)]
        return content
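

# Illustrative usage (an editor's sketch, not part of the original commit).
# It assumes the module is run from inside the CodeSnap package; the relative
# import above fails if the file is executed as a loose script.
if __name__ == "__main__":
    exporter = LLMExporter()
    # 4 words / 0.75 words-per-token -> int(5.33) == 5 estimated tokens.
    print(exporter.estimate_tokens("estimate tokens for text"))
    # Empty inputs still yield the summary skeleton; a real call would pass
    # ExtractedCode results plus dependency/complexity analysis data.
    print(exporter.export(
        extracted_files=[],
        file_paths=[],
        dependency_data={},
        complexity_data={},
    ))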