Initial upload with CI/CD workflow
codesnap/output/llm_exporter.py · 99 lines · Normal file
@@ -0,0 +1,99 @@
"""LLM-optimized export module for CodeSnap."""

from pathlib import Path
from typing import Any, Optional

from ..core.extractor import ExtractedCode

class LLMExporter:
    """Exports code summaries optimized for LLM context windows."""

    def __init__(self, words_per_token: float = 0.75) -> None:
        # Rough heuristic: one LLM token covers about 0.75 English words,
        # so estimated tokens = word count / words_per_token.
        self.words_per_token = words_per_token
    def estimate_tokens(self, text: str) -> int:
        """Estimate token count for text."""
        return int(len(text.split()) / self.words_per_token)
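    # NOTE: estimate_tokens() above is deliberately dependency-free. If exact
    # counts matter, a real tokenizer could be swapped in, e.g. (assuming the
    # optional tiktoken package is acceptable):
    #     len(tiktoken.get_encoding("cl100k_base").encode(text))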
    def export(
        self,
        extracted_files: list[ExtractedCode],
        file_paths: list[Path],
        dependency_data: dict[str, Any],
        complexity_data: dict[str, str],
        max_tokens: int = 8000,
        output_path: Optional[Path] = None,
    ) -> str:
        """Export code summary optimized for LLM context."""
        lines: list[str] = []
        lines.append("## CODEBASE SUMMARY\n")
        lines.append(f"Files analyzed: {len(extracted_files)}\n")

        language_counts: dict[str, int] = {}
        for extracted in extracted_files:
            language_counts[extracted.language] = language_counts.get(extracted.language, 0) + 1
        lang_summary = ", ".join(f"{k}: {v}" for k, v in sorted(language_counts.items()))
        lines.append(f"Languages: {lang_summary}\n")

        total_funcs = sum(len(f.functions) for f in extracted_files)
        total_classes = sum(len(f.classes) for f in extracted_files)
        lines.append(f"Functions: {total_funcs}, Classes: {total_classes}\n")
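        # Illustrative rendering of the block built above (values are examples):
        #   ## CODEBASE SUMMARY
        #   Files analyzed: 12
        #   Languages: javascript: 2, python: 10
        #   Functions: 48, Classes: 9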
        lines.append("\n## KEY STRUCTURE\n")

        for extracted in extracted_files:
            if extracted.classes:
                lines.append(f"\n{extracted.file_path.name}:\n")
                for cls in extracted.classes:
                    method_names = [m.name for m in cls.methods]
                    if method_names:
                        lines.append(f" CLASS {cls.name}: {', '.join(method_names)}\n")
                    else:
                        lines.append(f" CLASS {cls.name}\n")
        lines.append("\n## FUNCTIONS (Global)\n")
        for extracted in extracted_files:
            for func in extracted.functions:
                if not func.is_method:
                    params = ", ".join(func.parameters) if func.parameters else ""
                    lines.append(f"{extracted.file_path.name}:{func.name}({params})\n")
        lines.append("\n## DEPENDENCIES\n")
        # Cap the edge list so this section cannot dominate the token budget.
        dependencies = dependency_data.get("dependencies", [])
        for dep in dependencies[:30]:
            lines.append(f"{dep.source.name} → {dep.target.name}\n")

        orphaned = dependency_data.get("orphaned", [])
        if orphaned:
            lines.append(f"\nOrphaned files: {', '.join(f.name for f in orphaned)}\n")

        cycles = dependency_data.get("cycles", [])
        if cycles:
            lines.append("\nCircular dependencies detected\n")
        lines.append("\n## FILE LIST\n")
        for path in file_paths:
            complexity = complexity_data.get(str(path), "?")
            # relative_to() raises ValueError for paths outside the first
            # file's directory, so fall back to the full path.
            try:
                rel = path.relative_to(file_paths[0].parent)
            except ValueError:
                rel = path
            lines.append(f"{rel} [{complexity}]\n")
        # Every entry already ends in "\n", so join without a separator to
        # avoid doubling every newline.
        result = "".join(lines)

        if self.estimate_tokens(result) > max_tokens:
            result = self._compress_output(result, max_tokens)

        if output_path:
            output_path.write_text(result, encoding="utf-8")

        return result
    def _compress_output(self, content: str, max_tokens: int) -> str:
        """Compress output to fit within token limit."""
        # Drop five lines per pass while the estimate is over budget; the
        # 100-character floor guarantees the loop terminates.
        while self.estimate_tokens(content) > max_tokens and len(content) > 100:
            lines = content.split("\n")
            if len(lines) > 10:
                content = "\n".join(lines[:-5]) + "\n...[truncated]..."
            else:
                # Too few lines left to drop whole lines; trim by characters.
                content = content[: int(len(content) * 0.8)]
        return content
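A minimal usage sketch for reviewers, assuming the package is importable as codesnap; empty inputs keep it self-contained, and the dependency_data keys shown are the ones export() reads above. Real callers would pass the extractor and analyzer results for the scanned files.

from pathlib import Path
from codesnap.output.llm_exporter import LLMExporter

exporter = LLMExporter()
summary = exporter.export(
    extracted_files=[],
    file_paths=[],
    dependency_data={"dependencies": [], "orphaned": [], "cycles": []},
    complexity_data={},
    max_tokens=4000,
    output_path=Path("summary.llm.txt"),
)
print(exporter.estimate_tokens(summary), "estimated tokens")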