Initial upload with CI/CD workflow

2026-01-30 22:12:52 +00:00
parent 0f6df4f8c4
commit c6abc05f86


@@ -0,0 +1,99 @@
"""LLM-optimized export module for CodeSnap."""
from pathlib import Path
from typing import Any, Optional
from ..core.extractor import ExtractedCode
class LLMExporter:
"""Exports code summaries optimized for LLM context windows."""
def __init__(self, tokens_per_word: float = 0.75) -> None:
self.tokens_per_word = tokens_per_word
def estimate_tokens(self, text: str) -> int:
"""Estimate token count for text."""
return int(len(text.split()) / self.tokens_per_word)
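
    # Quick sanity check of the heuristic (an editor's illustration, not part
    # of the original commit): int(300 / 0.75) == 400, so a 300-word file is
    # estimated at roughly 400 tokens.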

    def export(
        self,
        extracted_files: list[ExtractedCode],
        file_paths: list[Path],
        dependency_data: dict[str, Any],
        complexity_data: dict[str, str],
        max_tokens: int = 8000,
        output_path: Optional[Path] = None,
    ) -> str:
        """Export code summary optimized for LLM context."""
        lines: list[str] = []
        lines.append("## CODEBASE SUMMARY\n")
        lines.append(f"Files analyzed: {len(extracted_files)}\n")

        language_counts: dict[str, int] = {}
        for extracted in extracted_files:
            language_counts[extracted.language] = language_counts.get(extracted.language, 0) + 1
        lang_summary = ", ".join(f"{k}: {v}" for k, v in sorted(language_counts.items()))
        lines.append(f"Languages: {lang_summary}\n")

        total_funcs = sum(len(f.functions) for f in extracted_files)
        total_classes = sum(len(f.classes) for f in extracted_files)
        lines.append(f"Functions: {total_funcs}, Classes: {total_classes}\n")

        lines.append("\n## KEY STRUCTURE\n")
        for extracted in extracted_files:
            if extracted.classes:
                lines.append(f"\n{extracted.file_path.name}:\n")
                for cls in extracted.classes:
                    method_names = [m.name for m in cls.methods]
                    if method_names:
                        lines.append(f" CLASS {cls.name}: {', '.join(method_names)}\n")
                    else:
                        lines.append(f" CLASS {cls.name}\n")

        lines.append("\n## FUNCTIONS (Global)\n")
        for extracted in extracted_files:
            for func in extracted.functions:
                if not func.is_method:
                    params = ", ".join(func.parameters) if func.parameters else ""
                    lines.append(f"{extracted.file_path.name}:{func.name}({params})\n")

        lines.append("\n## DEPENDENCIES\n")
        dependencies = dependency_data.get("dependencies", [])
        for dep in dependencies[:30]:
            lines.append(f"{dep.source.name} -> {dep.target.name}\n")

        orphaned = dependency_data.get("orphaned", [])
        if orphaned:
            lines.append(f"\nOrphaned files: {', '.join(f.name for f in orphaned)}\n")
        cycles = dependency_data.get("cycles", [])
        if cycles:
            lines.append("\nCircular dependencies detected\n")

        lines.append("\n## FILE LIST\n")
        for path in file_paths:
            complexity = complexity_data.get(str(path), "?")
            try:
                display = path.relative_to(file_paths[0].parent)
            except ValueError:
                # Fall back to the full path for files that are not under
                # the first file's parent directory.
                display = path
            lines.append(f"{display} [{complexity}]\n")

        # Every entry already ends with "\n", so join without a separator
        # (joining on "\n" would double-space the whole summary).
        result = "".join(lines)
        if self.estimate_tokens(result) > max_tokens:
            result = self._compress_output(result, max_tokens)
        if output_path:
            output_path.write_text(result, encoding="utf-8")
        return result

    def _compress_output(self, content: str, max_tokens: int) -> str:
        """Compress output to fit within token limit."""
        while self.estimate_tokens(content) > max_tokens and len(content) > 100:
            lines = content.split("\n")
            if len(lines) > 10:
                # Trim five lines from the end per pass and mark the cut.
                content = "\n".join(lines[:-5]) + "\n...[truncated]..."
            else:
                # Too few lines left to drop whole ones; shave 20% of the
                # characters instead.
                content = content[: int(len(content) * 0.8)]
        return content
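

# Illustrative usage (an editor's sketch, not part of the original commit).
# It assumes the module is run from inside the CodeSnap package; the relative
# import above fails if the file is executed as a loose script.
if __name__ == "__main__":
    exporter = LLMExporter()
    # 4 words / 0.75 words-per-token -> int(5.33) == 5 estimated tokens.
    print(exporter.estimate_tokens("estimate tokens for text"))
    # Empty inputs still yield the summary skeleton; a real call would pass
    # ExtractedCode results plus dependency/complexity analysis data.
    print(exporter.export(
        extracted_files=[],
        file_paths=[],
        dependency_data={},
        complexity_data={},
    ))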