Files
codechunk-cli/codechunk/core/summarizer.py
7000pctAUTO 1ec6ccbf1b
Some checks failed
CI / test (push) Failing after 42s
CI / build (push) Has been skipped
fix: resolve CI/CD issues - Poetry setup, type annotations, MyPy errors
2026-02-02 00:08:16 +00:00

152 lines
5.4 KiB
Python

from typing import Optional
from codechunk.core.chunking import ParsedChunk
class CodeSummarizer:
    """Summarize code chunks for LLM context.

    Every chunk is rendered as one line of ``" | "``-separated fields; which
    fields appear depends on the chunk's ``chunk_type``.
    :meth:`generate_overview` assembles a Markdown overview from a list of
    chunks.
    """

    # Docstring excerpt length (characters) for function and method summaries.
    DOC_SUMMARY_LIMIT = 100
    # Docstring excerpt length (characters) for class summaries.
    CLASS_DOC_SUMMARY_LIMIT = 150
    # A function's complexity score is reported only when it exceeds this.
    COMPLEXITY_REPORT_THRESHOLD = 5
    # Minimum ``priority`` for a chunk to be listed under "Key Components".
    HIGH_PRIORITY_THRESHOLD = 75
    # Upper bound on the number of entries listed under "Key Components".
    MAX_KEY_COMPONENTS = 10

    def __init__(self) -> None:
        """Set up the per-type summary templates."""
        # Not read by any method of this class; kept as a public attribute
        # for code that accesses it from outside.
        self.summary_templates = {
            "function": "Function {name} with {param_count} parameters",
            "class": "Class {name} with {method_count} methods",
            "method": "Method {name} of class {class_name}",
            "file": "File {filename} in {language}",
        }

    def summarize(self, chunk: ParsedChunk) -> str:
        """Generate a summary for a chunk.

        Args:
            chunk: The chunk to describe.

        Returns:
            ``chunk.summary`` when it is already set (truthy); otherwise a
            summary built by the handler matching ``chunk.chunk_type``, with
            a generic fallback for unrecognised types.
        """
        if chunk.summary:
            return chunk.summary
        handlers = {
            "function": self._summarize_function,
            "method": self._summarize_method,
            "class": self._summarize_class,
            "file": self._summarize_file,
        }
        handler = handlers.get(chunk.chunk_type, self._summarize_generic)
        return handler(chunk)

    @staticmethod
    def _doc_excerpt(docstring: Optional[str], limit: int) -> str:
        """Return a single-line excerpt of *docstring*.

        Whitespace runs (including newlines and indentation) are collapsed to
        single spaces so the excerpt cannot break the one-line summary or a
        Markdown bullet that embeds it.

        Args:
            docstring: Raw docstring text; may be ``None`` or empty.
            limit: Maximum number of characters kept before ``"..."`` is
                appended.

        Returns:
            The flattened text, truncated to *limit* characters plus
            ``"..."`` when it was longer; ``""`` when there is no text.
        """
        if not docstring:
            return ""
        flattened = " ".join(docstring.split())
        if len(flattened) > limit:
            return flattened[:limit] + "..."
        return flattened

    def _summarize_function(self, chunk: ParsedChunk) -> str:
        """Summarize a function.

        Fields, in order: decorators (leading ``@`` removed), name,
        parameters, return type, docstring excerpt, line count, and the
        complexity score when it exceeds ``COMPLEXITY_REPORT_THRESHOLD``.
        Optional fields are omitted when their metadata is empty.
        """
        meta = chunk.metadata
        parts: list[str] = []
        if meta.decorators:
            decorators = ", ".join(d.removeprefix("@") for d in meta.decorators)
            parts.append(f"Decorators: {decorators}")
        parts.append(f"Function: {chunk.name}")
        if meta.parameters:
            parts.append(f"Parameters: {', '.join(meta.parameters)}")
        if meta.return_type:
            parts.append(f"Returns: {meta.return_type}")
        doc = self._doc_excerpt(meta.docstring, self.DOC_SUMMARY_LIMIT)
        if doc:
            parts.append(f"Doc: {doc}")
        parts.append(f"Lines: {meta.line_count}")
        if meta.complexity_score > self.COMPLEXITY_REPORT_THRESHOLD:
            parts.append(f"Complexity: {meta.complexity_score}")
        return " | ".join(parts)

    def _summarize_method(self, chunk: ParsedChunk) -> str:
        """Summarize a method.

        Fields, in order: name, parameters (``self`` and ``cls`` are left
        out), return type, and docstring excerpt. Optional fields are omitted
        when their metadata is empty.
        """
        meta = chunk.metadata
        parts: list[str] = [f"Method: {chunk.name}"]
        if meta.parameters:
            params = [p for p in meta.parameters if p not in ("self", "cls")]
            if params:
                parts.append(f"Parameters: {', '.join(params)}")
        if meta.return_type:
            parts.append(f"Returns: {meta.return_type}")
        doc = self._doc_excerpt(meta.docstring, self.DOC_SUMMARY_LIMIT)
        if doc:
            parts.append(f"Doc: {doc}")
        return " | ".join(parts)

    def _summarize_class(self, chunk: ParsedChunk) -> str:
        """Summarize a class.

        Fields, in order: name, docstring excerpt (up to
        ``CLASS_DOC_SUMMARY_LIMIT`` characters, omitted when empty), and line
        count.
        """
        meta = chunk.metadata
        parts: list[str] = [f"Class: {chunk.name}"]
        doc = self._doc_excerpt(meta.docstring, self.CLASS_DOC_SUMMARY_LIMIT)
        if doc:
            parts.append(f"Doc: {doc}")
        parts.append(f"Lines: {meta.line_count}")
        return " | ".join(parts)

    def _summarize_file(self, chunk: ParsedChunk) -> str:
        """Summarize a file.

        Fields, in order: file name, language, line count, and the number of
        imports when the import list is non-empty.
        """
        meta = chunk.metadata
        parts: list[str] = [
            f"File: {meta.file_name}",
            f"Language: {meta.language}",
            f"Lines: {meta.line_count}",
        ]
        if meta.imports:
            parts.append(f"Imports: {len(meta.imports)}")
        return " | ".join(parts)

    def _summarize_generic(self, chunk: ParsedChunk) -> str:
        """Summarize a chunk of any other type as ``"<Type>: <name> (<n> lines)"``."""
        return f"{chunk.chunk_type.capitalize()}: {chunk.name} ({chunk.metadata.line_count} lines)"

    def batch_summarize(self, chunks: list[ParsedChunk]) -> list[str]:
        """Generate summaries for multiple chunks, preserving input order."""
        return [self.summarize(chunk) for chunk in chunks]

    def generate_overview(self, chunks: list[ParsedChunk], project_name: str = "Project") -> str:
        """Generate a Markdown overview of the project structure.

        Args:
            chunks: Chunks to describe; may be empty.
            project_name: Name used in the top-level heading.

        Returns:
            Newline-joined Markdown with a "Statistics" section (total and
            per-type counts, types sorted alphabetically), a "Languages"
            section (sorted by descending count), and, when any chunk has
            ``priority >= HIGH_PRIORITY_THRESHOLD``, a "Key Components"
            section listing the first ``MAX_KEY_COMPONENTS`` such chunks in
            input order.
        """
        type_counts: dict[str, int] = {}
        lang_counts: dict[str, int] = {}
        for chunk in chunks:
            type_counts[chunk.chunk_type] = type_counts.get(chunk.chunk_type, 0) + 1
            language = chunk.metadata.language
            lang_counts[language] = lang_counts.get(language, 0) + 1

        lines: list[str] = [
            f"# {project_name} Overview",
            "",
            "## Statistics",
            f"- Total chunks: {len(chunks)}",
        ]
        lines.extend(
            f" - {chunk_type}: {count}"
            for chunk_type, count in sorted(type_counts.items())
        )
        lines.append("")

        lines.append("## Languages")
        # The sort is stable, so languages with equal counts keep the order
        # in which they were first seen.
        by_count = sorted(lang_counts.items(), key=lambda item: -item[1])
        lines.extend(f"- {lang}: {count}" for lang, count in by_count)
        lines.append("")

        high_priority = [c for c in chunks if c.priority >= self.HIGH_PRIORITY_THRESHOLD]
        if high_priority:
            lines.append("## Key Components (High Priority)")
            for chunk in high_priority[: self.MAX_KEY_COMPONENTS]:
                lines.append(f"- **{chunk.name}**: {self.summarize(chunk)}")
        return "\n".join(lines)