from typing import List

from codechunk.core.chunking import ParsedChunk


class CodeSummarizer:
    """Summarize code chunks for LLM context."""

    def __init__(self):
        self.summary_templates = {
            "function": "Function {name} with {param_count} parameters",
            "class": "Class {name} with {method_count} methods",
            "method": "Method {name} of class {class_name}",
            "file": "File {filename} in {language}",
        }

    def summarize(self, chunk: ParsedChunk) -> str:
        """Generate a summary for a chunk, preferring a precomputed summary."""
        if chunk.summary:
            return chunk.summary

        if chunk.chunk_type == "function":
            summary = self._summarize_function(chunk)
        elif chunk.chunk_type == "method":
            summary = self._summarize_method(chunk)
        elif chunk.chunk_type == "class":
            summary = self._summarize_class(chunk)
        elif chunk.chunk_type == "file":
            summary = self._summarize_file(chunk)
        else:
            summary = self._summarize_generic(chunk)

        return summary

    def _summarize_function(self, chunk: ParsedChunk) -> str:
        """Summarize a function."""
        parts = []
        if chunk.metadata.decorators:
            decorators = ", ".join(
                d[1:] if d.startswith("@") else d for d in chunk.metadata.decorators
            )
            parts.append(f"Decorators: {decorators}")
        parts.append(f"Function: {chunk.name}")
        if chunk.metadata.parameters:
            parts.append(f"Parameters: {', '.join(chunk.metadata.parameters)}")
        if chunk.metadata.return_type:
            parts.append(f"Returns: {chunk.metadata.return_type}")
        if chunk.metadata.docstring:
            doc_summary = chunk.metadata.docstring[:100]
            if len(chunk.metadata.docstring) > 100:
                doc_summary += "..."
            parts.append(f"Doc: {doc_summary}")
        parts.append(f"Lines: {chunk.metadata.line_count}")
        if chunk.metadata.complexity_score > 5:
            parts.append(f"Complexity: {chunk.metadata.complexity_score}")
        return " | ".join(parts)

    def _summarize_method(self, chunk: ParsedChunk) -> str:
        """Summarize a method."""
        # Qualified method names look like "ClassName.method"; recover the owning class.
        class_name = chunk.name.split(".")[0] if "." in chunk.name else "Unknown"
        parts = []
        parts.append(f"Method: {chunk.name}")
        if class_name != "Unknown":
            parts.append(f"Class: {class_name}")
        if chunk.metadata.parameters:
            params = [p for p in chunk.metadata.parameters if p not in ("self", "cls")]
            if params:
                parts.append(f"Parameters: {', '.join(params)}")
        if chunk.metadata.return_type:
            parts.append(f"Returns: {chunk.metadata.return_type}")
        if chunk.metadata.docstring:
            doc_summary = chunk.metadata.docstring[:100]
            if len(chunk.metadata.docstring) > 100:
                doc_summary += "..."
            parts.append(f"Doc: {doc_summary}")
        return " | ".join(parts)

    def _summarize_class(self, chunk: ParsedChunk) -> str:
        """Summarize a class."""
        parts = []
        parts.append(f"Class: {chunk.name}")
        if chunk.metadata.docstring:
            doc_summary = chunk.metadata.docstring[:150]
            if len(chunk.metadata.docstring) > 150:
                doc_summary += "..."
parts.append(f"Doc: {doc_summary}") parts.append(f"Lines: {chunk.metadata.line_count}") return " | ".join(parts) def _summarize_file(self, chunk: ParsedChunk) -> str: """Summarize a file.""" parts = [] parts.append(f"File: {chunk.metadata.file_name}") parts.append(f"Language: {chunk.metadata.language}") parts.append(f"Lines: {chunk.metadata.line_count}") if chunk.metadata.imports: import_count = len(chunk.metadata.imports) parts.append(f"Imports: {import_count}") return " | ".join(parts) def _summarize_generic(self, chunk: ParsedChunk) -> str: """Summarize a generic chunk.""" return f"{chunk.chunk_type.capitalize()}: {chunk.name} ({chunk.metadata.line_count} lines)" def batch_summarize(self, chunks: List[ParsedChunk]) -> List[str]: """Generate summaries for multiple chunks.""" return [self.summarize(chunk) for chunk in chunks] def generate_overview(self, chunks: List[ParsedChunk], project_name: str = "Project") -> str: """Generate an overview of the project structure.""" lines = [] lines.append(f"# {project_name} Overview") lines.append("") type_counts = {} lang_counts = {} for chunk in chunks: type_counts[chunk.chunk_type] = type_counts.get(chunk.chunk_type, 0) + 1 lang_counts[chunk.metadata.language] = lang_counts.get(chunk.metadata.language, 0) + 1 lines.append("## Statistics") lines.append(f"- Total chunks: {len(chunks)}") for chunk_type, count in sorted(type_counts.items()): lines.append(f" - {chunk_type}: {count}") lines.append("") lines.append("## Languages") for lang, count in sorted(lang_counts.items(), key=lambda x: -x[1]): lines.append(f"- {lang}: {count}") lines.append("") high_priority = [c for c in chunks if c.priority >= 75] if high_priority: lines.append("## Key Components (High Priority)") for chunk in high_priority[:10]: summary = self.summarize(chunk) lines.append(f"- **{chunk.name}**: {summary}") return "\n".join(lines)