Add core modules (parser, chunking, formatter, dependency, summarizer)
This commit is contained in:
153
codechunk/core/summarizer.py
Normal file
153
codechunk/core/summarizer.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
from typing import Optional, List
|
||||||
|
from codechunk.core.chunking import ParsedChunk
|
||||||
|
|
||||||
|
|
||||||
|
class CodeSummarizer:
    """Summarize code chunks for LLM context.

    Each summary is a single line of ``" | "``-separated parts describing one
    chunk; ``generate_overview`` renders a Markdown report for a collection
    of chunks.
    """

    # Maximum docstring excerpt lengths (in characters) before "..." is added.
    DOC_EXCERPT_LIMIT = 100
    CLASS_DOC_EXCERPT_LIMIT = 150
    # A function's complexity is reported only when its score exceeds this.
    COMPLEXITY_THRESHOLD = 5
    # Chunks whose priority is at least this value count as key components.
    HIGH_PRIORITY_THRESHOLD = 75
    # Upper bound on the number of key components listed in the overview.
    MAX_KEY_COMPONENTS = 10

    def __init__(self):
        """Create a summarizer and its per-chunk-type template table."""
        # NOTE(review): no method of this class reads these templates; the
        # attribute is kept unchanged because external code may rely on it.
        self.summary_templates = {
            "function": "Function {name} with {param_count} parameters",
            "class": "Class {name} with {method_count} methods",
            "method": "Method {name} of class {class_name}",
            "file": "File {filename} in {language}",
        }

    @staticmethod
    def _doc_excerpt(docstring: Optional[str], limit: int) -> str:
        """Return a one-line excerpt of ``docstring`` of at most ``limit`` chars.

        Runs of whitespace (newlines and indentation included) are collapsed
        to single spaces so the excerpt cannot break the one-line summary
        format. ``"..."`` is appended when the collapsed text had to be cut.
        Returns ``""`` when there is no text to show.
        """
        if not docstring:
            return ""
        flat = " ".join(docstring.split())
        if len(flat) > limit:
            return flat[:limit] + "..."
        return flat

    def summarize(self, chunk: ParsedChunk) -> str:
        """Generate a summary for a chunk.

        A non-empty ``chunk.summary`` is returned as-is. Otherwise the
        summary is built by the handler matching ``chunk.chunk_type``;
        unrecognised types fall back to the generic one-line form.
        """
        if chunk.summary:
            return chunk.summary

        handlers = {
            "function": self._summarize_function,
            "method": self._summarize_method,
            "class": self._summarize_class,
            "file": self._summarize_file,
        }
        handler = handlers.get(chunk.chunk_type, self._summarize_generic)
        return handler(chunk)

    def _summarize_function(self, chunk: ParsedChunk) -> str:
        """Summarize a function.

        Parts, in order: decorators (leading ``@`` stripped), name,
        parameters, return type, docstring excerpt, line count, and the
        complexity score when it exceeds ``COMPLEXITY_THRESHOLD``. Parts
        whose source value is empty are omitted.
        """
        meta = chunk.metadata
        parts = []

        if meta.decorators:
            decorators = ", ".join(
                d[1:] if d.startswith("@") else d for d in meta.decorators
            )
            parts.append(f"Decorators: {decorators}")

        parts.append(f"Function: {chunk.name}")

        if meta.parameters:
            parts.append(f"Parameters: {', '.join(meta.parameters)}")

        if meta.return_type:
            parts.append(f"Returns: {meta.return_type}")

        doc_summary = self._doc_excerpt(meta.docstring, self.DOC_EXCERPT_LIMIT)
        if doc_summary:
            parts.append(f"Doc: {doc_summary}")

        parts.append(f"Lines: {meta.line_count}")

        if meta.complexity_score > self.COMPLEXITY_THRESHOLD:
            parts.append(f"Complexity: {meta.complexity_score}")

        return " | ".join(parts)

    def _summarize_method(self, chunk: ParsedChunk) -> str:
        """Summarize a method.

        Parts, in order: name, parameters (with ``self``/``cls`` dropped),
        return type, and docstring excerpt. Empty parts are omitted.
        """
        meta = chunk.metadata
        parts = [f"Method: {chunk.name}"]

        if meta.parameters:
            # The implicit receiver carries no information for the reader.
            params = [p for p in meta.parameters if p not in ("self", "cls")]
            if params:
                parts.append(f"Parameters: {', '.join(params)}")

        if meta.return_type:
            parts.append(f"Returns: {meta.return_type}")

        doc_summary = self._doc_excerpt(meta.docstring, self.DOC_EXCERPT_LIMIT)
        if doc_summary:
            parts.append(f"Doc: {doc_summary}")

        return " | ".join(parts)

    def _summarize_class(self, chunk: ParsedChunk) -> str:
        """Summarize a class as name, docstring excerpt (if any) and line count."""
        meta = chunk.metadata
        parts = [f"Class: {chunk.name}"]

        doc_summary = self._doc_excerpt(
            meta.docstring, self.CLASS_DOC_EXCERPT_LIMIT
        )
        if doc_summary:
            parts.append(f"Doc: {doc_summary}")

        parts.append(f"Lines: {meta.line_count}")

        return " | ".join(parts)

    def _summarize_file(self, chunk: ParsedChunk) -> str:
        """Summarize a file as file name, language, line count and import count.

        The import count is included only when the file has imports.
        """
        meta = chunk.metadata
        parts = [
            f"File: {meta.file_name}",
            f"Language: {meta.language}",
            f"Lines: {meta.line_count}",
        ]

        if meta.imports:
            parts.append(f"Imports: {len(meta.imports)}")

        return " | ".join(parts)

    def _summarize_generic(self, chunk: ParsedChunk) -> str:
        """Summarize a chunk of any other type as ``Type: name (N lines)``."""
        return f"{chunk.chunk_type.capitalize()}: {chunk.name} ({chunk.metadata.line_count} lines)"

    def batch_summarize(self, chunks: List[ParsedChunk]) -> List[str]:
        """Generate summaries for multiple chunks, preserving input order."""
        return [self.summarize(chunk) for chunk in chunks]

    def generate_overview(self, chunks: List[ParsedChunk], project_name: str = "Project") -> str:
        """Generate a Markdown overview of the project structure.

        The report contains a title, chunk counts per type (sorted by type),
        chunk counts per language (most frequent first), and, when any chunk
        has priority >= ``HIGH_PRIORITY_THRESHOLD``, the summaries of the
        first ``MAX_KEY_COMPONENTS`` such chunks in input order.
        """
        lines = [f"# {project_name} Overview", ""]

        type_counts = {}
        lang_counts = {}
        for chunk in chunks:
            type_counts[chunk.chunk_type] = type_counts.get(chunk.chunk_type, 0) + 1
            language = chunk.metadata.language
            lang_counts[language] = lang_counts.get(language, 0) + 1

        lines.append("## Statistics")
        lines.append(f"- Total chunks: {len(chunks)}")
        lines.extend(
            f" - {chunk_type}: {count}"
            for chunk_type, count in sorted(type_counts.items())
        )
        lines.append("")

        lines.append("## Languages")
        # Stable sort on the negated count: ties keep first-seen order.
        for lang, count in sorted(lang_counts.items(), key=lambda item: -item[1]):
            lines.append(f"- {lang}: {count}")
        lines.append("")

        high_priority = [
            c for c in chunks if c.priority >= self.HIGH_PRIORITY_THRESHOLD
        ]
        if high_priority:
            lines.append("## Key Components (High Priority)")
            for chunk in high_priority[: self.MAX_KEY_COMPONENTS]:
                lines.append(f"- **{chunk.name}**: {self.summarize(chunk)}")

        return "\n".join(lines)
|
||||||
Reference in New Issue
Block a user