from __future__ import annotations

import json
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # ParsedChunk is referenced only in type annotations; deferring the import
    # avoids a runtime dependency on the chunking module. NOTE(review): if any
    # caller re-imports ParsedChunk from this module, restore the plain import.
    from codechunk.core.chunking import ParsedChunk


class OutputFormatter:
    """Format parsed code chunks for different LLM platforms.

    Supported format types:
        * ``"ollama"``   -- prompt-style markdown with System/User sections.
        * ``"lmstudio"`` -- a JSON list of chat messages.
        * ``"markdown"`` -- plain markdown, also the fallback for any
          unrecognized format type.
    """

    # Languages with a matching code-fence lexer tag; any other language
    # yields a bare ``` fence. (The original identity dict is equivalent to
    # membership in this set.)
    _KNOWN_LEXERS = frozenset({
        "python", "javascript", "typescript", "go", "rust", "java",
        "cpp", "c", "csharp", "ruby", "php", "swift", "kotlin",
        "scala", "lua", "perl", "haskell", "elixir", "erlang",
    })

    def __init__(self, format_type: str = "markdown", max_tokens: int = 8192):
        """
        Args:
            format_type: One of "ollama", "lmstudio" or "markdown".
            max_tokens: Token budget that check_token_limit() measures against.
        """
        self.format_type = format_type
        self.max_tokens = max_tokens
        # Ratios of max_tokens at which check_token_limit() escalates its
        # severity label: INFO, WARNING and CRITICAL respectively.
        self.token_warning_thresholds = [0.7, 0.9, 1.0]

    def format(self, chunks: list[ParsedChunk]) -> str:
        """Format chunks for output using the configured format type."""
        if self.format_type == "ollama":
            return self._format_ollama(chunks)
        if self.format_type == "lmstudio":
            return self._format_lmstudio(chunks)
        return self._format_markdown(chunks)

    def _format_ollama(self, chunks: list[ParsedChunk]) -> str:
        """Format chunks as an Ollama-style System/User prompt."""
        lines = [
            "### System",
            "You are a helpful programming assistant analyzing a codebase.",
            "",
            "### User",
            "Analyze the following code:",
            "",
        ]
        for chunk in chunks:
            lines.append(f"**File: {chunk.metadata.file_path}**")
            lines.append(f"**Type: {chunk.chunk_type}**")
            lines.append(f"**Name: {chunk.name}**")
            # Optional metadata lines are emitted only when present.
            if chunk.metadata.docstring:
                lines.append(f"**Description: {chunk.metadata.docstring}**")
            if chunk.metadata.parameters:
                lines.append(f"**Parameters: {', '.join(chunk.metadata.parameters)}**")
            if chunk.metadata.return_type:
                lines.append(f"**Returns: {chunk.metadata.return_type}**")
            if chunk.priority >= 75:
                lines.append("**Priority: HIGH**")
            lines.extend([
                "",
                "```" + self._get_language_lexer(chunk.metadata.language),
                chunk.content,
                "```",
                "",
                "---",
                "",
            ])
        return "\n".join(lines)

    def _format_lmstudio(self, chunks: list[ParsedChunk]) -> str:
        """Format chunks as an LM Studio JSON chat-message list."""
        # Trailing space before the newline kept for byte-compatibility with
        # the original prompt text.
        system_content = (
            "You are a helpful programming assistant analyzing a codebase. \n"
            "Provide clear, accurate code analysis and assistance."
        )
        messages = [{"role": "system", "content": system_content}]
        content_parts = [
            "# Codebase Analysis",
            "",
            f"Total files/chunks: {len(chunks)}",
            "",
        ]
        for chunk in chunks:
            content_parts.append(f"## {chunk.metadata.file_name}")
            content_parts.append(f"**Type:** {chunk.chunk_type}")
            content_parts.append(f"**Name:** {chunk.name}")
            content_parts.append(
                f"**Lines:** {chunk.metadata.start_line}-{chunk.metadata.end_line}"
            )
            if chunk.metadata.docstring:
                content_parts.append(f"**Description:** {chunk.metadata.docstring}")
            content_parts.extend([
                "",
                "```" + self._get_language_lexer(chunk.metadata.language),
                chunk.content,
                "```",
                "",
            ])
        messages.append({"role": "user", "content": "\n".join(content_parts)})
        return json.dumps(messages, indent=2)

    def _format_markdown(self, chunks: list[ParsedChunk]) -> str:
        """Format chunks as plain markdown (the default output)."""
        lines = [
            "# Code Context",
            "",
            f"_Generated by CodeChunk CLI | {len(chunks)} chunks_",
            "",
        ]
        for chunk in chunks:
            lines.append(f"## {chunk.name}")
            lines.append("")
            lines.append(f"**File:** `{chunk.metadata.file_path}`")
            lines.append(f"**Type:** {chunk.chunk_type}")
            lines.append(f"**Lines:** {chunk.metadata.start_line}-{chunk.metadata.end_line}")
            if chunk.metadata.docstring:
                lines.append("")
                lines.append(f"> {chunk.metadata.docstring}")
            if chunk.metadata.parameters:
                lines.append(f"**Parameters:** `{'`, `'.join(chunk.metadata.parameters)}`")
            if chunk.metadata.return_type:
                lines.append(f"**Returns:** `{chunk.metadata.return_type}`")
            if chunk.priority >= 75:
                lines.append("")
                lines.append("⭐ **High Priority**")
            lines.extend([
                "",
                "```" + self._get_language_lexer(chunk.metadata.language),
                chunk.content,
                "```",
                "",
                "---",
                "",
            ])
        return "\n".join(lines)

    def _get_language_lexer(self, language: str) -> str:
        """Return the code-fence lexer tag for *language*, or "" if unknown."""
        return language if language in self._KNOWN_LEXERS else ""

    def estimate_tokens(self, text: str) -> int:
        """Estimate the token count of *text*.

        Uses the common ~4-characters-per-token heuristic; this is a rough
        estimate, not a tokenizer-accurate count.
        """
        avg_chars_per_token = 4
        return len(text) // avg_chars_per_token

    def check_token_limit(self, text: str) -> tuple[bool, float, str]:
        """Check the estimated token count of *text* against ``max_tokens``.

        Returns:
            (fits, ratio, severity): ``fits`` is False once the WARNING
            threshold is reached, ``ratio`` is estimated_tokens / max_tokens,
            and ``severity`` is one of "OK", "INFO", "WARNING", "CRITICAL".
        """
        ratio = self.estimate_tokens(text) / self.max_tokens
        # Escalate at the configured thresholds (defaults: 0.7, 0.9, 1.0).
        info_at, warn_at, critical_at = self.token_warning_thresholds
        if ratio >= critical_at:
            return False, ratio, "CRITICAL"
        if ratio >= warn_at:
            return False, ratio, "WARNING"
        if ratio >= info_at:
            return True, ratio, "INFO"
        return True, ratio, "OK"

    def prune_for_limit(self, chunks: list[ParsedChunk], max_tokens: int) -> list[ParsedChunk]:
        """Greedily keep chunks whose combined estimated tokens fit *max_tokens*.

        Chunks are considered in order; a chunk that does not fit is skipped,
        but later (smaller) chunks may still be included.
        """
        result: list[ParsedChunk] = []
        current_tokens = 0
        for chunk in chunks:
            chunk_tokens = self.estimate_tokens(chunk.content)
            if current_tokens + chunk_tokens <= max_tokens:
                result.append(chunk)
                current_tokens += chunk_tokens
        return result