diff --git a/codesnap/cli/__init__.py b/codesnap/cli/__init__.py
new file mode 100644
index 0000000..0dabf31
--- /dev/null
+++ b/codesnap/cli/__init__.py
@@ -0,0 +1,344 @@
+"""CLI interface for CodeSnap."""
+
+import json
+import logging
+import os
+import sys
+from pathlib import Path
+from typing import Optional
+
+import click
+from pythonjsonlogger import jsonlogger
+
+try:
+    from .. import __version__
+    from ..core.dependency_analyzer import DependencyAnalyzer
+    from ..core.extractor import FunctionExtractor
+    from ..core.language_detection import LanguageDetector
+    from ..core.parser import CodeParser
+    from ..output.json_exporter import JSONExporter
+    from ..output.llm_exporter import LLMExporter
+    from ..output.markdown_exporter import MarkdownExporter
+    from ..utils.complexity import ComplexityCalculator
+    from ..utils.config import Config, load_config
+except ImportError:
+    from codesnap import __version__
+    from codesnap.core.dependency_analyzer import DependencyAnalyzer
+    from codesnap.core.extractor import FunctionExtractor
+    from codesnap.core.language_detection import LanguageDetector
+    from codesnap.core.parser import CodeParser
+    from codesnap.output.json_exporter import JSONExporter
+    from codesnap.output.llm_exporter import LLMExporter
+    from codesnap.output.markdown_exporter import MarkdownExporter
+    from codesnap.utils.complexity import ComplexityCalculator
+    from codesnap.utils.config import Config, load_config
+
+
+class JsonFormatter(jsonlogger.JsonFormatter):
+    """Custom JSON log formatter."""
+
+    def add_fields(
+        self, log_record: dict, record: logging.LogRecord, message_dict: dict
+    ) -> None:
+        super().add_fields(log_record, record, message_dict)
+        log_record["level"] = record.levelname
+        log_record["logger"] = record.name
+
+
+@click.group()
+@click.version_option(version=__version__)
+@click.option(
+    "--config",
+    type=click.Path(exists=True, path_type=Path),
+    help="Path to configuration file",
+)
+@click.option(
+    "--verbose",
+    "-v",
+    is_flag=True,
+    help="Enable verbose output",
+)
+@click.pass_context
+def main(ctx: click.Context, config: Optional[Path], verbose: bool) -> None:
+    """CodeSnap - Codebase Summarization CLI Tool."""
+    ctx.ensure_object(dict)
+    ctx.obj["config"] = load_config(config)
+    ctx.obj["verbose"] = verbose
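+
+
+# Example invocations, assuming the package installs a `codesnap` console
+# script (the entry-point name is an assumption; it is not defined in this
+# module):
+#
+#   codesnap analyze ./src --output llm --max-tokens 4000
+#   codesnap deps ./src --format dot
+#   codesnap complexity ./src --format json
+#   codesnap languages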
+
+
+@main.command()
+@click.argument(
+    "path",
+    type=click.Path(exists=True, path_type=Path),
+)
+@click.option(
+    "--output",
+    "-o",
+    type=click.Choice(["json", "markdown", "llm"]),
+    default="markdown",
+    help="Output format",
+)
+@click.option(
+    "--output-file",
+    type=click.Path(path_type=Path),
+    help="Output file path",
+)
+@click.option(
+    "--max-files",
+    type=int,
+    help="Maximum files to analyze",
+)
+@click.option(
+    "--max-tokens",
+    type=int,
+    help="Maximum tokens for LLM output",
+)
+@click.pass_obj
+def analyze(
+    obj: dict,
+    path: Path,
+    output: str,
+    output_file: Optional[Path],
+    max_files: Optional[int],
+    max_tokens: Optional[int],
+) -> None:
+    """Analyze a codebase and generate a summary."""
+    config: Config = obj.get("config", Config())
+
+    if max_files is not None:
+        config.max_files = max_files
+    if max_tokens is not None:
+        config.max_tokens = max_tokens
+
+    path = path.resolve()
+    if path.is_file():
+        paths = [path]
+    else:
+        paths = _collect_files(path, config)
+
+    if obj.get("verbose"):
+        click.echo(f"Analyzing {len(paths)} files...", err=True)
+
+    try:
+        parser = CodeParser()
+        extractor = FunctionExtractor()
+        complexity_calc = ComplexityCalculator(
+            low_threshold=config.complexity.low,
+            medium_threshold=config.complexity.medium,
+            high_threshold=config.complexity.high,
+        )
+
+        parsed_files = []
+        file_contents: dict[str, str] = {}
+
+        for file_path in paths:
+            try:
+                content = file_path.read_text(encoding="utf-8")
+                parsed = parser.parse_file(file_path, content)
+                parsed_files.append(parsed)
+                file_contents[str(file_path)] = content
+            except Exception as e:
+                if obj.get("verbose"):
+                    click.echo(f"Warning: Could not parse {file_path}: {e}", err=True)
+
+        # file_contents is keyed by str(path), so look entries up via p.path.
+        extracted_files = [
+            extractor.extract_from_content(
+                file_contents[str(p.path)], p.path, p.language
+            )
+            for p in parsed_files
+            if str(p.path) in file_contents
+        ]
+
+        dependency_analyzer = DependencyAnalyzer()
+        dependency_report = dependency_analyzer.analyze(parsed_files, path)
+
+        complexity_results = complexity_calc.calculate_batch(paths, file_contents)
+        complexity_data = {
+            k: v.get("rating", "unknown") for k, v in complexity_results.items()
+        }
+
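+        # All three exporters consume the same summary inputs (extracted
+        # files, parsed paths, dependency summary, per-file ratings); only
+        # LLMExporter additionally takes a token budget.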
+        dependency_summary = {
+            "dependencies": dependency_report.dependencies,
+            "cycles": dependency_report.cycles,
+            "orphaned": dependency_report.orphaned_files,
+        }
+        parsed_paths = [p.path for p in parsed_files]
+
+        if output == "json":
+            result = JSONExporter().export(
+                extracted_files,
+                parsed_paths,
+                dependency_summary,
+                complexity_data,
+                output_file,
+            )
+        elif output == "llm":
+            result = LLMExporter().export(
+                extracted_files,
+                parsed_paths,
+                dependency_summary,
+                complexity_data,
+                config.max_tokens,
+                output_file,
+            )
+        else:
+            result = MarkdownExporter().export(
+                extracted_files,
+                parsed_paths,
+                dependency_summary,
+                complexity_data,
+                output_file,
+            )
+
+        if not output_file:
+            click.echo(result)
+
+        if obj.get("verbose"):
+            click.echo(
+                f"Analysis complete. {len(parsed_files)} files processed.",
+                err=True,
+            )
+
+    except Exception as e:
+        click.echo(f"Error during analysis: {e}", err=True)
+        sys.exit(1)
+
+
+@main.command()
+@click.argument(
+    "path",
+    type=click.Path(exists=True, path_type=Path),
+)
+@click.option(
+    "--format",
+    "graph_format",
+    type=click.Choice(["dot", "adjacency"]),
+    default="adjacency",
+    help="Graph format",
+)
+@click.pass_obj
+def deps(obj: dict, path: Path, graph_format: str) -> None:
+    """Show dependency graph for a codebase."""
+    # TODO: graph_format ("dot" vs. "adjacency") is accepted but not yet
+    # consumed below; both choices currently print the same text summary.
+    config: Config = obj.get("config", Config())
+
+    paths = _collect_files(path, config)
+
+    parser = CodeParser()
+    parsed_files = [
+        parser.parse_file(p, p.read_text(encoding="utf-8")) for p in paths
+    ]
+
+    dependency_analyzer = DependencyAnalyzer()
+    dependency_report = dependency_analyzer.analyze(parsed_files, path)
+
+    graph_stats = dependency_analyzer.get_graph_stats()
+    click.echo("Graph Statistics:")
+    click.echo(f"  Nodes: {graph_stats['total_nodes']}")
+    click.echo(f"  Edges: {graph_stats['total_edges']}")
+    click.echo(f"  DAG: {graph_stats['is_dag']}")
+
+    if dependency_report.cycles:
+        click.echo("\nCircular Dependencies Found:")
+        for i, cycle in enumerate(dependency_report.cycles, 1):
+            cycle_str = " -> ".join(p.name for p in cycle)
+            click.echo(f"  Cycle {i}: {cycle_str}")
+
+    click.echo("\nMost Depended On:")
+    for file_path, count in dependency_report.most_depended[:5]:
+        click.echo(f"  {file_path.name}: {count} incoming dependencies")
+
+    click.echo("\nMost Dependent:")
+    for file_path, count in dependency_report.most_dependant[:5]:
+        click.echo(f"  {file_path.name}: {count} outgoing dependencies")
+
+
+@main.command()
+@click.argument(
+    "path",
+    type=click.Path(exists=True, path_type=Path),
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "markdown"]),
+    default="markdown",
+    help="Output format",
+)
+@click.pass_obj
+def complexity(obj: dict, path: Path, output_format: str) -> None:
+    """Calculate complexity metrics for a codebase."""
+    config: Config = obj.get("config", Config())
+
+    paths = _collect_files(path, config)
+    contents: dict[str, str] = {}
+
+    for file_path in paths:
+        try:
+            contents[str(file_path)] = file_path.read_text(encoding="utf-8")
+        except Exception as e:
+            if obj.get("verbose"):
+                click.echo(f"Warning: Could not read {file_path}: {e}", err=True)
+
+    complexity_calc = ComplexityCalculator(
+        low_threshold=config.complexity.low,
+        medium_threshold=config.complexity.medium,
+        high_threshold=config.complexity.high,
+    )
+
+    results = complexity_calc.calculate_batch(paths, contents)
+
+    if output_format == "json":
+        click.echo(json.dumps(results, indent=2))
+    else:
+        click.echo("Complexity Analysis:")
+        click.echo("-" * 60)
+        ranked = sorted(
+            results.items(), key=lambda x: x[1].get("complexity", 0), reverse=True
+        )
+        for file_path, result in ranked:
+            rating = result.get("rating", "unknown")
+            score = result.get("complexity", 0)
+            file_name = Path(file_path).name
+            click.echo(f"  {file_name:<30} {rating:<10} ({score})")
+        click.echo("-" * 60)
+
+        low = sum(1 for r in results.values() if r.get("rating") == "low")
+        medium = sum(1 for r in results.values() if r.get("rating") == "medium")
+        high = sum(1 for r in results.values() if r.get("rating") == "high")
+        critical = sum(1 for r in results.values() if r.get("rating") == "critical")
+
+        click.echo(
+            f"\nSummary: {low} low, {medium} medium, "
+            f"{high} high, {critical} critical"
+        )
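+
+
+# Illustrative shape of `complexity --format json` output, inferred from the
+# key accesses above (the exact payload is defined by ComplexityCalculator;
+# paths and values here are hypothetical):
+#
+#   {
+#       "src/app.py": {"complexity": 12, "rating": "medium"},
+#       "src/util.py": {"complexity": 3, "rating": "low"}
+#   }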
+
+
+@main.command()
+@click.pass_obj
+def languages(obj: dict) -> None:
+    """List supported languages."""
+    detector = LanguageDetector()
+    supported = detector.get_supported_languages()
+    extensions = detector.get_supported_extensions()
+
+    click.echo("Supported Languages:")
+    for lang in sorted(supported):
+        click.echo(f"  - {lang}")
+
+    click.echo(f"\nSupported Extensions: {', '.join(sorted(extensions))}")
+
+
+def _collect_files(directory: Path, config: Config) -> list[Path]:
+    """Collect source files from directory based on config patterns."""
+    files: list[Path] = []
+
+    for root, _, filenames in os.walk(directory):
+        for filename in filenames:
+            filepath = Path(root) / filename
+
+            if any(filepath.match(pattern) for pattern in config.exclude_patterns):
+                continue
+
+            if any(filepath.match(pattern) for pattern in config.include_patterns):
+                files.append(filepath)
+
+            if len(files) >= config.max_files:
+                return files
+
+    return files
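+
+
+# Config fields this CLI reads (see codesnap.utils.config): max_files,
+# max_tokens, complexity.low / complexity.medium / complexity.high,
+# include_patterns, and exclude_patterns. The on-disk format of the file
+# passed via --config is whatever load_config() accepts; nothing in this
+# module assumes a particular serialization.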