Initial upload with CI/CD workflow
This commit is contained in:
344
codesnap/cli/__init__.py
Normal file
344
codesnap/cli/__init__.py
Normal file
@@ -0,0 +1,344 @@
|
||||
"""CLI interface for CodeSnap."""
|
||||
|
||||
import logging
import os
import sys
from pathlib import Path
from typing import Optional

import click
from pythonjsonlogger import jsonlogger
|
||||
|
||||
try:
|
||||
from .. import __version__
|
||||
from ..core.extractor import FunctionExtractor
|
||||
from ..core.language_detection import LanguageDetector
|
||||
from ..core.parser import CodeParser
|
||||
from ..core.dependency_analyzer import DependencyAnalyzer
|
||||
from ..output.json_exporter import JSONExporter
|
||||
from ..output.llm_exporter import LLMExporter
|
||||
from ..output.markdown_exporter import MarkdownExporter
|
||||
from ..utils.complexity import ComplexityCalculator
|
||||
from ..utils.config import Config, load_config
|
||||
except ImportError:
|
||||
from codesnap import __version__
|
||||
from codesnap.core.extractor import FunctionExtractor
|
||||
from codesnap.core.language_detection import LanguageDetector
|
||||
from codesnap.core.parser import CodeParser
|
||||
from codesnap.core.dependency_analyzer import DependencyAnalyzer
|
||||
from codesnap.output.json_exporter import JSONExporter
|
||||
from codesnap.output.llm_exporter import LLMExporter
|
||||
from codesnap.output.markdown_exporter import MarkdownExporter
|
||||
from codesnap.utils.complexity import ComplexityCalculator
|
||||
from codesnap.utils.config import Config, load_config
|
||||
|
||||
|
||||
class JsonFormatter(jsonlogger.JsonFormatter):
    """Custom JSON log formatter that adds level and logger-name fields."""

    def add_fields(
        self, log_record: dict, record: logging.LogRecord, message_dict: dict
    ) -> None:
        """Populate *log_record* with the base fields plus level/logger.

        Args:
            log_record: Mutable dict that becomes the emitted JSON record.
            record: The originating stdlib log record (the original annotated
                this as ``object``, but ``.levelname``/``.name`` are read, so
                it is a ``logging.LogRecord``).
            message_dict: Extra fields parsed from the log message.
        """
        super().add_fields(log_record, record, message_dict)
        # Surface severity and source logger as first-class JSON keys.
        log_record["level"] = record.levelname
        log_record["logger"] = record.name
|
||||
|
||||
|
||||
@click.group()
@click.version_option(version=__version__)
@click.option(
    "--config",
    type=click.Path(exists=True, path_type=Path),
    help="Path to configuration file",
)
@click.option(
    "--verbose",
    "-v",
    is_flag=True,
    help="Enable verbose output",
)
@click.pass_context
def main(ctx: click.Context, config: Optional[Path], verbose: bool) -> None:
    """CodeSnap - Codebase Summarization CLI Tool."""
    # Shared state lives on the click context so every subcommand can
    # retrieve the loaded configuration and verbosity flag via pass_obj.
    ctx.ensure_object(dict)
    settings = load_config(config)
    ctx.obj.update({"config": settings, "verbose": verbose})
|
||||
|
||||
|
||||
@main.command()
@click.argument(
    "path",
    type=click.Path(exists=True, path_type=Path),
)
@click.option(
    "--output",
    "-o",
    type=click.Choice(["json", "markdown", "llm"]),
    default="markdown",
    help="Output format",
)
@click.option(
    "--output-file",
    type=click.Path(path_type=Path),
    help="Output file path",
)
@click.option(
    "--max-files",
    type=int,
    help="Maximum files to analyze",
)
@click.option(
    "--max-tokens",
    type=int,
    help="Maximum tokens for LLM output",
)
@click.pass_obj
def analyze(
    obj: dict,
    path: Path,
    output: str,
    output_file: Optional[Path],
    max_files: Optional[int],
    max_tokens: Optional[int],
) -> None:
    """Analyze a codebase and generate a summary.

    Parses each collected file, extracts functions, analyzes dependencies,
    computes complexity ratings, and renders the result in the requested
    format (json / markdown / llm). Exits with status 1 on failure.
    """
    config: Config = obj.get("config", Config())

    # CLI flags override values loaded from the config file.
    if max_files:
        config.max_files = max_files
    if max_tokens:
        config.max_tokens = max_tokens

    path = path.resolve()
    paths = [path] if path.is_file() else _collect_files(path, config)

    if obj.get("verbose"):
        click.echo(f"Analyzing {len(paths)} files...", err=True)

    try:
        parser = CodeParser()
        extractor = FunctionExtractor()
        complexity_calc = ComplexityCalculator(
            low_threshold=config.complexity.low,
            medium_threshold=config.complexity.medium,
            high_threshold=config.complexity.high,
        )

        parsed_files = []
        file_contents: dict[str, str] = {}

        for file_path in paths:
            try:
                content = file_path.read_text(encoding="utf-8")
                parsed = parser.parse_file(file_path, content)
                parsed_files.append(parsed)
                file_contents[str(file_path)] = content
            except Exception as e:
                # Best-effort: skip files that cannot be read or parsed.
                if obj.get("verbose"):
                    click.echo(f"Warning: Could not parse {file_path}: {e}", err=True)

        # BUG FIX: the original looked up file_contents[str(p)] — str() of the
        # parsed-file object, not of its path — so no key ever matched and
        # extracted_files was always empty. It also passed the parsed object
        # where the file path is expected. Both now use p.path.
        extracted_files = [
            extractor.extract_from_content(
                file_contents[str(p.path)], p.path, p.language
            )
            for p in parsed_files
            if str(p.path) in file_contents
        ]

        dependency_analyzer = DependencyAnalyzer()
        dependency_report = dependency_analyzer.analyze(parsed_files, path)

        # BUG FIX: the original guarded with `list(path.keys()) if
        # isinstance(paths, dict) else paths`, but `path` is a Path (no
        # .keys()) and `paths` is always a list here — dead, broken branch
        # removed.
        complexity_results = complexity_calc.calculate_batch(paths, file_contents)
        complexity_data = {
            k: v.get("rating", "unknown") for k, v in complexity_results.items()
        }

        # Shared metadata passed identically to every exporter (was
        # duplicated inline three times in the original).
        dependency_info = {
            "dependencies": dependency_report.dependencies,
            "cycles": dependency_report.cycles,
            "orphaned": dependency_report.orphaned_files,
        }
        analyzed_paths = [p.path for p in parsed_files]

        if output == "json":
            result = JSONExporter().export(
                extracted_files,
                analyzed_paths,
                dependency_info,
                complexity_data,
                output_file,
            )
        elif output == "llm":
            # The LLM exporter additionally takes a token budget.
            result = LLMExporter().export(
                extracted_files,
                analyzed_paths,
                dependency_info,
                complexity_data,
                config.max_tokens,
                output_file,
            )
        else:
            result = MarkdownExporter().export(
                extracted_files,
                analyzed_paths,
                dependency_info,
                complexity_data,
                output_file,
            )

        # When no output file was requested, the rendered summary goes to
        # stdout.
        if not output_file:
            click.echo(result)

        if obj.get("verbose"):
            click.echo(f"Analysis complete. {len(parsed_files)} files processed.", err=True)

    except Exception as e:
        click.echo(f"Error during analysis: {e}", err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
@main.command()
@click.argument(
    "path",
    type=click.Path(exists=True, path_type=Path),
)
@click.option(
    "--format",
    "graph_format",
    type=click.Choice(["dot", "adjacency"]),
    default="adjacency",
    help="Graph format",
)
@click.pass_obj
def deps(obj: dict, path: Path, graph_format: str) -> None:
    """Show dependency graph for a codebase.

    Prints graph statistics, any circular dependencies, and the top-5 most
    depended-on / most dependant files.

    TODO(review): `graph_format` is accepted but never used — the `dot`
    output mode appears unimplemented.
    """
    config: Config = obj.get("config", Config())

    paths = _collect_files(path, config)

    parser = CodeParser()
    # NOTE(review): analyze() calls parse_file(path, content) with two
    # arguments; here it is called with the path only — confirm the content
    # parameter is optional on CodeParser.parse_file.
    parsed_files = [parser.parse_file(p) for p in paths]

    dependency_analyzer = DependencyAnalyzer()
    dependency_report = dependency_analyzer.analyze(parsed_files, path)

    graph_stats = dependency_analyzer.get_graph_stats()
    # Plain string literals: the originals were f-strings with no
    # placeholders (lint F541).
    click.echo("Graph Statistics:")
    click.echo(f"  Nodes: {graph_stats['total_nodes']}")
    click.echo(f"  Edges: {graph_stats['total_edges']}")
    click.echo(f"  DAG: {graph_stats['is_dag']}")

    if dependency_report.cycles:
        click.echo("\nCircular Dependencies Found:")
        for i, cycle in enumerate(dependency_report.cycles, 1):
            cycle_str = " -> ".join(p.name for p in cycle)
            click.echo(f"  Cycle {i}: {cycle_str}")

    click.echo("\nMost Depended On:")
    for file_path, count in dependency_report.most_depended[:5]:
        click.echo(f"  {file_path.name}: {count} incoming dependencies")

    click.echo("\nMost Dependant:")
    for file_path, count in dependency_report.most_dependant[:5]:
        click.echo(f"  {file_path.name}: {count} outgoing dependencies")
|
||||
|
||||
|
||||
@main.command()
@click.argument(
    "path",
    type=click.Path(exists=True, path_type=Path),
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "markdown"]),
    default="markdown",
    help="Output format",
)
@click.pass_obj
def complexity(obj: dict, path: Path, output_format: str) -> None:
    """Calculate complexity metrics for a codebase.

    Renders per-file complexity ratings either as JSON or as a sorted
    markdown-style table with a summary line. The parameter was renamed
    from ``format`` (which shadows the builtin) via click's explicit
    destination name; the CLI flag ``--format`` is unchanged.
    """
    config: Config = obj.get("config", Config())

    paths = _collect_files(path, config)
    contents = {}

    for file_path in paths:
        try:
            contents[str(file_path)] = file_path.read_text(encoding="utf-8")
        except Exception as e:
            # Best-effort: unreadable files are skipped, optionally with a
            # warning when --verbose was given.
            if obj.get("verbose"):
                click.echo(f"Warning: Could not read {file_path}: {e}", err=True)

    complexity_calc = ComplexityCalculator(
        low_threshold=config.complexity.low,
        medium_threshold=config.complexity.medium,
        high_threshold=config.complexity.high,
    )

    results = complexity_calc.calculate_batch(paths, contents)

    if output_format == "json":
        import json
        click.echo(json.dumps(results, indent=2))
    else:
        click.echo("Complexity Analysis:")
        click.echo("-" * 60)
        # Most complex files first.
        for file_path, result in sorted(results.items(), key=lambda x: x[1].get("complexity", 0), reverse=True):
            rating = result.get("rating", "unknown")
            complexity = result.get("complexity", 0)
            file_name = Path(file_path).name
            click.echo(f"  {file_name:<30} {rating:<10} ({complexity})")
        click.echo("-" * 60)

        low = sum(1 for r in results.values() if r.get("rating") == "low")
        medium = sum(1 for r in results.values() if r.get("rating") == "medium")
        high = sum(1 for r in results.values() if r.get("rating") == "high")
        critical = sum(1 for r in results.values() if r.get("rating") == "critical")

        click.echo(f"\nSummary: {low} low, {medium} medium, {high} high, {critical} critical")
|
||||
|
||||
|
||||
@main.command()
@click.pass_obj
def languages(obj: dict) -> None:
    """List supported languages and file extensions."""
    detector = LanguageDetector()
    # Renamed from `languages` — the original local shadowed this command
    # function's own name.
    supported = detector.get_supported_languages()
    extensions = detector.get_supported_extensions()

    click.echo("Supported Languages:")
    for lang in sorted(supported):
        click.echo(f"  - {lang}")

    click.echo(f"\nSupported Extensions: {', '.join(sorted(extensions))}")
|
||||
|
||||
|
||||
def _collect_files(directory: Path, config: Config) -> list[Path]:
    """Collect source files from directory based on config patterns.

    Exclusion patterns take precedence over inclusion patterns; collection
    stops as soon as ``config.max_files`` paths have been gathered.
    """
    collected: list[Path] = []

    for root, _, filenames in os.walk(directory):
        base = Path(root)
        for filename in filenames:
            candidate = base / filename

            # Exclusions win over inclusions.
            if any(candidate.match(pat) for pat in config.exclude_patterns):
                continue
            if not any(candidate.match(pat) for pat in config.include_patterns):
                continue

            collected.append(candidate)
            # Honor the configured file budget.
            if len(collected) >= config.max_files:
                return collected

    return collected
|
||||
Reference in New Issue
Block a user