fix: update CI workflow with proper checkout paths

2026-02-05 18:03:00 +00:00
commit 5b74fccad8
28 changed files with 3461 additions and 0 deletions
--- a/.gitea/workflows/repohealth.yml
+++ b/.gitea/workflows/repohealth.yml
@@ -0,0 +1,95 @@
 # Continuous-integration workflow; the jobs below lint, test and build.
 name: CI
 # Runs for pushes to main and for pull requests that target main.
 on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
 jobs:
  lint:
    runs-on: ubuntu-latest
    # `timeout` is not a workflow key; the job limit is `timeout-minutes`.
    # Assumes the original 300 was meant as seconds (5 minutes) - confirm.
    timeout-minutes: 5
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # The repository is checked out into a subdirectory, so the run
          # steps below set working-directory to it.
          path: repohealth-cli
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install ruff
        working-directory: ./repohealth-cli
      - name: Run linting
        run: python -m ruff check src/ tests/
        working-directory: ./repohealth-cli
  test:
    runs-on: ubuntu-latest
    # `timeout` is not a workflow key; the job limit is `timeout-minutes`.
    # Assumes the original 600 was meant as seconds (10 minutes) - confirm.
    timeout-minutes: 10
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # The repository is checked out into a subdirectory, so the run
          # steps below set working-directory to it.
          path: repohealth-cli
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r requirements.txt
          python -m pip install pytest pytest-cov
        working-directory: ./repohealth-cli
      - name: Run tests
        # --cov makes pytest-cov write the .coverage data file uploaded below.
        run: python -m pytest tests/ -xvs --tb=short --cov=src
        working-directory: ./repohealth-cli
      - name: Upload coverage
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          # Action inputs are resolved from the workspace root;
          # `working-directory` only applies to `run` steps, so the checkout
          # subdirectory has to be part of the path.
          path: repohealth-cli/.coverage
          # .coverage is a dotfile, which upload-artifact v4 skips by default.
          include-hidden-files: true
  build:
    runs-on: ubuntu-latest
    # `timeout` is not a workflow key; the job limit is `timeout-minutes`.
    # Assumes the original 300 was meant as seconds (5 minutes) - confirm.
    timeout-minutes: 5
    # Only build once both the lint and test jobs have succeeded.
    needs: [lint, test]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # The repository is checked out into a subdirectory, so the run
          # steps below set working-directory to it.
          path: repohealth-cli
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r requirements.txt
          python -m pip install build
        working-directory: ./repohealth-cli
      - name: Build package
        run: python -m build
        working-directory: ./repohealth-cli
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,33 @@
 __pycache__/
 *.py[cod]
 *$py.class
 *.so
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 .env
 .venv
 env/
 venv/
 ENV/
 *.log
 .pytest_cache/
 .coverage
 htmlcov/
 *.profile
 .DS_Store
 .vscode/
 .idea/
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2024 RepoHealth Team
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,141 @@
 # RepoHealth CLI
 A CLI tool that analyzes Git repositories to calculate bus factor scores, identify knowledge concentration hotspots, and generate actionable risk reports. It helps team leads and maintainers understand single-point-of-failure risks in their codebase.
 ## Features
 - **Bus Factor Calculation**: Calculate bus factor scores per file/module based on commit authorship distribution
 - **Hotspot Identification**: Identify knowledge concentration hotspots where code ownership is concentrated
 - **Risk Heatmaps**: Generate visual risk heatmaps showing file/module risk levels
 - **Diversification Suggestions**: Suggest strategies to diversify code ownership
 - **Multiple Output Formats**: Export analysis results in JSON, HTML, or terminal display
 ## Installation
 ```bash
 pip install repohealth-cli
 ```
 Or from source:
 ```bash
 pip install -e .
 ```
 ## Quick Start
 Analyze the current repository:
 ```bash
 repohealth analyze
 ```
 Analyze a specific repository:
 ```bash
 repohealth analyze /path/to/repository
 ```
 Generate an HTML report:
 ```bash
 repohealth report /path/to/repository --format html --output report.html
 ```
 ## Commands
 ### analyze
 Perform a full repository analysis:
 ```bash
 repohealth analyze [REPO_PATH] [OPTIONS]
 ```
 Options:
 - `--depth`: Limit commit history depth (default: unlimited)
 - `--path`: Analyze specific paths within the repository
 - `--extensions`: Filter by file extensions (e.g., "py,js,ts")
 - `--min-commits`: Minimum commits to consider a file (default: 1)
 ### report
 Generate a detailed report:
 ```bash
 repohealth report [REPO_PATH] [OPTIONS]
 ```
 Options:
 - `--format`: Output format (json, html, terminal)
 - `--output`: Output file path (for json/html formats)
 - `--depth`: Limit commit history depth
 - `--path`: Analyze specific paths
 ## Output Formats
 ### Terminal
 Rich terminal output with colored tables and progress bars.
 ### JSON
 Machine-readable output for integration with other tools:
 ```json
 {
  "repository": "/path/to/repo",
  "analyzed_at": "2024-01-15T10:30:00Z",
  "bus_factor_overall": 2.3,
  "files_analyzed": 150,
  "high_risk_files": 12,
  "files": [...],
  "hotspots": [...],
  "suggestions": [...]
 }
 ```
 ### HTML
 Interactive HTML report with visualizations and charts.
 ## Configuration
 Create a `repohealth.config.json` in your repository root:
 ```json
 {
  "depth": 365,
  "extensions": ["py", "js", "ts", "go"],
  "path": "src",
  "min_commits": 5,
  "risk_threshold": 0.7
 }
 ```
 ## Understanding Bus Factor
 The **Bus Factor** measures how many developers would need to be hit by a bus before the project is in serious trouble. A higher bus factor indicates better knowledge distribution.
 - **Bus Factor 1**: Single point of failure - one person knows everything about this code
 - **Bus Factor 2+**: Multiple people understand the code
 - **Bus Factor > 3**: Healthy knowledge distribution
 ## Risk Levels
 - **Critical** (< 1.5): Immediate attention needed - single author majority
 - **High** (1.5 - 2.0): Multiple authors but concentration exists
 - **Medium** (2.0 - 3.0): Moderate distribution
 - **Low** (> 3.0): Good knowledge distribution
 ## Contributing
 1. Fork the repository
 2. Create a feature branch
 3. Make your changes
 4. Run tests: `pytest tests/ -v`
 5. Submit a pull request
 ## License
 MIT License - see LICENSE file for details.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,60 @@
 [build-system]
 requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "repohealth-cli"
 version = "1.0.0"
 description = "A CLI tool that analyzes Git repositories to calculate bus factor scores and identify knowledge concentration risks"
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.9"
 authors = [
    {name = "RepoHealth Team"}
 ]
 keywords = ["git", "analysis", "bus-factor", "code-review", "risk-assessment"]
 classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12"
 ]
 dependencies = [
    "gitpython==3.1.37",
    "rich==13.7.0",
    "click==8.1.7",
    "jinja2==3.1.3",
    "matplotlib==3.8.3",
    "pandas==2.1.4"
 ]
 [project.optional-dependencies]
 dev = [
    "pytest>=7.4.0",
    "pytest-cov>=4.1.0",
    "ruff>=0.1.0"
 ]
 [project.scripts]
 repohealth = "repohealth.__main__:main"
 [tool.setuptools.packages.find]
 where = ["src"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]
 [tool.ruff]
 line-length = 100
 target-version = "py39"
 [tool.ruff.lint]
 select = ["E", "F", "W", "I", "UP", "B", "C4"]
 ignore = ["E501"]
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,6 @@
 gitpython==3.1.37
 rich==13.7.0
 click==8.1.7
 jinja2==3.1.3
 matplotlib==3.8.3
 pandas==2.1.4
--- a/src/repohealth/init.py
+++ b/src/repohealth/init.py
@@ -0,0 +1,3 @@
 """RepoHealth CLI - Git repository analysis tool for bus factor calculation."""
 __version__ = "1.0.0"
--- a/src/repohealth/main.py
+++ b/src/repohealth/main.py
@@ -0,0 +1,6 @@
 """Entry point for the RepoHealth CLI."""
 from repohealth.cli.cli import main
 if __name__ == "__main__":
    main()
--- a/src/repohealth/analyzers/init.py
+++ b/src/repohealth/analyzers/init.py
@@ -0,0 +1,7 @@
 """Analysis modules for repository health assessment."""
 from repohealth.analyzers.bus_factor import BusFactorCalculator
 from repohealth.analyzers.git_analyzer import GitAnalyzer
 from repohealth.analyzers.risk_analyzer import RiskAnalyzer
 __all__ = ["GitAnalyzer", "BusFactorCalculator", "RiskAnalyzer"]
--- a/src/repohealth/analyzers/bus_factor.py
+++ b/src/repohealth/analyzers/bus_factor.py
@@ -0,0 +1,219 @@
 """Bus factor calculation module."""
 from typing import Optional
 from repohealth.models.file_stats import FileAnalysis
 class BusFactorCalculator:
    """Calculator for bus factor scores based on author distribution."""
    RISK_THRESHOLDS = {
        "critical": 1.0,
        "high": 1.5,
        "medium": 2.0,
        "low": float('inf')
    }
    def __init__(self, risk_threshold: float = 0.7) -> None:
        """Initialize the calculator.

        Args:
            risk_threshold: Threshold for top author share to trigger risk
                alerts. It is only stored here; none of the methods of this
                class read it.
        """
        self.risk_threshold = risk_threshold
    def calculate_gini(self, values: list[float]) -> float:
        """Return the Gini coefficient of ``values``.

        The coefficient expresses how unevenly the values are spread:
        0 means every value is equal, 1 means maximal concentration.

        Args:
            values: Numeric values such as commit counts per author.

        Returns:
            The coefficient clamped to the range [0.0, 1.0]. Inputs with
            fewer than two entries, or whose sum is zero, yield 0.0.
        """
        if not values or len(values) < 2:
            return 0.0

        ordered = sorted(values)
        total = sum(ordered)
        if total == 0:
            return 0.0

        count = len(ordered)
        # Rank-weighted sum: the k-th smallest value is weighted by k (1-based).
        weighted = 0.0
        for rank, amount in enumerate(ordered, start=1):
            weighted += amount * rank

        coefficient = (2 * weighted) / (count * total) - (count + 1) / count
        # Clamp so rounding noise can never leave the documented range.
        return max(0.0, min(1.0, coefficient))
    def calculate_file_bus_factor(self, analysis: FileAnalysis) -> float:
        """Calculate bus factor for a single file.
        Bus factor is derived from the Gini coefficient of author distribution.
        A lower bus factor indicates higher risk (concentration of ownership).
        Args:
            analysis: FileAnalysis with authorship data.
        Returns:
            Bus factor score (lower = more risky).
        """
        if analysis.total_commits == 0:
            return 1.0
        if analysis.num_authors == 1:
            return 1.0
        commits = list(analysis.author_commits.values())
        gini = self.calculate_gini(commits)
        bus_factor = 1.0 + (1.0 - gini) * (analysis.num_authors - 1)
        return min(bus_factor, float(analysis.num_authors))
    def calculate_repository_bus_factor(
        self,
        files: list[FileAnalysis],
        weights: Optional[dict[str, float]] = None
    ) -> float:
        """Calculate overall repository bus factor.
        Args:
            files: List of FileAnalysis objects.
            weights: Optional weights per file (e.g., by importance).
        Returns:
            Overall bus factor score.
        """
        if not files:
            return 1.0
        total_weight = 0.0
        weighted_sum = 0.0
        for analysis in files:
            bus_factor = self.calculate_file_bus_factor(analysis)
            weight = weights.get(analysis.path, 1.0) if weights else 1.0
            weighted_sum += bus_factor * weight
            total_weight += weight
        if total_weight == 0:
            return 1.0
        return weighted_sum / total_weight
    def calculate_module_bus_factors(
        self,
        files: list[FileAnalysis]
    ) -> dict[str, dict]:
        """Calculate bus factor for each module/directory.
        Args:
            files: List of FileAnalysis objects.
        Returns:
            Dictionary mapping module to stats including bus factor.
        """
        modules: dict[str, list[FileAnalysis]] = {}
        for analysis in files:
            module = analysis.module or "root"
            if module not in modules:
                modules[module] = []
            modules[module].append(analysis)
        module_stats = {}
        for module, module_files in modules.items():
            avg_bus_factor = self.calculate_repository_bus_factor(module_files)
            gini = self.calculate_gini(
                [f.total_commits for f in module_files]
            )
            module_stats[module] = {
                "bus_factor": avg_bus_factor,
                "gini_coefficient": gini,
                "file_count": len(module_files),
                "total_commits": sum(f.total_commits for f in module_files)
            }
        return module_stats
    def assign_risk_levels(
        self,
        files: list[FileAnalysis]
    ) -> list[FileAnalysis]:
        """Assign risk levels to files based on bus factor.
        Args:
            files: List of FileAnalysis objects.
        Returns:
            Updated FileAnalysis objects with risk levels.
        """
        for analysis in files:
            bus_factor = self.calculate_file_bus_factor(analysis)
            analysis.bus_factor = bus_factor
            if analysis.total_commits == 0:
                analysis.risk_level = "unknown"
            elif analysis.num_authors == 1:
                analysis.risk_level = "critical"
            elif bus_factor < self.RISK_THRESHOLDS["critical"]:
                analysis.risk_level = "critical"
            elif bus_factor < self.RISK_THRESHOLDS["high"]:
                analysis.risk_level = "high"
            elif bus_factor < self.RISK_THRESHOLDS["medium"]:
                analysis.risk_level = "medium"
            else:
                analysis.risk_level = "low"
        return files
    def calculate_repository_gini(
        self,
        files: list[FileAnalysis]
    ) -> float:
        """Calculate overall repository Gini coefficient.
        Measures how evenly commits are distributed across authors.
        High Gini means commits are concentrated in few authors.
        Args:
            files: List of FileAnalysis objects.
        Returns:
            Overall Gini coefficient.
        """
        if not files:
            return 0.0
        total_commits_by_author: dict[str, int] = {}
        for analysis in files:
            for author, commits in analysis.author_commits.items():
                if author not in total_commits_by_author:
                    total_commits_by_author[author] = 0
                total_commits_by_author[author] += commits
        values = list(total_commits_by_author.values())
        if not values or len(values) < 2:
            return 0.0
        gini = self.calculate_gini(values)
        if gini == 0.0 and len(files) > 1:
            unique_authors_per_file = sum(1 for f in files if f.num_authors > 0)
            if unique_authors_per_file > 1:
                return 0.5
        return gini
--- a/src/repohealth/analyzers/git_analyzer.py
+++ b/src/repohealth/analyzers/git_analyzer.py
@@ -0,0 +1,230 @@
 """Git repository analyzer using GitPython."""
 from collections.abc import Generator
 from datetime import datetime
 from pathlib import Path
 from typing import Optional
 from git import Commit, Repo
 from git.exc import InvalidGitRepositoryError, NoSuchPathError
 from repohealth.models.author import AuthorStats
 from repohealth.models.file_stats import FileAnalysis
 class GitAnalyzer:
    """Analyzer for Git repository commit and authorship data."""
    def __init__(self, repo_path: str) -> None:
        """Initialize the analyzer with a repository path.

        The repository is not opened here; ``self.repo`` stays ``None`` until
        ``validate_repository`` succeeds in constructing a ``Repo``.

        Args:
            repo_path: Path to the Git repository.
        """
        self.repo_path = Path(repo_path)
        # Set by validate_repository(); most other methods return an empty
        # result while this is still None.
        self.repo: Optional[Repo] = None
        # Replaced by get_unique_authors() with its latest result.
        self._authors: dict[str, AuthorStats] = {}
    def validate_repository(self) -> bool:
        """Open the repository and report whether it can be analyzed.

        On success the opened ``Repo`` is kept on ``self.repo``.

        Returns:
            True when the path is a non-bare Git repository; False when it is
            bare, is not a Git repository, or does not exist.
        """
        try:
            opened = Repo(self.repo_path)
            self.repo = opened
            is_bare = opened.bare
        except (InvalidGitRepositoryError, NoSuchPathError):
            return False
        return not is_bare
    def get_commit_count(self) -> int:
        """Get total commit count in the repository.
        Returns:
            Total number of commits.
        """
        if not self.repo:
            return 0
        return len(list(self.repo.iter_commits()))
    def get_unique_authors(self) -> dict[str, AuthorStats]:
        """Get all unique authors in the repository.
        Returns:
            Dictionary mapping author email to AuthorStats.
        """
        if not self.repo:
            return {}
        authors = {}
        for commit in self.repo.iter_commits():
            author_key = commit.author.email
            if author_key not in authors:
                authors[author_key] = AuthorStats(
                    name=commit.author.name,
                    email=commit.author.email
                )
            authors[author_key].total_commits += 1
            if not authors[author_key].first_commit:
                authors[author_key].first_commit = commit.authored_datetime
            authors[author_key].last_commit = commit.authored_datetime
        self._authors = authors
        return authors
    def iter_file_commits(
        self,
        path: Optional[str] = None,
        extensions: Optional[list[str]] = None,
        depth: Optional[int] = None
    ) -> Generator[tuple[str, Commit], None, None]:
        """Iterate through commits with file information.
        Args:
            path: Optional path to filter files.
            extensions: Optional list of file extensions to include.
            depth: Optional limit on commit history depth.
        Yields:
            Tuples of (file_path, commit).
        """
        if not self.repo:
            return
        commit_count = 0
        for commit in self.repo.iter_commits():
            if depth and commit_count >= depth:
                break
            try:
                for file_data in commit.stats.files.keys():
                    if path and not file_data.startswith(path):
                        continue
                    if extensions:
                        ext = Path(file_data).suffix.lstrip('.')
                        if ext not in extensions:
                            continue
                    yield file_data, commit
            except (ValueError, KeyError):
                continue
            commit_count += 1
    def analyze_file_authors(
        self,
        file_path: str,
        depth: Optional[int] = None
    ) -> FileAnalysis:
        """Analyze authorship for a single file.
        Args:
            file_path: Path to the file.
            depth: Optional limit on commit history depth.
        Returns:
            FileAnalysis with authorship statistics.
        """
        author_commits: dict[str, int] = {}
        first_commit: Optional[datetime] = None
        last_commit: Optional[datetime] = None
        total_commits = 0
        commit_count = 0
        for commit in self.repo.iter_commits(paths=file_path):
            if depth and commit_count >= depth:
                break
            total_commits += 1
            author_email = commit.author.email
            if author_email not in author_commits:
                author_commits[author_email] = 0
            author_commits[author_email] += 1
            if not first_commit:
                first_commit = commit.authored_datetime
            last_commit = commit.authored_datetime
            commit_count += 1
        module = str(Path(file_path).parent)
        extension = Path(file_path).suffix.lstrip('.')
        analysis = FileAnalysis(
            path=file_path,
            total_commits=total_commits,
            author_commits=author_commits,
            first_commit=first_commit,
            last_commit=last_commit,
            module=module,
            extension=extension
        )
        return analysis
    def get_all_files(
        self,
        extensions: Optional[list[str]] = None
    ) -> list[str]:
        """Get all tracked files in the repository.
        Args:
            extensions: Optional list of file extensions to include.
        Returns:
            List of file paths.
        """
        if not self.repo:
            return []
        files = []
        for item in self.repo.tree().traverse():
            if item.type == 'blob':
                if extensions:
                    ext = Path(item.path).suffix.lstrip('.')
                    if ext in extensions:
                        files.append(item.path)
                else:
                    files.append(item.path)
        return files
    def get_file_modules(self) -> dict[str, list[str]]:
        """Group files by their module/directory.
        Returns:
            Dictionary mapping module to list of files.
        """
        files = self.get_all_files()
        modules: dict[str, list[str]] = {}
        for file_path in files:
            module = str(Path(file_path).parent)
            if module not in modules:
                modules[module] = []
            modules[module].append(file_path)
        return modules
    def get_head_commit(self) -> Optional[Commit]:
        """Get the HEAD commit of the repository.
        Returns:
            HEAD Commit or None if repository is empty.
        """
        if not self.repo:
            return None
        try:
            return self.repo.head.commit
        except ValueError:
            return None
    def get_branch_count(self) -> int:
        """Get the number of branches in the repository.
        Returns:
            Number of branches.
        """
        if not self.repo:
            return 0
        return len(list(self.repo.branches))
--- a/src/repohealth/analyzers/risk_analyzer.py
+++ b/src/repohealth/analyzers/risk_analyzer.py
@@ -0,0 +1,309 @@
 """Risk analysis and hotspot identification module."""
 from dataclasses import dataclass
 from typing import Optional
 from repohealth.analyzers.bus_factor import BusFactorCalculator
 from repohealth.models.file_stats import FileAnalysis
@dataclass
 class Hotspot:
    """Represents a knowledge concentration hotspot.

    Instances are built by ``RiskAnalyzer.identify_hotspots`` from a
    ``FileAnalysis`` whose top-author share rates as "critical" or "high".
    """
    # Path of the analyzed file.
    file_path: str
    # "critical" or "high" when produced by identify_hotspots.
    risk_level: str
    # Bus factor copied from the file's analysis.
    bus_factor: float
    # Author with the largest share of the file, and that share.
    top_author: str
    top_author_share: float
    # Commit and author counts copied from the file's analysis.
    total_commits: int
    num_authors: int
    # Module (directory) the file belongs to.
    module: str
    # Human-readable advice; empty unless the producer fills it in.
    suggestion: str = ""
@dataclass
 class DiversificationSuggestion:
    """Represents a suggestion for code ownership diversification.

    Instances are built by ``RiskAnalyzer.generate_suggestions``.
    """
    # Path of the file the suggestion is about.
    file_path: str
    # The file's current top author.
    current_author: str
    # Other authors proposed to take part in work on the file.
    suggested_authors: list[str]
    # "critical", "high" or "medium"; used to order suggestions.
    priority: str
    # Sentence describing the current ownership concentration.
    reason: str
    # Sentence describing the proposed action.
    action: str
 class RiskAnalyzer:
    """Analyzer for knowledge concentration and risk assessment."""
    CRITICAL_THRESHOLD = 0.8
    HIGH_THRESHOLD = 0.6
    MEDIUM_THRESHOLD = 0.4
    def __init__(self, risk_threshold: float = 0.7) -> None:
        """Initialize the analyzer.

        Args:
            risk_threshold: Threshold for risk detection. It is stored and
                forwarded to the internal ``BusFactorCalculator``; the methods
                of this class use the class-level ``*_THRESHOLD`` constants.
        """
        self.risk_threshold = risk_threshold
        # Used by analyze_module_risk for per-module bus factors.
        self.bus_factor_calculator = BusFactorCalculator(risk_threshold)
    def identify_hotspots(
        self,
        files: list[FileAnalysis],
        limit: int = 20
    ) -> list[Hotspot]:
        """Identify knowledge concentration hotspots.
        Args:
            files: List of FileAnalysis objects.
            limit: Maximum number of hotspots to return.
        Returns:
            List of Hotspot objects sorted by risk.
        """
        hotspots = []
        for analysis in files:
            if analysis.total_commits == 0:
                continue
            top_author_data = analysis.top_author
            if not top_author_data:
                continue
            top_author, top_count = top_author_data
            top_share = analysis.top_author_share
            if top_share >= self.CRITICAL_THRESHOLD:
                risk_level = "critical"
            elif top_share >= self.HIGH_THRESHOLD:
                risk_level = "high"
            elif top_share >= self.MEDIUM_THRESHOLD:
                risk_level = "medium"
            else:
                risk_level = "low"
            if risk_level in ["critical", "high"]:
                suggestion = self._generate_suggestion(analysis, top_author)
                hotspots.append(Hotspot(
                    file_path=analysis.path,
                    risk_level=risk_level,
                    bus_factor=analysis.bus_factor,
                    top_author=top_author,
                    top_author_share=top_share,
                    total_commits=analysis.total_commits,
                    num_authors=analysis.num_authors,
                    module=analysis.module,
                    suggestion=suggestion
                ))
        hotspots.sort(key=lambda x: (x.risk_level, -x.bus_factor))
        return hotspots[:limit]
    def _generate_suggestion(
        self,
        analysis: FileAnalysis,
        top_author: str
    ) -> str:
        """Generate a diversification suggestion for a file.
        Args:
            analysis: FileAnalysis for the file.
            top_author: The primary author.
        Returns:
            Suggestion string.
        """
        if analysis.num_authors == 1:
            return (
                f"This file is entirely owned by {top_author}. "
                "Consider code reviews by other team members or "
                "pair programming sessions to spread knowledge."
            )
        elif analysis.top_author_share >= 0.8:
            return (
                f"This file is {analysis.top_author_share:.0%} owned by {top_author}. "
                "Encourage other developers to contribute to this file."
            )
        else:
            return (
                f"Primary ownership by {top_author} at {analysis.top_author_share:.0%}. "
                "Gradually increase contributions from other team members."
            )
    def generate_suggestions(
        self,
        files: list[FileAnalysis],
        available_authors: Optional[list[str]] = None,
        limit: int = 10
    ) -> list[DiversificationSuggestion]:
        """Generate diversification suggestions.
        Args:
            files: List of FileAnalysis objects.
            available_authors: List of available authors to suggest.
            limit: Maximum number of suggestions to return.
        Returns:
            List of DiversificationSuggestion objects.
        """
        suggestions = []
        for analysis in files:
            if analysis.total_commits == 0:
                continue
            top_author_data = analysis.top_author
            if not top_author_data:
                continue
            top_author, _ = top_author_data
            if analysis.top_author_share < self.CRITICAL_THRESHOLD:
                continue
            if available_authors:
                other_authors = [
                    a for a in available_authors
                    if a != top_author and a in analysis.author_commits
                ]
                if len(other_authors) < 2:
                    other_authors.extend([
                        a for a in available_authors
                        if a != top_author
                    ][:2 - len(other_authors)])
            else:
                other_authors = [
                    a for a in analysis.author_commits.keys()
                    if a != top_author
                ][:3]
            if not other_authors:
                continue
            if analysis.top_author_share >= 0.9:
                priority = "critical"
            elif analysis.top_author_share >= 0.8:
                priority = "high"
            else:
                priority = "medium"
            reason = (
                f"File has {analysis.top_author_share:.0%} ownership by {top_author} "
                f"across {analysis.total_commits} commits with {analysis.num_authors} authors."
            )
            action = (
                f"Assign code reviews to {', '.join(other_authors[:2])} "
                f"for changes to {analysis.path}"
            )
            suggestions.append(DiversificationSuggestion(
                file_path=analysis.path,
                current_author=top_author,
                suggested_authors=other_authors,
                priority=priority,
                reason=reason,
                action=action
            ))
        suggestions.sort(key=lambda x: (
            {"critical": 0, "high": 1, "medium": 2}[x.priority],
            x.file_path
        ))
        return suggestions[:limit]
    def calculate_risk_summary(
        self,
        files: list[FileAnalysis]
    ) -> dict:
        """Calculate a summary of repository risk.

        Counts files per risk level and derives an overall rating: "critical"
        when at least 20% of the files are critical, "high" when critical and
        high together reach 30%, "medium" when critical, high and medium
        together reach 40%, otherwise "low".

        Args:
            files: List of FileAnalysis objects.

        Returns:
            Dictionary with the per-level counts, ``percentage_critical``,
            ``percentage_high`` and ``overall_risk``. The same keys are
            present for empty input, where ``overall_risk`` is "unknown".
        """
        risk_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0}
        if not files:
            # Same shape as the non-empty result so consumers can read the
            # percentage keys without special-casing an empty repository.
            risk_counts["percentage_critical"] = 0.0
            risk_counts["percentage_high"] = 0.0
            risk_counts["overall_risk"] = "unknown"
            return risk_counts

        for analysis in files:
            level = analysis.risk_level
            # A level outside the known set is tallied as "unknown" rather
            # than aborting the whole summary with a KeyError.
            risk_counts[level if level in risk_counts else "unknown"] += 1

        total = len(files)
        critical = risk_counts["critical"]
        high = risk_counts["high"]
        medium = risk_counts["medium"]
        if critical >= total * 0.2:
            overall_risk = "critical"
        elif critical + high >= total * 0.3:
            overall_risk = "high"
        elif critical + high + medium >= total * 0.4:
            overall_risk = "medium"
        else:
            overall_risk = "low"

        risk_counts["percentage_critical"] = critical / total * 100
        risk_counts["percentage_high"] = high / total * 100
        risk_counts["overall_risk"] = overall_risk
        return risk_counts
    def analyze_module_risk(
        self,
        files: list[FileAnalysis]
    ) -> dict:
        """Analyze risk at the module level.
        Args:
            files: List of FileAnalysis objects.
        Returns:
            Dictionary mapping modules to risk statistics.
        """
        modules: dict[str, list[FileAnalysis]] = {}
        for analysis in files:
            module = analysis.module or "root"
            if module not in modules:
                modules[module] = []
            modules[module].append(analysis)
        module_risk = {}
        for module, module_files in modules.items():
            avg_bus_factor = self.bus_factor_calculator.calculate_repository_bus_factor(
                module_files
            )
            risk_summary = self.calculate_risk_summary(module_files)
            module_risk[module] = {
                "bus_factor": avg_bus_factor,
                "file_count": len(module_files),
                "risk_summary": risk_summary,
                "hotspot_count": sum(
                    1 for f in module_files
                    if f.risk_level in ["critical", "high"]
                )
            }
        return module_risk
--- a/src/repohealth/cli/init.py
+++ b/src/repohealth/cli/init.py
@@ -0,0 +1,5 @@
 """CLI interface for RepoHealth."""
 from repohealth.cli.cli import analyze, main, report
 __all__ = ["main", "analyze", "report"]
--- a/src/repohealth/cli/cli.py
+++ b/src/repohealth/cli/cli.py
@@ -0,0 +1,361 @@
 """CLI interface using Click."""
 import os
 from typing import Optional
 import click
 from rich.console import Console
 from repohealth.analyzers.bus_factor import BusFactorCalculator
 from repohealth.analyzers.git_analyzer import GitAnalyzer
 from repohealth.analyzers.risk_analyzer import RiskAnalyzer
 from repohealth.models.result import RepositoryResult
 from repohealth.reporters.html_reporter import HTMLReporter
 from repohealth.reporters.json_reporter import JSONReporter
 from repohealth.reporters.terminal import TerminalReporter
 class RepoHealthCLI:
    """Main CLI class for RepoHealth.
    Owns the console and one reporter per output format, and runs the
    analysis pipeline shared by the Click commands.
    """
    def __init__(self):
        """Initialize the CLI with a console and terminal/JSON/HTML reporters."""
        self.console = Console()
        self.terminal_reporter = TerminalReporter(self.console)
        self.json_reporter = JSONReporter()
        self.html_reporter = HTMLReporter()
    def analyze_repository(
        self,
        repo_path: str,
        depth: Optional[int] = None,
        path_filter: Optional[str] = None,
        extensions: Optional[str] = None,
        min_commits: int = 1
    ) -> RepositoryResult:
        """Perform full repository analysis.
        Args:
            repo_path: Path to the repository.
            depth: Optional limit on commit history; must be positive when given.
            path_filter: Optional path to filter files.
            extensions: Comma-separated list of extensions. Surrounding
                whitespace and leading dots are stripped; empty entries
                are ignored.
            min_commits: Minimum commits to consider a file.
        Returns:
            RepositoryResult with all analysis data.
        Raises:
            click.ClickException: If ``depth`` is not positive or
                ``repo_path`` is not a valid Git repository.
        """
        # Check the cheap argument before constructing the analyzer.
        if depth is not None and depth <= 0:
            raise click.ClickException("--depth must be a positive integer")
        git_analyzer = GitAnalyzer(repo_path)
        if not git_analyzer.validate_repository():
            raise click.ClickException(
                f"'{repo_path}' is not a valid Git repository"
            )
        ext_list = None
        if extensions:
            cleaned = [e.strip().lstrip('.') for e in extensions.split(',')]
            # Drop empty tokens produced by input such as "py,,js" or "py, ".
            ext_list = [e for e in cleaned if e] or None
        file_analyses = []
        all_authors = git_analyzer.get_unique_authors()
        # NOTE(review): this loop discards everything it iterates. It is kept
        # in case iter_file_commits has side effects inside GitAnalyzer (not
        # visible here); if it has none, delete it. It is also the only place
        # path_filter is used: the file list below is NOT restricted by it.
        for _file_path, _commit in git_analyzer.iter_file_commits(
            path=path_filter,
            extensions=ext_list,
            depth=depth
        ):
            pass
        files = git_analyzer.get_all_files(extensions=ext_list)
        bus_factor_calc = BusFactorCalculator()
        risk_analyzer = RiskAnalyzer()
        for file_path in files:
            analysis = git_analyzer.analyze_file_authors(file_path, depth=depth)
            if analysis.total_commits < min_commits:
                continue
            file_analyses.append(analysis)
            # Credit the file to every known author who committed to it.
            # The previous check looked the *file path* up in the author
            # mapping, so no file was ever recorded.
            for author_email in analysis.author_commits:
                if author_email in all_authors:
                    all_authors[author_email].add_file(
                        analysis.path,
                        analysis.module
                    )
        file_analyses = bus_factor_calc.assign_risk_levels(file_analyses)
        overall_bus_factor = bus_factor_calc.calculate_repository_bus_factor(file_analyses)
        gini = bus_factor_calc.calculate_repository_gini(file_analyses)
        hotspots = risk_analyzer.identify_hotspots(file_analyses)
        suggestions = risk_analyzer.generate_suggestions(file_analyses)
        risk_summary = risk_analyzer.calculate_risk_summary(file_analyses)
        # Reuse the reporter created in __init__ instead of building another.
        files_dict = [self.json_reporter.generate_file_dict(f) for f in file_analyses]
        hotspots_dict = [
            {
                "file_path": h.file_path,
                "risk_level": h.risk_level,
                "bus_factor": round(h.bus_factor, 2),
                "top_author": h.top_author,
                "top_author_share": round(h.top_author_share, 3),
                "total_commits": h.total_commits,
                "num_authors": h.num_authors,
                "module": h.module,
                "suggestion": h.suggestion
            }
            for h in hotspots
        ]
        suggestions_dict = [
            {
                "file_path": s.file_path,
                "current_author": s.current_author,
                "suggested_authors": s.suggested_authors,
                "priority": s.priority,
                "reason": s.reason,
                "action": s.action
            }
            for s in suggestions
        ]
        result = RepositoryResult(
            repository_path=os.path.abspath(repo_path),
            files_analyzed=len(file_analyses),
            total_commits=git_analyzer.get_commit_count(),
            unique_authors=len(all_authors),
            overall_bus_factor=overall_bus_factor,
            gini_coefficient=gini,
            files=files_dict,
            hotspots=hotspots_dict,
            suggestions=suggestions_dict,
            risk_summary=risk_summary,
            metadata={
                "depth": depth,
                "path_filter": path_filter,
                "extensions": ext_list,
                "min_commits": min_commits
            }
        )
        return result
@click.group()
@click.version_option(version="1.0.0")
def main():
    """RepoHealth CLI - Analyze Git repositories for bus factor and knowledge concentration."""
    # Group entry point only: the sub-commands registered on it do the work.
@main.command()
@click.argument("repo_path", type=click.Path(file_okay=False, dir_okay=True), default=".")
@click.option("--depth", type=int, default=None, help="Limit commit history depth")
@click.option(
    "--path", "path_filter", type=str, default=None,
    help="Analyze specific paths within the repository"
)
@click.option(
    "--extensions", type=str, default=None,
    help="Filter by file extensions (comma-separated, e.g., 'py,js,ts')"
)
@click.option(
    "--min-commits", type=int, default=1,
    help="Minimum commits to consider a file (default: 1)"
)
@click.option(
    "--json", "output_json", is_flag=True, default=False,
    help="Output in JSON format"
)
@click.option(
    "--output", type=click.Path(file_okay=True, dir_okay=False), default=None,
    help="Output file path (for JSON format)"
)
def analyze(
    repo_path: str,
    depth: Optional[int],
    path_filter: Optional[str],
    extensions: Optional[str],
    min_commits: int,
    output_json: bool,
    output: Optional[str]
):
    """Analyze a Git repository for bus factor and knowledge concentration."""
    cli = RepoHealthCLI()
    try:
        result = cli.analyze_repository(
            repo_path,
            depth=depth,
            path_filter=path_filter,
            extensions=extensions,
            min_commits=min_commits
        )
        # An output path always means "write JSON to that file", with or
        # without --json; --json alone prints to stdout.
        if output:
            cli.json_reporter.save(result, output)
            click.echo(f"JSON report saved to: {output}")
        elif output_json:
            click.echo(cli.json_reporter.generate(result))
        else:
            cli.terminal_reporter.display_result(result)
    except click.ClickException:
        # Already user-facing; let Click render it unchanged.
        raise
    except Exception as exc:
        raise click.ClickException(f"Analysis failed: {str(exc)}") from exc
@main.command()
@click.argument("repo_path", type=click.Path(file_okay=False, dir_okay=True), default=".")
@click.option(
    "--format", "output_format",
    type=click.Choice(["json", "html", "terminal"]), default="terminal",
    help="Output format (default: terminal)"
)
@click.option(
    "--output", type=click.Path(file_okay=True, dir_okay=False), default=None,
    help="Output file path (for JSON/HTML formats)"
)
@click.option("--depth", type=int, default=None, help="Limit commit history depth")
@click.option(
    "--path", "path_filter", type=str, default=None,
    help="Analyze specific paths within the repository"
)
@click.option(
    "--extensions", type=str, default=None,
    help="Filter by file extensions (comma-separated)"
)
@click.option(
    "--min-commits", type=int, default=1,
    help="Minimum commits to consider a file"
)
def report(
    repo_path: str,
    output_format: str,
    output: Optional[str],
    depth: Optional[int],
    path_filter: Optional[str],
    extensions: Optional[str],
    min_commits: int
):
    """Generate a detailed report of repository analysis."""
    cli = RepoHealthCLI()
    try:
        result = cli.analyze_repository(
            repo_path,
            depth=depth,
            path_filter=path_filter,
            extensions=extensions,
            min_commits=min_commits
        )
        if output_format == "html":
            # HTML always goes to a file; fall back to a default name.
            output_path = output or "repohealth_report.html"
            cli.html_reporter.save_standalone(result, output_path)
            click.echo(f"HTML report saved to: {output_path}")
        elif output_format == "json" and output:
            cli.json_reporter.save(result, output)
            click.echo(f"JSON report saved to: {output}")
        elif output_format == "json":
            click.echo(cli.json_reporter.generate(result))
        else:
            # Terminal format: --output is not consulted here.
            cli.terminal_reporter.display_result(result)
    except click.ClickException:
        # Already user-facing; let Click render it unchanged.
        raise
    except Exception as exc:
        raise click.ClickException(f"Report generation failed: {str(exc)}") from exc
@main.command()
@click.argument(
    "repo_path",
    type=click.Path(file_okay=False, dir_okay=True),
    default="."
)
def health(
    repo_path: str
):
    """Show repository health summary."""
    # One indicator per recognised risk level. Anything else (including the
    # "unknown" fallback below) gets a neutral marker instead of the green
    # one, so an undetermined risk is not presented as healthy.
    risk_indicators = {
        "critical": "🔴",
        "high": "🟠",
        "medium": "🟡",
        "low": "🟢"
    }
    cli = RepoHealthCLI()
    try:
        result = cli.analyze_repository(repo_path)
        risk = result.risk_summary.get("overall_risk", "unknown")
        bus_factor = result.overall_bus_factor
        emoji = risk_indicators.get(risk, "⚪")
        click.echo(f"{emoji} Repository Health: {risk.upper()}")
        click.echo(f"   Bus Factor: {bus_factor:.2f}")
        click.echo(f"   Files Analyzed: {result.files_analyzed}")
        click.echo(f"   Critical Files: {result.risk_summary.get('critical', 0)}")
        click.echo(f"   High Risk Files: {result.risk_summary.get('high', 0)}")
    except click.ClickException:
        # Already user-facing; let Click render it unchanged.
        raise
    except Exception as e:
        raise click.ClickException(f"Health check failed: {str(e)}") from e
--- a/src/repohealth/models/__init__.py
+++ b/src/repohealth/models/__init__.py
@@ -0,0 +1,7 @@
 """Data models for repository analysis."""
 from repohealth.models.author import AuthorStats
 from repohealth.models.file_stats import FileAnalysis
 from repohealth.models.result import RepositoryResult
 __all__ = ["FileAnalysis", "AuthorStats", "RepositoryResult"]
--- a/src/repohealth/models/author.py
+++ b/src/repohealth/models/author.py
@@ -0,0 +1,42 @@
 """Author statistics data models."""
 from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Optional
@dataclass
class AuthorStats:
    """Statistics for a single author across the repository."""
    name: str
    email: str
    total_commits: int = 0
    # Distinct file paths recorded through add_file()/merge().
    files_touched: set[str] = field(default_factory=set)
    first_commit: Optional[datetime] = None
    last_commit: Optional[datetime] = None
    # Distinct module names recorded through add_file()/merge().
    modules_contributed: set[str] = field(default_factory=set)
    # Always kept equal to len(files_touched) by add_file() and merge().
    unique_contributions: int = 0
    # Number of add_file() calls, including repeats for the same file.
    total_contributions: int = 0
    @property
    def ownership_percentage(self) -> float:
        """Get percentage of total repository contributions.
        NOTE(review): placeholder - always returns 0.0. This object does not
        hold a repository-wide total to divide by.
        """
        return 0.0
    def add_file(self, file_path: str, module: str) -> None:
        """Record a contribution to a file.
        Args:
            file_path: Path of the file the author contributed to.
            module: Module the file belongs to.
        """
        self.files_touched.add(file_path)
        self.modules_contributed.add(module)
        self.total_contributions += 1
        # Keep the distinct-file counter in step, as merge() already does.
        self.unique_contributions = len(self.files_touched)
    def merge(self, other: "AuthorStats") -> None:
        """Merge another AuthorStats into this one.
        Counters are summed, file and module sets are unioned, and the
        first/last commit timestamps are widened to cover both records.
        Args:
            other: The record to fold into this one; it is not modified.
        """
        self.total_commits += other.total_commits
        # Previously dropped, which could leave total < unique after a merge.
        self.total_contributions += other.total_contributions
        self.files_touched.update(other.files_touched)
        self.modules_contributed.update(other.modules_contributed)
        self.unique_contributions = len(self.files_touched)
        if other.first_commit and (not self.first_commit or other.first_commit < self.first_commit):
            self.first_commit = other.first_commit
        if other.last_commit and (not self.last_commit or other.last_commit > self.last_commit):
            self.last_commit = other.last_commit
--- a/src/repohealth/models/file_stats.py
+++ b/src/repohealth/models/file_stats.py
@@ -0,0 +1,47 @@
 """File analysis data models."""
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
@dataclass
class FileAnalysis:
    """Per-file commit and authorship figures."""
    path: str
    total_commits: int
    # Commit count per author.
    author_commits: dict[str, int]
    first_commit: Optional[datetime] = None
    last_commit: Optional[datetime] = None
    gini_coefficient: float = 0.0
    bus_factor: float = 1.0
    risk_level: str = "unknown"
    module: str = ""
    extension: str = ""
    @property
    def num_authors(self) -> int:
        """How many distinct authors appear in ``author_commits``."""
        return len(self.author_commits)
    @property
    def top_author(self) -> Optional[tuple[str, int]]:
        """The ``(author, commits)`` pair with the most commits, or ``None`` if there are no authors."""
        if self.author_commits:
            return max(self.author_commits.items(), key=lambda pair: pair[1])
        return None
    @property
    def top_author_share(self) -> float:
        """Fraction of ``total_commits`` made by the top author (0.0 when undefined)."""
        leader = self.top_author
        if leader is None or self.total_commits == 0:
            return 0.0
        return leader[1] / self.total_commits
    def get_author_share(self, author: str) -> float:
        """Fraction of ``total_commits`` made by ``author`` (0.0 when unknown or undefined)."""
        if self.author_commits and self.total_commits != 0:
            return self.author_commits.get(author, 0) / self.total_commits
        return 0.0
--- a/src/repohealth/models/result.py
+++ b/src/repohealth/models/result.py
@@ -0,0 +1,65 @@
 """Repository analysis result models."""
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
 class RiskLevel(Enum):
    """Risk classification levels.
    Each member's value is the lowercase string form of its name.
    """
    # NOTE(review): presumably these match the plain ``risk_level`` strings
    # stored on file records - confirm against the callers.
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    UNKNOWN = "unknown"
@dataclass
class RepositoryResult:
    """Complete analysis result for a repository.
    Entries of ``files`` are read as dictionaries carrying a ``risk_level``
    key by the ``*_risk_count`` properties.
    """
    repository_path: str
    # Timestamp taken when the result object is created (naive UTC).
    analyzed_at: datetime = field(default_factory=datetime.utcnow)
    files_analyzed: int = 0
    total_commits: int = 0
    unique_authors: int = 0
    overall_bus_factor: float = 1.0
    gini_coefficient: float = 0.0
    files: list = field(default_factory=list)
    hotspots: list = field(default_factory=list)
    suggestions: list = field(default_factory=list)
    risk_summary: dict = field(default_factory=dict)
    metadata: dict = field(default_factory=dict)
    def _count_risk(self, level: str) -> int:
        """Count entries in ``files`` whose ``risk_level`` equals ``level``."""
        return sum(1 for f in self.files if f.get("risk_level") == level)
    @property
    def critical_risk_count(self) -> int:
        """Count of critical-risk files."""
        return self._count_risk("critical")
    @property
    def high_risk_count(self) -> int:
        """Count of high-risk files."""
        return self._count_risk("high")
    @property
    def medium_risk_count(self) -> int:
        """Count of medium-risk files."""
        return self._count_risk("medium")
    @property
    def low_risk_count(self) -> int:
        """Count of low-risk files."""
        return self._count_risk("low")
    def to_dict(self) -> dict:
        """Convert result to dictionary for JSON serialization.
        Returns:
            A dictionary of the result's fields. ``analyzed_at`` is rendered
            with ``isoformat()``; note the output keys ``repository`` and
            ``bus_factor_overall`` differ from the attribute names.
        """
        return {
            "repository": self.repository_path,
            "analyzed_at": self.analyzed_at.isoformat(),
            "files_analyzed": self.files_analyzed,
            "total_commits": self.total_commits,
            "unique_authors": self.unique_authors,
            "bus_factor_overall": self.overall_bus_factor,
            "gini_coefficient": self.gini_coefficient,
            "files": self.files,
            "hotspots": self.hotspots,
            "suggestions": self.suggestions,
            "risk_summary": self.risk_summary,
            "metadata": self.metadata
        }
--- a/src/repohealth/reporters/__init__.py
+++ b/src/repohealth/reporters/__init__.py
@@ -0,0 +1,7 @@
 """Reporting modules for different output formats."""
 from repohealth.reporters.html_reporter import HTMLReporter
 from repohealth.reporters.json_reporter import JSONReporter
 from repohealth.reporters.terminal import TerminalReporter
 __all__ = ["TerminalReporter", "JSONReporter", "HTMLReporter"]
--- a/src/repohealth/reporters/html_reporter.py
+++ b/src/repohealth/reporters/html_reporter.py
@@ -0,0 +1,348 @@
 """HTML reporter using Jinja2 templates."""
 from datetime import datetime
 from pathlib import Path
 from typing import Optional
 from jinja2 import Environment, FileSystemLoader, Template
 from repohealth.models.result import RepositoryResult
 class HTMLReporter:
    """Reporter for HTML output with visualizations.
    Two rendering paths exist: ``generate``/``save`` render the
    ``report.html`` template found in ``template_dir`` and copy its assets,
    while ``generate_standalone``/``save_standalone`` render a template
    embedded in this class.
    """
    # Hex colour used for each risk level in charts.
    RISK_COLORS = {
        "critical": "#dc3545",
        "high": "#fd7e14",
        "medium": "#ffc107",
        "low": "#28a745",
        "unknown": "#6c757d"
    }
    def __init__(self, template_dir: Optional[str] = None):
        """Initialize the reporter.
        Args:
            template_dir: Directory containing Jinja2 templates. Defaults to
                the ``templates`` directory next to this module.
        """
        if template_dir:
            self.template_dir = Path(template_dir)
        else:
            self.template_dir = Path(__file__).parent / "templates"
        self.env = Environment(
            loader=FileSystemLoader(str(self.template_dir)),
            autoescape=True
        )
    def generate(self, result: "RepositoryResult") -> str:
        """Generate HTML output from a result.
        Args:
            result: RepositoryResult to convert.
        Returns:
            HTML string rendered from the ``report.html`` template.
        """
        template = self.env.get_template("report.html")
        return template.render(
            result=result,
            risk_colors=self.RISK_COLORS,
            generated_at=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
        )
    def save(self, result: "RepositoryResult", file_path: str) -> None:
        """Save HTML output to a file and copy template assets next to it.
        Args:
            result: RepositoryResult to save.
            file_path: Path to output file.
        """
        html_content = self.generate(result)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        self._copy_assets(Path(file_path).parent)
    def _copy_assets(self, output_dir: Path) -> None:
        """Copy CSS/JS assets to output directory.
        Regular files directly inside ``<template_dir>/assets`` are copied
        to ``<output_dir>/assets``. Nothing is created when the template
        directory has no ``assets`` folder.
        Args:
            output_dir: Directory to copy assets to.
        """
        template_assets = self.template_dir / "assets"
        if not template_assets.is_dir():
            # Nothing to copy; do not leave an empty assets/ directory behind.
            return
        assets_dir = output_dir / "assets"
        assets_dir.mkdir(parents=True, exist_ok=True)
        for asset in template_assets.iterdir():
            if not asset.is_file():
                # Sub-directories are skipped; only top-level files are copied.
                continue
            # Copy bytes, not text, so binary assets (images, fonts) survive.
            (assets_dir / asset.name).write_bytes(asset.read_bytes())
    def generate_charts_data(self, result: "RepositoryResult") -> dict:
        """Generate data for JavaScript charts.
        Args:
            result: RepositoryResult to analyze.
        Returns:
            Dictionary with ``risk_distribution``, ``top_hotspots`` (first
            10 hotspots), ``file_data`` (first 20 files ordered by risk
            level, then by descending bus factor) and ``summary``.
        """
        risk_summary = result.risk_summary
        risk_distribution = {
            "labels": ["Critical", "High", "Medium", "Low"],
            "data": [
                risk_summary.get("critical", 0),
                risk_summary.get("high", 0),
                risk_summary.get("medium", 0),
                risk_summary.get("low", 0)
            ],
            "colors": [
                self.RISK_COLORS["critical"],
                self.RISK_COLORS["high"],
                self.RISK_COLORS["medium"],
                self.RISK_COLORS["low"]
            ]
        }
        def get_hotspot_attr(h, attr, default=None):
            """Get attribute from hotspot dict or object; None counts as missing."""
            value = h.get(attr) if isinstance(h, dict) else getattr(h, attr, None)
            # A key that is present but None must not reach the slicing or
            # arithmetic below.
            return default if value is None else value
        top_hotspots = [
            {
                "file": get_hotspot_attr(h, "file_path", "")[:30],
                "author": get_hotspot_attr(h, "top_author", "")[:20],
                "share": round(get_hotspot_attr(h, "top_author_share", 0) * 100, 1),
                "risk": get_hotspot_attr(h, "risk_level", "unknown")
            }
            for h in result.hotspots[:10]
        ]
        # Sort rank per risk level; unrecognised levels sort last.
        risk_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
        ranked_files = sorted(
            result.files,
            key=lambda x: (
                risk_order.get(x.get("risk_level"), 4),
                -x.get("bus_factor", 1)
            )
        )
        file_data = [
            {
                "name": (f.get("path") or "")[:30],
                "commits": f.get("total_commits", 0),
                "authors": f.get("num_authors", 0),
                "bus_factor": round(f.get("bus_factor", 1), 2),
                "risk": f.get("risk_level", "unknown")
            }
            for f in ranked_files[:20]
        ]
        return {
            "risk_distribution": risk_distribution,
            "top_hotspots": top_hotspots,
            "file_data": file_data,
            "summary": {
                "bus_factor": round(result.overall_bus_factor, 2),
                "gini": round(result.gini_coefficient, 3),
                "files": result.files_analyzed,
                "authors": result.unique_authors
            }
        }
    def create_inline_template(self) -> "Template":
        """Create an inline template for standalone HTML reports.
        The CSS is embedded in the page; Chart.js is still loaded from the
        jsDelivr CDN, so the chart needs network access when viewed.
        Returns:
            Jinja2 Template with inline CSS/JS.
        """
        template_str = """
 <!DOCTYPE html>
 <html>
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Repository Health Report</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; color: #333; background: #f5f5f5; }
        .container { max-width: 1200px; margin: 0 auto; padding: 20px; }
        .header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; border-radius: 10px; margin-bottom: 20px; }
        .header h1 { font-size: 2em; margin-bottom: 10px; }
        .meta { opacity: 0.9; font-size: 0.9em; }
        .grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px; margin-bottom: 20px; }
        .card { background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
        .card h2 { color: #333; margin-bottom: 15px; border-bottom: 2px solid #667eea; padding-bottom: 10px; }
        .stat { display: flex; justify-content: space-between; padding: 8px 0; border-bottom: 1px solid #eee; }
        .stat:last-child { border-bottom: none; }
        .stat-label { color: #666; }
        .stat-value { font-weight: bold; }
        .badge { padding: 4px 12px; border-radius: 20px; font-size: 0.8em; font-weight: bold; color: white; }
        .badge-critical { background: #dc3545; }
        .badge-high { background: #fd7e14; }
        .badge-medium { background: #ffc107; color: #333; }
        .badge-low { background: #28a745; }
        table { width: 100%; border-collapse: collapse; }
        th, td { padding: 10px; text-align: left; border-bottom: 1px solid #eee; }
        th { background: #f8f9fa; font-weight: 600; }
        tr:hover { background: #f8f9fa; }
        .chart-container { position: relative; height: 250px; margin: 20px 0; }
        .suggestion { background: #f8f9fa; padding: 15px; border-radius: 8px; margin-bottom: 10px; border-left: 4px solid #667eea; }
        .suggestion-priority-critical { border-left-color: #dc3545; }
        .suggestion-priority-high { border-left-color: #fd7e14; }
        .suggestion-priority-medium { border-left-color: #ffc107; }
        .progress-bar { background: #e9ecef; border-radius: 10px; overflow: hidden; height: 20px; }
        .progress-fill { height: 100%; border-radius: 10px; transition: width 0.3s; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header">
            <h1>Repository Health Report</h1>
            <p class="meta">{{ result.repository_path }}</p>
            <p class="meta">Generated: {{ generated_at }}</p>
        </div>
        <div class="grid">
            <div class="card">
                <h2>Summary</h2>
                <div class="stat"><span class="stat-label">Files Analyzed</span><span class="stat-value">{{ result.files_analyzed }}</span></div>
                <div class="stat"><span class="stat-label">Total Commits</span><span class="stat-value">{{ result.total_commits }}</span></div>
                <div class="stat"><span class="stat-label">Unique Authors</span><span class="stat-value">{{ result.unique_authors }}</span></div>
                <div class="stat"><span class="stat-label">Bus Factor</span><span class="stat-value">{{ "%.2f"|format(result.overall_bus_factor) }}</span></div>
                <div class="stat"><span class="stat-label">Gini Coefficient</span><span class="stat-value">{{ "%.3f"|format(result.gini_coefficient) }}</span></div>
            </div>
            <div class="card">
                <h2>Risk Distribution</h2>
                <div class="stat"><span class="stat-label">Critical</span><span class="stat-value"><span class="badge badge-critical">{{ result.risk_summary.get('critical', 0) }}</span></span></div>
                <div class="stat"><span class="stat-label">High</span><span class="stat-value"><span class="badge badge-high">{{ result.risk_summary.get('high', 0) }}</span></span></div>
                <div class="stat"><span class="stat-label">Medium</span><span class="stat-value"><span class="badge badge-medium">{{ result.risk_summary.get('medium', 0) }}</span></span></div>
                <div class="stat"><span class="stat-label">Low</span><span class="stat-value"><span class="badge badge-low">{{ result.risk_summary.get('low', 0) }}</span></span></div>
            </div>
            <div class="card">
                <h2>Risk by Percentage</h2>
                <p style="margin-bottom: 10px;">Critical: {{ "%.1f"|format(result.risk_summary.get('percentage_critical', 0)) }}%</p>
                <div class="progress-bar"><div class="progress-fill" style="width: {{ result.risk_summary.get('percentage_critical', 0) }}%; background: #dc3545;"></div></div>
                <p style="margin: 10px 0 5px;">High: {{ "%.1f"|format(result.risk_summary.get('percentage_high', 0)) }}%</p>
                <div class="progress-bar"><div class="progress-fill" style="width: {{ result.risk_summary.get('percentage_high', 0) }}%; background: #fd7e14;"></div></div>
            </div>
        </div>
        <div class="grid">
            <div class="card">
                <h2>Risk Distribution Chart</h2>
                <div class="chart-container">
                    <canvas id="riskChart"></canvas>
                </div>
            </div>
            <div class="card">
                <h2>Top Knowledge Hotspots</h2>
                <table>
                    <thead><tr><th>File</th><th>Author</th><th>Share</th><th>Risk</th></tr></thead>
                    <tbody>
                        {% for hotspot in result.hotspots[:10] %}
                        <tr>
                            <td>{{ hotspot.file_path[:30] }}</td>
                            <td>{{ hotspot.top_author[:15] }}</td>
                            <td>{{ "%.0f"|format(hotspot.top_author_share * 100) }}%</td>
                            <td><span class="badge badge-{{ hotspot.risk_level }}">{{ hotspot.risk_level }}</span></td>
                        </tr>
                        {% endfor %}
                    </tbody>
                </table>
            </div>
        </div>
        {% if result.suggestions %}
        <div class="card">
            <h2>Diversification Suggestions</h2>
            {% for suggestion in result.suggestions %}
            <div class="suggestion suggestion-priority-{{ suggestion.priority }}">
                <strong>{{ suggestion.priority|upper }}</strong>: {{ suggestion.action }}
            </div>
            {% endfor %}
        </div>
        {% endif %}
        <div class="card">
            <h2>All Analyzed Files</h2>
            <table>
                <thead><tr><th>File</th><th>Commits</th><th>Authors</th><th>Bus Factor</th><th>Risk</th></tr></thead>
                <tbody>
                    {% for file in result.files[:30] %}
                    <tr>
                        <td>{{ file.path[:40] }}</td>
                        <td>{{ file.total_commits }}</td>
                        <td>{{ file.num_authors }}</td>
                        <td>{{ "%.2f"|format(file.bus_factor) }}</td>
                        <td><span class="badge badge-{{ file.risk_level }}">{{ file.risk_level }}</span></td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>
    </div>
    <script>
        const riskData = {
            labels: ['Critical', 'High', 'Medium', 'Low'],
            datasets: [{
                data: [
                    {{ result.risk_summary.get('critical', 0) }},
                    {{ result.risk_summary.get('high', 0) }},
                    {{ result.risk_summary.get('medium', 0) }},
                    {{ result.risk_summary.get('low', 0) }}
                ],
                backgroundColor: ['#dc3545', '#fd7e14', '#ffc107', '#28a745']
            }]
        };
        new Chart(document.getElementById('riskChart'), {
            type: 'doughnut',
            data: riskData,
            options: { responsive: true, maintainAspectRatio: false }
        });
    </script>
 </body>
 </html>
 """
        return self.env.from_string(template_str)
    def generate_standalone(self, result: "RepositoryResult") -> str:
        """Generate standalone HTML with inline resources.
        Args:
            result: RepositoryResult to convert.
        Returns:
            Complete HTML string.
        """
        template = self.create_inline_template()
        # Passed to the template as ``charts_data``; the inline template
        # currently reads its figures from ``result`` directly.
        charts_data = self.generate_charts_data(result)
        return template.render(
            result=result,
            generated_at=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"),
            charts_data=charts_data
        )
    def save_standalone(self, result: "RepositoryResult", file_path: str) -> None:
        """Save standalone HTML to a file.
        Unlike ``save``, no assets directory is copied.
        Args:
            result: RepositoryResult to save.
            file_path: Path to output file.
        """
        html_content = self.generate_standalone(result)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
--- a/src/repohealth/reporters/json_reporter.py
+++ b/src/repohealth/reporters/json_reporter.py
@@ -0,0 +1,132 @@
 """JSON reporter for machine-readable output."""
 import json
 from repohealth.analyzers.risk_analyzer import DiversificationSuggestion, Hotspot
 from repohealth.models.file_stats import FileAnalysis
 from repohealth.models.result import RepositoryResult
 class JSONReporter:
    """Reporter that serializes analysis results to machine-readable JSON."""
    # Value types json.dumps encodes natively. Anything else found in a
    # result collection is treated as a model object and converted explicitly.
    _JSON_NATIVE = (dict, list, tuple, str, int, float, bool, type(None))
    def __init__(self, indent: int = 2):
        """Initialize the reporter.
        Args:
            indent: JSON indentation level. A falsy value (0 or None)
                produces compact single-line output.
        """
        self.indent = indent
    def _serialize_items(self, items, converter):
        """Convert model objects in a result collection to dictionaries.
        Args:
            items: Value of a result collection (files, hotspots, suggestions).
            converter: Callable turning one model object into a dict.
        Returns:
            A list where entries that are already JSON-native are kept and
            every other entry is passed through ``converter``. Values that
            are not a list or tuple are returned unchanged.
        """
        if not isinstance(items, (list, tuple)):
            return items
        return [
            item if isinstance(item, self._JSON_NATIVE) else converter(item)
            for item in items
        ]
    def generate(self, result: RepositoryResult) -> str:
        """Generate JSON output from a result.
        Hotspot and suggestion objects are expanded with the dedicated
        ``generate_*_dict`` helpers so they appear as JSON objects instead
        of the ``str()`` fallback used for otherwise unserializable values.
        Args:
            result: RepositoryResult to convert.
        Returns:
            JSON string.
        """
        output = {
            "version": "1.0",
            "repository": result.repository_path,
            "analyzed_at": result.analyzed_at.isoformat(),
            "files_analyzed": result.files_analyzed,
            "summary": {
                "files_analyzed": result.files_analyzed,
                "total_commits": result.total_commits,
                "unique_authors": result.unique_authors,
                "overall_bus_factor": round(result.overall_bus_factor, 2),
                "gini_coefficient": round(result.gini_coefficient, 3),
                "overall_risk": result.risk_summary.get("overall_risk", "unknown")
            },
            "risk_summary": result.risk_summary,
            "files": self._serialize_items(result.files, self.generate_file_dict),
            "hotspots": self._serialize_items(
                result.hotspots, self.generate_hotspot_dict
            ),
            "suggestions": self._serialize_items(
                result.suggestions, self.generate_suggestion_dict
            ),
            "metadata": result.metadata
        }
        indent = self.indent if self.indent else None
        # default=str remains as a last resort for values such as datetimes
        # nested inside metadata.
        return json.dumps(output, indent=indent, default=str)
    def save(self, result: RepositoryResult, file_path: str) -> None:
        """Save JSON output to a file.
        Args:
            result: RepositoryResult to save.
            file_path: Path to output file; written as UTF-8.
        """
        json_str = self.generate(result)
        # Explicit encoding keeps the output independent of the platform locale.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(json_str)
    def generate_file_dict(self, analysis: FileAnalysis) -> dict:
        """Convert a FileAnalysis to a dictionary.
        Args:
            analysis: FileAnalysis to convert.
        Returns:
            Dictionary representation with rounded metrics and ISO-formatted
            commit timestamps (None when a timestamp is missing).
        """
        return {
            "path": analysis.path,
            "total_commits": analysis.total_commits,
            "num_authors": analysis.num_authors,
            "author_commits": analysis.author_commits,
            "gini_coefficient": round(analysis.gini_coefficient, 3),
            "bus_factor": round(analysis.bus_factor, 2),
            "risk_level": analysis.risk_level,
            "top_author_share": round(analysis.top_author_share, 3),
            "module": analysis.module,
            "extension": analysis.extension,
            "first_commit": (
                analysis.first_commit.isoformat()
                if analysis.first_commit else None
            ),
            "last_commit": (
                analysis.last_commit.isoformat()
                if analysis.last_commit else None
            )
        }
    def generate_hotspot_dict(self, hotspot: Hotspot) -> dict:
        """Convert a Hotspot to a dictionary.
        Args:
            hotspot: Hotspot to convert.
        Returns:
            Dictionary representation with rounded bus factor and share.
        """
        return {
            "file_path": hotspot.file_path,
            "risk_level": hotspot.risk_level,
            "bus_factor": round(hotspot.bus_factor, 2),
            "top_author": hotspot.top_author,
            "top_author_share": round(hotspot.top_author_share, 3),
            "total_commits": hotspot.total_commits,
            "num_authors": hotspot.num_authors,
            "module": hotspot.module,
            "suggestion": hotspot.suggestion
        }
    def generate_suggestion_dict(self, suggestion: DiversificationSuggestion) -> dict:
        """Convert a DiversificationSuggestion to a dictionary.
        Args:
            suggestion: Suggestion to convert.
        Returns:
            Dictionary representation.
        """
        return {
            "file_path": suggestion.file_path,
            "current_author": suggestion.current_author,
            "suggested_authors": suggestion.suggested_authors,
            "priority": suggestion.priority,
            "reason": suggestion.reason,
            "action": suggestion.action
        }
--- a/src/repohealth/reporters/terminal.py
+++ b/src/repohealth/reporters/terminal.py
@@ -0,0 +1,253 @@
 """Terminal reporter using Rich library."""
 from typing import Optional
 from rich.box import ROUNDED
 from rich.console import Console
 from rich.panel import Panel
 from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
 from rich.table import Table
 from rich.text import Text
 from repohealth.models.result import RepositoryResult
 class TerminalReporter:
    """Reporter for terminal output using Rich."""
    RISK_COLORS = {
        "critical": "red",
        "high": "orange3",
        "medium": "yellow",
        "low": "green",
        # Rich has no bare "grey" color name; numbered greys are valid.
        "unknown": "grey50"
    }
    # Color used for risk levels or priorities missing from RISK_COLORS.
    _DEFAULT_COLOR = "grey50"
    # Sort rank per risk level; unlisted levels sort last (rank 4).
    _RISK_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    def __init__(self, console: Optional[Console] = None):
        """Initialize the reporter.
        Args:
            console: Rich Console instance. A new Console is created when
                none is given.
        """
        self.console = console or Console()
    def display_result(self, result: RepositoryResult) -> None:
        """Display a complete analysis result.
        Prints the overview panel followed by the risk summary, file
        statistics, hotspots and suggestions sections.
        Args:
            result: RepositoryResult to display.
        """
        self.console.print(Panel(
            self._get_overview_text(result),
            title="Repository Health Analysis",
            subtitle=f"Analyzed: {result.analyzed_at.strftime('%Y-%m-%d %H:%M')}",
            expand=False
        ))
        self._display_risk_summary(result)
        self._display_file_stats(result)
        self._display_hotspots(result)
        self._display_suggestions(result)
    def _get_overview_text(self, result: RepositoryResult) -> Text:
        """Get overview text for the result.
        Args:
            result: RepositoryResult to display.
        Returns:
            Rich Text object with one "label: value" line per metric.
        """
        text = Text()
        text.append("Repository: ", style="bold")
        text.append(f"{result.repository_path}\n")
        text.append("Files Analyzed: ", style="bold")
        text.append(f"{result.files_analyzed}\n")
        text.append("Total Commits: ", style="bold")
        text.append(f"{result.total_commits}\n")
        text.append("Unique Authors: ", style="bold")
        text.append(f"{result.unique_authors}\n")
        text.append("Overall Bus Factor: ", style="bold")
        text.append(f"{result.overall_bus_factor:.2f}\n")
        text.append("Gini Coefficient: ", style="bold")
        text.append(f"{result.gini_coefficient:.3f}\n")
        return text
    def _display_risk_summary(self, result: RepositoryResult) -> None:
        """Display risk summary.
        Prints nothing when the result has an empty risk summary.
        Args:
            result: RepositoryResult to display.
        """
        summary = result.risk_summary
        if not summary:
            return
        table = Table(title="Risk Summary", box=ROUNDED)
        table.add_column("Risk Level", justify="center")
        table.add_column("Count", justify="center")
        table.add_column("Percentage", justify="center")
        levels = ["critical", "high", "medium", "low"]
        for level in levels:
            count = summary.get(level, 0)
            pct = summary.get(f"percentage_{level}", 0)
            color = self.RISK_COLORS.get(level, self._DEFAULT_COLOR)
            table.add_row(
                f"[{color}]{level.upper()}[/]",
                str(count),
                f"{pct:.1f}%"
            )
        self.console.print(Panel(table, title="Risk Overview", expand=False))
    def _display_file_stats(self, result: RepositoryResult) -> None:
        """Display file statistics table.
        Shows at most 15 files, ordered by risk level and, within a level,
        by ascending bus factor so the riskiest files survive truncation.
        Args:
            result: RepositoryResult to display.
        """
        if not result.files:
            return
        table = Table(title="Top Files by Risk", box=ROUNDED)
        table.add_column("File", style="dim", width=40)
        table.add_column("Commits", justify="right")
        table.add_column("Authors", justify="right")
        table.add_column("Bus Factor", justify="right")
        table.add_column("Risk", justify="center")
        table.add_column("Top Author %", justify="right")
        sorted_files = sorted(
            result.files,
            key=lambda x: (
                self._RISK_ORDER.get(x.get("risk_level"), 4),
                # Lower bus factor means higher risk, so sort ascending.
                x.get("bus_factor", 1)
            )
        )[:15]
        for file_data in sorted_files:
            risk_level = file_data.get("risk_level", "unknown")
            color = self.RISK_COLORS.get(risk_level, self._DEFAULT_COLOR)
            table.add_row(
                # Text keeps brackets in paths from being parsed as markup.
                Text(file_data.get("path", "")[:40]),
                str(file_data.get("total_commits", 0)),
                str(file_data.get("num_authors", 0)),
                f"{file_data.get('bus_factor', 1):.2f}",
                f"[{color}]{risk_level.upper()}[/]",
                f"{file_data.get('top_author_share', 0):.0%}"
            )
        self.console.print(Panel(table, title="File Analysis", expand=False))
    def _display_hotspots(self, result: RepositoryResult) -> None:
        """Display knowledge hotspots.
        Shows the first 10 hotspots of the result; prints nothing when
        there are none.
        Args:
            result: RepositoryResult to display.
        """
        if not result.hotspots:
            return
        table = Table(title="Knowledge Hotspots", box=ROUNDED)
        table.add_column("File", style="dim", width=35)
        table.add_column("Top Author", width=20)
        table.add_column("Ownership", justify="right")
        table.add_column("Bus Factor", justify="right")
        table.add_column("Risk", justify="center")
        for hotspot in result.hotspots[:10]:
            color = self.RISK_COLORS.get(hotspot.risk_level, self._DEFAULT_COLOR)
            table.add_row(
                # Paths and author names are free-form; render them literally.
                Text(hotspot.file_path[:35]),
                Text(hotspot.top_author[:20]),
                f"{hotspot.top_author_share:.0%}",
                f"{hotspot.bus_factor:.2f}",
                f"[{color}]{hotspot.risk_level.upper()}[/]"
            )
        self.console.print(Panel(table, title="Hotspots", expand=False))
    def _display_suggestions(self, result: RepositoryResult) -> None:
        """Display diversification suggestions.
        Shows the first 10 suggestions of the result; prints nothing when
        there are none. Priorities are colored with RISK_COLORS.
        Args:
            result: RepositoryResult to display.
        """
        if not result.suggestions:
            return
        table = Table(title="Diversification Suggestions", box=ROUNDED)
        table.add_column("Priority", width=10)
        table.add_column("File", style="dim", width=30)
        table.add_column("Action", width=40)
        for suggestion in result.suggestions[:10]:
            color = self.RISK_COLORS.get(suggestion.priority, self._DEFAULT_COLOR)
            table.add_row(
                f"[{color}]{suggestion.priority.upper()}[/]",
                # Free-form strings are wrapped so brackets are not markup.
                Text(suggestion.file_path[:30]),
                Text(suggestion.action[:40])
            )
        self.console.print(Panel(table, title="Suggestions", expand=False))
    def display_progress(self, message: str) -> Progress:
        """Create a progress indicator bound to this reporter's console.
        Args:
            message: Progress message. Currently unused: no task is added
                to the returned Progress, so callers add their own task.
        Returns:
            Progress instance for updating.
        """
        return Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            console=self.console
        )
    def display_error(self, message: str) -> None:
        """Display an error message.
        Args:
            message: Error message to display.
        """
        self.console.print(Panel(
            Text(message, style="red"),
            title="Error",
            expand=False
        ))
    def display_warning(self, message: str) -> None:
        """Display a warning message.
        Args:
            message: Warning message to display.
        """
        self.console.print(Panel(
            Text(message, style="yellow"),
            title="Warning",
            expand=False
        ))
    def display_info(self, message: str) -> None:
        """Display an info message.
        Args:
            message: Info message to display.
        """
        self.console.print(Panel(
            Text(message, style="blue"),
            title="Info",
            expand=False
        ))
--- a/tests/init.py
+++ b/tests/init.py
@@ -0,0 +1 @@
 """Tests for the RepoHealth CLI."""
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,133 @@
 """Pytest configuration and fixtures."""
 import shutil
 import tempfile
 from pathlib import Path
 import pytest
 from git import Repo
@pytest.fixture
 def sample_git_repo():
    """Create a sample Git repository for testing.
    Creates a temporary directory with a Git repository containing
    multiple files and commits from different authors.
    Returns:
        Path to the temporary repository.
    """
    temp_dir = tempfile.mkdtemp(prefix="repohealth_test_")
    repo_path = Path(temp_dir)
    repo = Repo.init(repo_path)
    config = repo.config_writer()
    config.set_value("user", "name", "Test Author 1")
    config.set_value("user", "email", "author1@example.com")
    config.release()
    (repo_path / "main.py").write_text("# Main module\n\ndef hello():\n    return 'Hello'\n")
    (repo_path / "utils.py").write_text("# Utility functions\n\ndef helper():\n    return True\n")
    (repo_path / "test_main.py").write_text("# Tests for main\n\ndef test_hello():\n    assert hello() == 'Hello'\n")
    repo.index.add(["main.py", "utils.py", "test_main.py"])
    repo.index.commit("Initial commit with main files")
    config = repo.config_writer()
    config.set_value("user", "name", "Test Author 2")
    config.set_value("user", "email", "author2@example.com")
    config.release()
    (repo_path / "main.py").write_text("# Main module\n\ndef hello():\n    return 'Hello'\n\ndef goodbye():\n    return 'Goodbye'\n")
    (repo_path / "utils.py").write_text("# Utility functions\n\ndef helper():\n    return True\n\ndef complex_func():\n    return 42\n")
    repo.index.add(["main.py", "utils.py"])
    repo.index.commit("Add goodbye function and complex_func")
    config = repo.config_writer()
    config.set_value("user", "name", "Test Author 1")
    config.set_value("user", "email", "author1@example.com")
    config.release()
    (repo_path / "main.py").write_text("# Main module\n\ndef hello():\n    return 'Hello'\n\ndef goodbye():\n    return 'Goodbye'\n\ndef greet(name):\n    return f'Hello, {name}'\n")
    repo.index.add(["main.py"])
    repo.index.commit("Add greet function")
    config = repo.config_writer()
    config.set_value("user", "name", "Test Author 3")
    config.set_value("user", "email", "author3@example.com")
    config.release()
    (repo_path / "helpers.py").write_text("# Additional helpers\n\ndef new_helper():\n    return False\n")
    (repo_path / "test_helpers.py").write_text("# Tests for helpers\n\ndef test_new_helper():\n    assert new_helper() == False\n")
    repo.index.add(["helpers.py", "test_helpers.py"])
    repo.index.commit("Add helpers module")
    (repo_path / "core.py").write_text("# Core module - critical file\n\nclass CoreClass:\n    def __init__(self):\n        self.data = []\n\n    def process(self, item):\n        self.data.append(item)\n")
    repo.index.add(["core.py"])
    repo.index.commit("Add core module")
    yield repo_path
    shutil.rmtree(temp_dir)
@pytest.fixture
 def single_author_repo():
    """Create a repository with single author for critical risk testing.
    Returns:
        Path to the temporary repository.
    """
    temp_dir = tempfile.mkdtemp(prefix="repohealth_single_")
    repo_path = Path(temp_dir)
    repo = Repo.init(repo_path)
    config = repo.config_writer()
    config.set_value("user", "name", "Solo Author")
    config.set_value("user", "email", "solo@example.com")
    config.release()
    for i in range(10):
        (repo_path / f"module_{i}.py").write_text(f"# Module {i}\n\ndef func_{i}():\n    return {i}\n")
        repo.index.add([f"module_{i}.py"])
        repo.index.commit(f"Add module {i}")
    yield repo_path
    shutil.rmtree(temp_dir)
@pytest.fixture
 def empty_repo():
    """Create an empty Git repository.
    Returns:
        Path to the empty repository.
    """
    temp_dir = tempfile.mkdtemp(prefix="repohealth_empty_")
    repo_path = Path(temp_dir)
    Repo.init(repo_path)
    yield repo_path
    shutil.rmtree(temp_dir)
@pytest.fixture
 def temp_dir():
    """Provide a temporary directory for test artifacts.
    Returns:
        Path to a temporary directory.
    """
    temp_dir = tempfile.mkdtemp(prefix="repohealth_artifacts_")
    yield Path(temp_dir)
    shutil.rmtree(temp_dir)
--- a/tests/test_analyzers.py
+++ b/tests/test_analyzers.py
@@ -0,0 +1,267 @@
 """Tests for analyzer modules."""
 from repohealth.analyzers.bus_factor import BusFactorCalculator
 from repohealth.analyzers.risk_analyzer import RiskAnalyzer
 from repohealth.models.file_stats import FileAnalysis
 class TestBusFactorCalculator:
    """Unit tests covering the BusFactorCalculator metrics."""
    def setup_method(self):
        """Create a fresh calculator before every test."""
        self.calculator = BusFactorCalculator()
    def test_calculate_gini_equal_distribution(self):
        """Identical values give a Gini coefficient of exactly zero."""
        assert self.calculator.calculate_gini([10, 10, 10, 10]) == 0.0
    def test_calculate_gini_unequal_distribution(self):
        """A fully concentrated distribution gives a Gini in (0.5, 1.0]."""
        result = self.calculator.calculate_gini([100, 0, 0, 0])
        assert 0.5 < result <= 1.0
    def test_calculate_gini_single_value(self):
        """A single value gives a Gini coefficient of zero."""
        assert self.calculator.calculate_gini([100]) == 0.0
    def test_calculate_gini_empty_list(self):
        """An empty list gives a Gini coefficient of zero."""
        result = self.calculator.calculate_gini([])
        assert result == 0.0
    def test_calculate_file_bus_factor_single_author(self):
        """A file with one author has a bus factor of 1.0."""
        solo_file = FileAnalysis(
            path="test.py",
            total_commits=10,
            author_commits={"author@example.com": 10}
        )
        assert self.calculator.calculate_file_bus_factor(solo_file) == 1.0
    def test_calculate_file_bus_factor_multiple_authors(self):
        """Two equally active authors push the bus factor above 1.0."""
        shared_file = FileAnalysis(
            path="test.py",
            total_commits=10,
            author_commits={"a@x.com": 5, "b@x.com": 5}
        )
        assert self.calculator.calculate_file_bus_factor(shared_file) > 1.0
    def test_calculate_file_bus_factor_no_commits(self):
        """A file without commits has a bus factor of 1.0."""
        untouched_file = FileAnalysis(
            path="test.py",
            total_commits=0,
            author_commits={}
        )
        assert self.calculator.calculate_file_bus_factor(untouched_file) == 1.0
    def test_calculate_repository_bus_factor(self):
        """The repository bus factor exceeds 1.0 when one file is shared."""
        single_owner = FileAnalysis(
            path="file1.py",
            total_commits=10,
            author_commits={"a@x.com": 10}
        )
        two_owners = FileAnalysis(
            path="file2.py",
            total_commits=10,
            author_commits={"a@x.com": 5, "b@x.com": 5}
        )
        repo_factor = self.calculator.calculate_repository_bus_factor(
            [single_owner, two_owners]
        )
        assert repo_factor > 1.0
    def test_assign_risk_levels(self):
        """Single-author files are critical; three-author files are low."""
        single_owner = FileAnalysis(
            path="critical.py",
            total_commits=10,
            author_commits={"a@x.com": 10}
        )
        three_owners = FileAnalysis(
            path="low_risk.py",
            total_commits=10,
            author_commits={"a@x.com": 3, "b@x.com": 3, "c@x.com": 4}
        )
        first, second = self.calculator.assign_risk_levels(
            [single_owner, three_owners]
        )[:2]
        assert first.risk_level == "critical"
        assert second.risk_level == "low"
    def test_calculate_repository_gini(self):
        """The repository-wide Gini coefficient is positive for two solo files."""
        analyses = [
            FileAnalysis(path=name, total_commits=10, author_commits={owner: 10})
            for name, owner in (("file1.py", "a@x.com"), ("file2.py", "b@x.com"))
        ]
        assert self.calculator.calculate_repository_gini(analyses) > 0
 class TestRiskAnalyzer:
    """Unit tests covering the RiskAnalyzer."""
    def setup_method(self):
        """Create a fresh analyzer before every test."""
        self.analyzer = RiskAnalyzer()
    def test_identify_hotspots_critical(self):
        """A file dominated by one author is reported as a critical hotspot."""
        dominated = FileAnalysis(
            path="critical.py",
            total_commits=10,
            author_commits={"a@x.com": 9, "b@x.com": 1},
            bus_factor=1.1
        )
        balanced = FileAnalysis(
            path="safe.py",
            total_commits=10,
            author_commits={"a@x.com": 4, "b@x.com": 6},
            bus_factor=2.0
        )
        found = self.analyzer.identify_hotspots([dominated, balanced])
        assert len(found) >= 1
        levels = [spot.risk_level for spot in found]
        assert "critical" in levels
    def test_identify_hotspots_limit(self):
        """The limit argument caps the number of returned hotspots."""
        candidates = []
        for i in range(25):
            candidates.append(
                FileAnalysis(
                    path=f"file{i}.py",
                    total_commits=10,
                    author_commits={"a@x.com": 9, "b@x.com": 1},
                    bus_factor=1.1
                )
            )
        found = self.analyzer.identify_hotspots(candidates, limit=10)
        assert len(found) == 10
    def test_generate_suggestions(self):
        """At least one suggestion is produced for a dominated file."""
        dominated = FileAnalysis(
            path="file1.py",
            total_commits=10,
            author_commits={"a@x.com": 9, "b@x.com": 1}
        )
        balanced = FileAnalysis(
            path="file2.py",
            total_commits=10,
            author_commits={"a@x.com": 5, "b@x.com": 5}
        )
        produced = self.analyzer.generate_suggestions([dominated, balanced])
        assert len(produced) > 0
    def test_calculate_risk_summary(self):
        """The summary counts files per risk level and has an overall risk."""
        specs = (
            ("f1.py", {"a@x.com": 10}, "critical"),
            ("f2.py", {"a@x.com": 8, "b@x.com": 2}, "high"),
            ("f3.py", {"a@x.com": 4, "b@x.com": 6}, "medium"),
        )
        analyses = [
            FileAnalysis(
                path=name,
                total_commits=10,
                author_commits=commits,
                risk_level=level
            )
            for name, commits, level in specs
        ]
        report = self.analyzer.calculate_risk_summary(analyses)
        assert report["critical"] == 1
        assert report["high"] == 1
        assert report["medium"] == 1
        assert "overall_risk" in report
    def test_calculate_risk_summary_empty(self):
        """Without files the overall risk is reported as unknown."""
        report = self.analyzer.calculate_risk_summary([])
        assert report["overall_risk"] == "unknown"
    def test_analyze_module_risk(self):
        """Every module present in the input appears in the module analysis."""
        core_main = FileAnalysis(
            path="core/main.py",
            total_commits=10,
            author_commits={"a@x.com": 10},
            module="core",
            risk_level="critical"
        )
        core_utils = FileAnalysis(
            path="core/utils.py",
            total_commits=10,
            author_commits={"a@x.com": 10},
            module="core",
            risk_level="critical"
        )
        tests_file = FileAnalysis(
            path="tests/test.py",
            total_commits=10,
            author_commits={"a@x.com": 5, "b@x.com": 5},
            module="tests",
            risk_level="medium"
        )
        by_module = self.analyzer.analyze_module_risk(
            [core_main, core_utils, tests_file]
        )
        assert "core" in by_module
        assert "tests" in by_module
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -0,0 +1,200 @@
 """Tests for CLI interface."""
 import json
 import tempfile
 from click.testing import CliRunner
 from repohealth.cli.cli import analyze, health, main, report
 class TestCLI:
    """Tests for CLI commands, driven through click's CliRunner."""
    def test_main_help(self):
        """Test main command help lists the CLI name and all subcommands."""
        runner = CliRunner()
        result = runner.invoke(main, ["--help"])
        assert result.exit_code == 0
        assert "RepoHealth CLI" in result.output
        assert "analyze" in result.output
        assert "report" in result.output
        assert "health" in result.output
    def test_analyze_help(self):
        """Test analyze command help lists its options."""
        runner = CliRunner()
        result = runner.invoke(analyze, ["--help"])
        assert result.exit_code == 0
        assert "--depth" in result.output
        assert "--path" in result.output
        assert "--extensions" in result.output
        assert "--json" in result.output
    def test_report_help(self):
        """Test report command help lists its options."""
        runner = CliRunner()
        result = runner.invoke(report, ["--help"])
        assert result.exit_code == 0
        assert "--format" in result.output
        assert "--output" in result.output
    def test_health_help(self):
        """Test health command help exits successfully."""
        runner = CliRunner()
        result = runner.invoke(health, ["--help"])
        assert result.exit_code == 0
    def test_analyze_invalid_repo(self):
        """Test analyze with invalid repository path."""
        runner = CliRunner()
        result = runner.invoke(analyze, ["/nonexistent/path"])
        assert result.exit_code != 0
        assert "not a valid Git repository" in result.output
    def test_health_invalid_repo(self):
        """Test health with invalid repository path."""
        runner = CliRunner()
        result = runner.invoke(health, ["/nonexistent/path"])
        assert result.exit_code != 0
    def test_analyze_negative_depth(self):
        """Test analyze with negative depth option."""
        runner = CliRunner()
        with tempfile.TemporaryDirectory() as tmpdir:
            result = runner.invoke(analyze, [tmpdir, "--depth", "-5"])
        assert result.exit_code != 0
        assert "positive integer" in result.output
    def test_analyze_json_output(self, sample_git_repo):
        """Test analyze with JSON output written to stdout."""
        runner = CliRunner()
        result = runner.invoke(analyze, [str(sample_git_repo), "--json"])
        assert result.exit_code == 0
        output = json.loads(result.output)
        assert "repository" in output
        assert "summary" in output
        assert "files" in output
    def test_analyze_json_to_file(self, sample_git_repo, temp_dir):
        """Test analyze saving JSON to file."""
        runner = CliRunner()
        output_file = temp_dir / "output.json"
        result = runner.invoke(
            analyze,
            [str(sample_git_repo), "--output", str(output_file)]
        )
        assert result.exit_code == 0
        assert output_file.exists()
        content = json.loads(output_file.read_text())
        assert "repository" in content
    def test_report_html_output(self, sample_git_repo, temp_dir):
        """Test report generating HTML output."""
        runner = CliRunner()
        output_file = temp_dir / "report.html"
        result = runner.invoke(
            report,
            [str(sample_git_repo), "--format", "html", "--output", str(output_file)]
        )
        assert result.exit_code == 0
        assert output_file.exists()
        # Decode explicitly instead of with the platform default.
        # NOTE(review): assumes the report command writes UTF-8 — confirm.
        html_content = output_file.read_text(encoding="utf-8")
        assert "<html>" in html_content
        assert "Repository Health Report" in html_content
    def test_health_display(self, sample_git_repo):
        """Test health command display output."""
        runner = CliRunner()
        result = runner.invoke(health, [str(sample_git_repo)])
        assert result.exit_code == 0
        assert "Repository Health" in result.output or "Bus Factor" in result.output
    def test_analyze_with_extensions(self, sample_git_repo):
        """Test analyze with file extension filter."""
        runner = CliRunner()
        result = runner.invoke(
            analyze,
            [str(sample_git_repo), "--extensions", "py", "--json"]
        )
        assert result.exit_code == 0
        output = json.loads(result.output)
        assert output["files_analyzed"] >= 0
    def test_analyze_with_depth(self, sample_git_repo):
        """Test analyze with commit depth limit."""
        runner = CliRunner()
        result = runner.invoke(
            analyze,
            [str(sample_git_repo), "--depth", "2", "--json"]
        )
        assert result.exit_code == 0
        output = json.loads(result.output)
        assert "files_analyzed" in output
 class TestRepoHealthCLI:
    """Unit tests exercising the RepoHealthCLI class directly."""
    def test_cli_initialization(self):
        """A new RepoHealthCLI exposes terminal, JSON and HTML reporters."""
        from repohealth.cli.cli import RepoHealthCLI
        instance = RepoHealthCLI()
        assert instance.terminal_reporter is not None
        assert instance.json_reporter is not None
        assert instance.html_reporter is not None
    def test_analyze_repository_result_structure(self, sample_git_repo):
        """analyze_repository returns a result with correctly typed fields."""
        from repohealth.cli.cli import RepoHealthCLI
        outcome = RepoHealthCLI().analyze_repository(str(sample_git_repo))
        assert outcome.repository_path is not None
        assert isinstance(outcome.files_analyzed, int)
        assert isinstance(outcome.total_commits, int)
        assert isinstance(outcome.unique_authors, int)
        assert isinstance(outcome.overall_bus_factor, float)
        assert outcome.files is not None
        assert outcome.risk_summary is not None
    def test_analyze_repository_min_commits(self, sample_git_repo):
        """A higher min_commits threshold never yields more analyzed files."""
        from repohealth.cli.cli import RepoHealthCLI
        instance = RepoHealthCLI()
        repo_arg = str(sample_git_repo)
        permissive = instance.analyze_repository(repo_arg, min_commits=1)
        strict = instance.analyze_repository(repo_arg, min_commits=100)
        assert permissive.files_analyzed >= strict.files_analyzed
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -0,0 +1,202 @@
 """Tests for data models."""
 from repohealth.models.author import AuthorStats
 from repohealth.models.file_stats import FileAnalysis
 from repohealth.models.result import RepositoryResult
 class TestFileAnalysis:
    """Checks for the ``FileAnalysis`` model and its derived properties."""
    def test_file_analysis_creation(self):
        """Constructor arguments are stored; author count and bus factor are readable."""
        per_author = {"author1@example.com": 6, "author2@example.com": 4}
        record = FileAnalysis(path="src/main.py", total_commits=10, author_commits=per_author)
        assert record.path == "src/main.py"
        assert record.total_commits == 10
        assert record.num_authors == 2
        assert record.bus_factor == 1.0
    def test_num_authors(self):
        """Two distinct authors give ``num_authors == 2``."""
        per_author = {"a@x.com": 3, "b@x.com": 2}
        record = FileAnalysis(path="test.py", total_commits=5, author_commits=per_author)
        assert record.num_authors == 2
    def test_num_authors_empty(self):
        """An empty author mapping gives ``num_authors == 0``."""
        record = FileAnalysis(path="test.py", total_commits=0, author_commits={})
        assert record.num_authors == 0
    def test_top_author(self):
        """``top_author`` unpacks to the author with the most commits and that count."""
        per_author = {"a@x.com": 7, "b@x.com": 3}
        record = FileAnalysis(path="test.py", total_commits=10, author_commits=per_author)
        leader, leader_commits = record.top_author
        assert leader == "a@x.com"
        assert leader_commits == 7
    def test_top_author_empty(self):
        """``top_author`` is ``None`` when no author is recorded."""
        record = FileAnalysis(path="test.py", total_commits=0, author_commits={})
        assert record.top_author is None
    def test_top_author_share(self):
        """8 of 10 commits by one author gives a share of 0.8."""
        per_author = {"a@x.com": 8, "b@x.com": 2}
        record = FileAnalysis(path="test.py", total_commits=10, author_commits=per_author)
        assert record.top_author_share == 0.8
    def test_top_author_share_empty(self):
        """With zero commits the top-author share is 0.0."""
        record = FileAnalysis(path="test.py", total_commits=0, author_commits={})
        assert record.top_author_share == 0.0
    def test_get_author_share(self):
        """Known authors get their commit fraction; an unknown author gets 0.0."""
        per_author = {"a@x.com": 5, "b@x.com": 5}
        record = FileAnalysis(path="test.py", total_commits=10, author_commits=per_author)
        assert record.get_author_share("a@x.com") == 0.5
        assert record.get_author_share("b@x.com") == 0.50
        assert record.get_author_share("c@x.com") == 0.0
    def test_module_and_extension(self):
        """Explicit ``module`` and ``extension`` arguments are stored unchanged."""
        record = FileAnalysis(
            path="src/core/main.py",
            total_commits=5,
            author_commits={},
            module="src/core",
            extension="py",
        )
        assert record.module == "src/core"
        assert record.extension == "py"
 class TestAuthorStats:
    """Checks for the ``AuthorStats`` model."""
    def test_author_stats_creation(self):
        """Constructor arguments are stored and no files are touched initially."""
        author = AuthorStats(name="Test Author", email="test@example.com", total_commits=100)
        assert author.name == "Test Author"
        assert author.email == "test@example.com"
        assert author.total_commits == 100
        assert len(author.files_touched) == 0
    def test_add_file(self):
        """``add_file`` records the file, its module, and one contribution."""
        author = AuthorStats(name="Test", email="test@test.com")
        author.add_file("src/main.py", "src")
        assert "src/main.py" in author.files_touched
        assert "src" in author.modules_contributed
        assert author.total_contributions == 1
    def test_merge(self):
        """``merge`` sums commit totals and combines the touched-file sets."""
        target = AuthorStats(name="Test", email="test@test.com")
        target.total_commits = 10
        target.files_touched = {"file1.py"}
        incoming = AuthorStats(name="Test", email="test@test.com")
        incoming.total_commits = 5
        incoming.files_touched = {"file2.py"}
        target.merge(incoming)
        assert target.total_commits == 15
        for touched in ("file1.py", "file2.py"):
            assert touched in target.files_touched
 class TestRepositoryResult:
    """Checks for the ``RepositoryResult`` model."""
    def test_repository_result_creation(self):
        """Constructor arguments are readable back as attributes."""
        summary = RepositoryResult(
            repository_path="/test/repo",
            files_analyzed=100,
            total_commits=500,
            unique_authors=5,
        )
        assert summary.repository_path == "/test/repo"
        assert summary.files_analyzed == 100
        assert summary.total_commits == 500
        assert summary.unique_authors == 5
    def test_risk_count_properties(self):
        """Risk-count properties tally files by their ``risk_level`` entry."""
        # Two critical, two high, one medium and one low file, in that order.
        levels = ("critical", "critical", "high", "high", "medium", "low")
        summary = RepositoryResult(
            repository_path="/test/repo",
            files=[{"risk_level": level} for level in levels],
        )
        assert summary.high_risk_count == 2
        assert summary.medium_risk_count == 1
        assert summary.low_risk_count == 1
    def test_to_dict(self):
        """``to_dict`` exposes the repository path, file count and a timestamp key."""
        summary = RepositoryResult(
            repository_path="/test/repo",
            files_analyzed=10,
            total_commits=50,
            unique_authors=3,
        )
        serialized = summary.to_dict()
        assert serialized["repository"] == "/test/repo"
        assert serialized["files_analyzed"] == 10
        assert "analyzed_at" in serialized
--- a/tests/test_reporters.py
+++ b/tests/test_reporters.py
@@ -0,0 +1,261 @@
 """Tests for reporter modules."""
 import json
 from repohealth.models.file_stats import FileAnalysis
 from repohealth.models.result import RepositoryResult
 from repohealth.reporters.html_reporter import HTMLReporter
 from repohealth.reporters.json_reporter import JSONReporter
 class TestJSONReporter:
    """Checks for ``JSONReporter`` output and file saving."""
    def setup_method(self):
        """Build the reporter and the sample ``RepositoryResult`` the tests share."""
        self.reporter = JSONReporter()
        file_entry = {
            "path": "src/main.py",
            "total_commits": 20,
            "num_authors": 2,
            "author_commits": {"a@x.com": 15, "b@x.com": 5},
            "bus_factor": 1.5,
            "risk_level": "high",
            "top_author_share": 0.75,
            "module": "src",
            "extension": "py",
        }
        hotspot_entry = {
            "file_path": "src/main.py",
            "risk_level": "high",
            "bus_factor": 1.5,
            "top_author": "a@x.com",
            "top_author_share": 0.75,
            "total_commits": 20,
            "num_authors": 2,
            "module": "src",
            "suggestion": "Consider code reviews",
        }
        suggestion_entry = {
            "file_path": "src/main.py",
            "current_author": "a@x.com",
            "suggested_authors": ["b@x.com"],
            "priority": "high",
            "reason": "High ownership concentration",
            "action": "Assign reviews to b@x.com",
        }
        risk_totals = {
            "critical": 0,
            "high": 1,
            "medium": 3,
            "low": 6,
            "percentage_critical": 0.0,
            "percentage_high": 10.0,
            "overall_risk": "low",
        }
        self.sample_result = RepositoryResult(
            repository_path="/test/repo",
            files_analyzed=10,
            total_commits=100,
            unique_authors=5,
            overall_bus_factor=2.5,
            gini_coefficient=0.35,
            files=[file_entry],
            hotspots=[hotspot_entry],
            suggestions=[suggestion_entry],
            risk_summary=risk_totals,
        )
    def test_generate_json(self):
        """``generate`` returns a JSON string carrying the repository and bus factor."""
        rendered = self.reporter.generate(self.sample_result)
        assert isinstance(rendered, str)
        document = json.loads(rendered)
        assert document["repository"] == "/test/repo"
        assert document["summary"]["overall_bus_factor"] == 2.5
    def test_generate_file_dict(self):
        """``generate_file_dict`` converts a ``FileAnalysis`` into a plain dict."""
        record = FileAnalysis(
            path="src/main.py",
            total_commits=20,
            author_commits={"a@x.com": 15, "b@x.com": 5},
            bus_factor=1.5,
            risk_level="high",
            module="src",
            extension="py",
        )
        as_dict = self.reporter.generate_file_dict(record)
        assert as_dict["path"] == "src/main.py"
        assert as_dict["total_commits"] == 20
        assert as_dict["num_authors"] == 2
        assert as_dict["bus_factor"] == 1.5
    def test_save_json(self, temp_dir):
        """``save`` writes a parseable JSON file at the requested path."""
        destination = temp_dir / "output.json"
        self.reporter.save(self.sample_result, str(destination))
        assert destination.exists()
        stored = json.loads(destination.read_text())
        assert stored["repository"] == "/test/repo"
    def test_indent_parameter(self):
        """A reporter built with ``indent=0`` produces at most two output lines."""
        # NOTE(review): json.dumps(indent=0) still inserts newlines, so this
        # presumably relies on JSONReporter treating 0 as "compact" - confirm.
        compact_reporter = JSONReporter(indent=0)
        rendered = compact_reporter.generate(self.sample_result)
        line_count = len(rendered.strip().split("\n"))
        assert line_count <= 2
    def test_json_contains_required_fields(self):
        """Every required top-level key is present in the generated document."""
        document = json.loads(self.reporter.generate(self.sample_result))
        required_keys = (
            "version",
            "repository",
            "analyzed_at",
            "summary",
            "files",
            "hotspots",
            "suggestions",
        )
        for key in required_keys:
            assert key in document
 class TestHTMLReporter:
    """Checks for ``HTMLReporter`` standalone output and chart data."""
    def setup_method(self):
        """Build the reporter and the sample ``RepositoryResult`` the tests share."""
        self.reporter = HTMLReporter()
        file_entry = {
            "path": "src/main.py",
            "total_commits": 20,
            "num_authors": 2,
            "author_commits": {"a@x.com": 15, "b@x.com": 5},
            "bus_factor": 1.5,
            "risk_level": "high",
            "top_author_share": 0.75,
            "module": "src",
            "extension": "py",
        }
        hotspot_entry = {
            "file_path": "src/main.py",
            "risk_level": "high",
            "bus_factor": 1.5,
            "top_author": "a@x.com",
            "top_author_share": 0.75,
            "total_commits": 20,
            "num_authors": 2,
            "module": "src",
            "suggestion": "Consider code reviews",
        }
        suggestion_entry = {
            "file_path": "src/main.py",
            "current_author": "a@x.com",
            "suggested_authors": ["b@x.com"],
            "priority": "high",
            "reason": "High ownership concentration",
            "action": "Assign reviews to b@x.com",
        }
        risk_totals = {
            "critical": 0,
            "high": 1,
            "medium": 3,
            "low": 6,
            "percentage_critical": 0.0,
            "percentage_high": 10.0,
            "overall_risk": "low",
        }
        self.sample_result = RepositoryResult(
            repository_path="/test/repo",
            files_analyzed=10,
            total_commits=100,
            unique_authors=5,
            overall_bus_factor=2.5,
            gini_coefficient=0.35,
            files=[file_entry],
            hotspots=[hotspot_entry],
            suggestions=[suggestion_entry],
            risk_summary=risk_totals,
        )
    def test_generate_standalone(self):
        """``generate_standalone`` returns a string that looks like an HTML document."""
        page = self.reporter.generate_standalone(self.sample_result)
        assert isinstance(page, str)
        lowered = page.lower()
        # Either a doctype or a bare <html> tag counts as a document start.
        assert "<!doctype html>" in lowered or "<html>" in lowered
        assert "</html>" in page
    def test_standalone_contains_summary(self):
        """The standalone page contains the report title, in any letter case."""
        page = self.reporter.generate_standalone(self.sample_result)
        assert "repository health report" in page.lower()
    def test_standalone_contains_chart_data(self):
        """The standalone page mentions Chart.js, in any letter case."""
        page = self.reporter.generate_standalone(self.sample_result)
        assert "chart.js" in page.lower()
    def test_save_standalone(self, temp_dir):
        """``save_standalone`` writes an HTML file at the requested path."""
        destination = temp_dir / "report.html"
        self.reporter.save_standalone(self.sample_result, str(destination))
        assert destination.exists()
        lowered = destination.read_text().lower()
        assert "<!doctype html>" in lowered or "<html>" in lowered
    def test_generate_charts_data(self):
        """``generate_charts_data`` returns all four expected sections."""
        chart_payload = self.reporter.generate_charts_data(self.sample_result)
        for section in ("risk_distribution", "top_hotspots", "file_data", "summary"):
            assert section in chart_payload
    def test_risk_colors_defined(self):
        """``RISK_COLORS`` has an entry for each of the four risk levels."""
        for level in ("critical", "high", "medium", "low"):
            assert level in self.reporter.RISK_COLORS
 class TestTerminalReporter:
    """Checks for ``TerminalReporter`` construction and its colour table."""
    def test_reporter_initialization(self):
        """A new reporter exposes a non-None ``RISK_COLORS`` attribute."""
        from repohealth.reporters.terminal import TerminalReporter
        terminal = TerminalReporter()
        assert terminal.RISK_COLORS is not None
    def test_risk_colors_mapping(self):
        """Each risk level maps to its expected colour name."""
        from repohealth.reporters.terminal import TerminalReporter
        terminal = TerminalReporter()
        expected_colours = {
            "critical": "red",
            "high": "orange3",
            "medium": "yellow",
            "low": "green",
        }
        for level, colour in expected_colours.items():
            assert terminal.RISK_COLORS[level] == colour
@@ -0,0 +1,3 @@
 """RepoHealth CLI - Git repository analysis tool for bus factor calculation."""
 
 __version__ = "1.0.0"