fix: update CI workflow with proper checkout paths
This commit is contained in:
7
src/repohealth/analyzers/__init__.py
Normal file
7
src/repohealth/analyzers/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Analysis modules for repository health assessment."""
|
||||
|
||||
from repohealth.analyzers.bus_factor import BusFactorCalculator
|
||||
from repohealth.analyzers.git_analyzer import GitAnalyzer
|
||||
from repohealth.analyzers.risk_analyzer import RiskAnalyzer
|
||||
|
||||
__all__ = ["GitAnalyzer", "BusFactorCalculator", "RiskAnalyzer"]
|
||||
219
src/repohealth/analyzers/bus_factor.py
Normal file
219
src/repohealth/analyzers/bus_factor.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""Bus factor calculation module."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from repohealth.models.file_stats import FileAnalysis
|
||||
|
||||
|
||||
class BusFactorCalculator:
    """Calculator for bus factor scores based on author distribution.

    The bus factor estimates how many contributors share the knowledge of
    a file.  It is derived from the Gini coefficient of per-author commit
    counts: a file shared evenly among N authors scores close to N, while
    a single-owner file scores 1.0 (lower = riskier).
    """

    # Exclusive upper bounds on the bus factor for each risk level,
    # evaluated from most to least severe in assign_risk_levels().
    RISK_THRESHOLDS = {
        "critical": 1.0,
        "high": 1.5,
        "medium": 2.0,
        "low": float('inf')
    }

    def __init__(self, risk_threshold: float = 0.7):
        """Initialize the calculator.

        Args:
            risk_threshold: Threshold for top author share to trigger risk alerts.
        """
        self.risk_threshold = risk_threshold

    def calculate_gini(self, values: list[float]) -> float:
        """Calculate the Gini coefficient for a list of values.

        The Gini coefficient measures inequality among values.
        0 = perfect equality, 1 = maximum inequality.

        Args:
            values: List of numeric values (e.g., commit counts per author).

        Returns:
            Gini coefficient between 0 and 1.
        """
        # Fewer than two samples cannot exhibit inequality.
        if not values or len(values) < 2:
            return 0.0

        sorted_values = sorted(values)
        n = len(sorted_values)

        total = sum(sorted_values)
        if total == 0:
            return 0.0

        # Closed form for ascending-sorted samples with 1-based rank i:
        #   G = (2 * sum(i * x_i)) / (n * sum(x)) - (n + 1) / n
        cumulative_sum = 0.0
        for rank, value in enumerate(sorted_values, start=1):
            cumulative_sum += value * rank

        gini = (2 * cumulative_sum) / (n * total) - (n + 1) / n

        # Clamp floating-point drift just outside the valid range.
        return max(0.0, min(1.0, gini))

    def calculate_file_bus_factor(self, analysis: FileAnalysis) -> float:
        """Calculate bus factor for a single file.

        Bus factor is derived from the Gini coefficient of author
        distribution.  A lower bus factor indicates higher risk
        (concentration of ownership).

        Args:
            analysis: FileAnalysis with authorship data.

        Returns:
            Bus factor score in [1.0, num_authors] (lower = more risky).
        """
        # No history or a single owner both floor at the minimum score.
        if analysis.total_commits == 0:
            return 1.0
        if analysis.num_authors == 1:
            return 1.0

        commits = list(analysis.author_commits.values())
        gini = self.calculate_gini(commits)

        # Perfect equality (gini=0) yields the full author count; total
        # inequality (gini=1) collapses to 1.0.
        bus_factor = 1.0 + (1.0 - gini) * (analysis.num_authors - 1)

        return min(bus_factor, float(analysis.num_authors))

    def calculate_repository_bus_factor(
        self,
        files: list[FileAnalysis],
        weights: Optional[dict[str, float]] = None
    ) -> float:
        """Calculate overall repository bus factor.

        Args:
            files: List of FileAnalysis objects.
            weights: Optional weights per file path (e.g., by importance);
                unlisted files default to weight 1.0.

        Returns:
            Weighted mean of per-file bus factors (1.0 for empty input).
        """
        if not files:
            return 1.0

        total_weight = 0.0
        weighted_sum = 0.0

        for analysis in files:
            bus_factor = self.calculate_file_bus_factor(analysis)
            weight = weights.get(analysis.path, 1.0) if weights else 1.0

            weighted_sum += bus_factor * weight
            total_weight += weight

        # All-zero weights would otherwise divide by zero.
        if total_weight == 0:
            return 1.0

        return weighted_sum / total_weight

    def calculate_module_bus_factors(
        self,
        files: list[FileAnalysis]
    ) -> dict[str, dict]:
        """Calculate bus factor for each module/directory.

        Args:
            files: List of FileAnalysis objects.

        Returns:
            Dictionary mapping module name to stats: bus_factor,
            gini_coefficient (of per-file commit counts), file_count and
            total_commits.
        """
        modules: dict[str, list[FileAnalysis]] = {}

        for analysis in files:
            # Files directly at the repository top level group under "root".
            module = analysis.module or "root"
            modules.setdefault(module, []).append(analysis)

        module_stats = {}
        for module, module_files in modules.items():
            avg_bus_factor = self.calculate_repository_bus_factor(module_files)
            gini = self.calculate_gini(
                [f.total_commits for f in module_files]
            )

            module_stats[module] = {
                "bus_factor": avg_bus_factor,
                "gini_coefficient": gini,
                "file_count": len(module_files),
                "total_commits": sum(f.total_commits for f in module_files)
            }

        return module_stats

    def assign_risk_levels(
        self,
        files: list[FileAnalysis]
    ) -> list[FileAnalysis]:
        """Assign risk levels to files based on bus factor.

        Mutates each FileAnalysis in place, setting ``bus_factor`` and
        ``risk_level``.

        Args:
            files: List of FileAnalysis objects.

        Returns:
            The same FileAnalysis objects, updated.
        """
        for analysis in files:
            bus_factor = self.calculate_file_bus_factor(analysis)
            analysis.bus_factor = bus_factor

            if analysis.total_commits == 0:
                analysis.risk_level = "unknown"
            elif analysis.num_authors == 1:
                analysis.risk_level = "critical"
            elif bus_factor < self.RISK_THRESHOLDS["critical"]:
                # Defensive: calculate_file_bus_factor currently floors at
                # 1.0, so this branch only fires if the thresholds change.
                analysis.risk_level = "critical"
            elif bus_factor < self.RISK_THRESHOLDS["high"]:
                analysis.risk_level = "high"
            elif bus_factor < self.RISK_THRESHOLDS["medium"]:
                analysis.risk_level = "medium"
            else:
                analysis.risk_level = "low"

        return files

    def calculate_repository_gini(
        self,
        files: list[FileAnalysis]
    ) -> float:
        """Calculate overall repository Gini coefficient.

        Measures how evenly commits are distributed across authors.
        High Gini means commits are concentrated in few authors.

        Args:
            files: List of FileAnalysis objects.

        Returns:
            Overall Gini coefficient (0.0 when fewer than two authors).
        """
        if not files:
            return 0.0

        # Aggregate commit counts per author across every file.
        total_commits_by_author: dict[str, int] = {}
        for analysis in files:
            for author, commits in analysis.author_commits.items():
                total_commits_by_author[author] = (
                    total_commits_by_author.get(author, 0) + commits
                )

        values = list(total_commits_by_author.values())

        if len(values) < 2:
            return 0.0

        # A Gini of 0.0 is a legitimate result meaning perfectly even
        # distribution.  An earlier revision substituted 0.5 here, which
        # misreported healthy repositories as moderately concentrated.
        return self.calculate_gini(values)
|
||||
230
src/repohealth/analyzers/git_analyzer.py
Normal file
230
src/repohealth/analyzers/git_analyzer.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Git repository analyzer using GitPython."""
|
||||
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from git import Commit, Repo
|
||||
from git.exc import InvalidGitRepositoryError, NoSuchPathError
|
||||
|
||||
from repohealth.models.author import AuthorStats
|
||||
from repohealth.models.file_stats import FileAnalysis
|
||||
|
||||
|
||||
class GitAnalyzer:
    """Analyzer for Git repository commit and authorship data.

    Note: ``Repo.iter_commits()`` yields commits newest-first (like
    ``git log``); every timestamp bookkeeping loop below relies on that.
    """

    def __init__(self, repo_path: str):
        """Initialize the analyzer with a repository path.

        Args:
            repo_path: Path to the Git repository.
        """
        self.repo_path = Path(repo_path)
        # Populated by validate_repository(); None until then.
        self.repo: Optional[Repo] = None
        # Cache of the last get_unique_authors() result.
        self._authors: dict[str, AuthorStats] = {}

    def validate_repository(self) -> bool:
        """Validate that the path is a valid Git repository.

        Returns:
            True if valid (and non-bare), False otherwise.
        """
        try:
            self.repo = Repo(self.repo_path)
            return not self.repo.bare
        except (InvalidGitRepositoryError, NoSuchPathError):
            return False

    def get_commit_count(self) -> int:
        """Get total commit count in the repository.

        Returns:
            Total number of commits (0 if the repo is not validated).
        """
        if not self.repo:
            return 0
        return len(list(self.repo.iter_commits()))

    def get_unique_authors(self) -> dict[str, AuthorStats]:
        """Get all unique authors in the repository.

        Returns:
            Dictionary mapping author email to AuthorStats.
        """
        if not self.repo:
            return {}

        authors = {}
        # iter_commits() is newest-first: the first commit we see for an
        # author is their most recent one, the final one is their oldest.
        # (A previous revision had first_commit/last_commit inverted.)
        for commit in self.repo.iter_commits():
            author_key = commit.author.email
            if author_key not in authors:
                authors[author_key] = AuthorStats(
                    name=commit.author.name,
                    email=commit.author.email
                )
            authors[author_key].total_commits += 1
            if not authors[author_key].last_commit:
                authors[author_key].last_commit = commit.authored_datetime
            # Keep pushing first_commit back toward the oldest commit.
            authors[author_key].first_commit = commit.authored_datetime

        self._authors = authors
        return authors

    def iter_file_commits(
        self,
        path: Optional[str] = None,
        extensions: Optional[list[str]] = None,
        depth: Optional[int] = None
    ) -> Generator[tuple[str, Commit], None, None]:
        """Iterate through commits with file information.

        Args:
            path: Optional path prefix to filter files.
            extensions: Optional list of file extensions (no dot) to include.
            depth: Optional limit on commit history depth.

        Yields:
            Tuples of (file_path, commit), newest commit first.
        """
        if not self.repo:
            return

        commit_count = 0
        for commit in self.repo.iter_commits():
            if depth and commit_count >= depth:
                break

            try:
                for file_data in commit.stats.files.keys():
                    if path and not file_data.startswith(path):
                        continue
                    if extensions:
                        ext = Path(file_data).suffix.lstrip('.')
                        if ext not in extensions:
                            continue
                    yield file_data, commit
            except (ValueError, KeyError):
                # Stats can fail on odd commits (e.g. unparsable diffs);
                # skip the commit rather than abort the scan.
                continue

            commit_count += 1

    def analyze_file_authors(
        self,
        file_path: str,
        depth: Optional[int] = None
    ) -> FileAnalysis:
        """Analyze authorship for a single file.

        Args:
            file_path: Path to the file.
            depth: Optional limit on commit history depth.

        Returns:
            FileAnalysis with authorship statistics (empty stats when the
            repository has not been validated).
        """
        module = str(Path(file_path).parent)
        extension = Path(file_path).suffix.lstrip('.')

        author_commits: dict[str, int] = {}
        first_commit: Optional[datetime] = None
        last_commit: Optional[datetime] = None
        total_commits = 0

        # Guard like the sibling accessors instead of raising
        # AttributeError when self.repo is still None.
        if self.repo:
            commit_count = 0
            for commit in self.repo.iter_commits(paths=file_path):
                if depth and commit_count >= depth:
                    break

                total_commits += 1
                author_email = commit.author.email
                author_commits[author_email] = (
                    author_commits.get(author_email, 0) + 1
                )

                # Newest-first iteration: first commit seen is the latest
                # touch; keep moving first_commit back toward the oldest.
                # (A previous revision had these two inverted.)
                if last_commit is None:
                    last_commit = commit.authored_datetime
                first_commit = commit.authored_datetime

                commit_count += 1

        return FileAnalysis(
            path=file_path,
            total_commits=total_commits,
            author_commits=author_commits,
            first_commit=first_commit,
            last_commit=last_commit,
            module=module,
            extension=extension
        )

    def get_all_files(
        self,
        extensions: Optional[list[str]] = None
    ) -> list[str]:
        """Get all tracked files in the repository.

        Args:
            extensions: Optional list of file extensions (no dot) to include.

        Returns:
            List of file paths.
        """
        if not self.repo:
            return []

        files = []
        for item in self.repo.tree().traverse():
            # Only blobs are files; trees are directories.
            if item.type == 'blob':
                if extensions:
                    ext = Path(item.path).suffix.lstrip('.')
                    if ext in extensions:
                        files.append(item.path)
                else:
                    files.append(item.path)

        return files

    def get_file_modules(self) -> dict[str, list[str]]:
        """Group files by their module/directory.

        Returns:
            Dictionary mapping module (parent directory) to list of files.
        """
        files = self.get_all_files()
        modules: dict[str, list[str]] = {}

        for file_path in files:
            module = str(Path(file_path).parent)
            modules.setdefault(module, []).append(file_path)

        return modules

    def get_head_commit(self) -> Optional[Commit]:
        """Get the HEAD commit of the repository.

        Returns:
            HEAD Commit or None if repository is empty/unvalidated.
        """
        if not self.repo:
            return None
        try:
            return self.repo.head.commit
        except ValueError:
            # An unborn HEAD (empty repository) raises ValueError.
            return None

    def get_branch_count(self) -> int:
        """Get the number of branches in the repository.

        Returns:
            Number of local branches (0 if unvalidated).
        """
        if not self.repo:
            return 0
        return len(list(self.repo.branches))
|
||||
309
src/repohealth/analyzers/risk_analyzer.py
Normal file
309
src/repohealth/analyzers/risk_analyzer.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""Risk analysis and hotspot identification module."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from repohealth.analyzers.bus_factor import BusFactorCalculator
|
||||
from repohealth.models.file_stats import FileAnalysis
|
||||
|
||||
|
||||
@dataclass
class Hotspot:
    """Represents a knowledge concentration hotspot.

    A file whose commit history is dominated by one author strongly
    enough to be flagged as a risk by RiskAnalyzer.identify_hotspots.
    """

    file_path: str          # path of the flagged file
    risk_level: str         # severity label, e.g. "critical" or "high"
    bus_factor: float       # bus factor copied from the FileAnalysis
    top_author: str         # dominant author (as keyed in author_commits)
    top_author_share: float  # fraction of commits by the top author (0-1)
    total_commits: int      # commits touching this file
    num_authors: int        # distinct authors of this file
    module: str             # module/directory the file belongs to
    suggestion: str = ""    # human-readable diversification advice
|
||||
|
||||
|
||||
@dataclass
class DiversificationSuggestion:
    """Represents a suggestion for code ownership diversification."""

    file_path: str               # file needing broader ownership
    current_author: str          # author currently dominating the file
    suggested_authors: list[str]  # candidates to involve in the file
    priority: str                # "critical", "high" or "medium"
    reason: str                  # explanation of why the file was flagged
    action: str                  # concrete recommended next step
|
||||
|
||||
|
||||
class RiskAnalyzer:
    """Analyzer for knowledge concentration and risk assessment."""

    # Thresholds on the top author's commit share (0-1) for risk levels.
    CRITICAL_THRESHOLD = 0.8
    HIGH_THRESHOLD = 0.6
    MEDIUM_THRESHOLD = 0.4

    def __init__(self, risk_threshold: float = 0.7):
        """Initialize the analyzer.

        Args:
            risk_threshold: Threshold for risk detection.
        """
        self.risk_threshold = risk_threshold
        self.bus_factor_calculator = BusFactorCalculator(risk_threshold)

    def identify_hotspots(
        self,
        files: list[FileAnalysis],
        limit: int = 20
    ) -> list[Hotspot]:
        """Identify knowledge concentration hotspots.

        Only files at "critical" or "high" ownership concentration are
        reported.

        Args:
            files: List of FileAnalysis objects.
            limit: Maximum number of hotspots to return.

        Returns:
            List of Hotspot objects sorted by severity, then descending
            bus factor.
        """
        hotspots = []

        for analysis in files:
            if analysis.total_commits == 0:
                continue

            top_author_data = analysis.top_author
            if not top_author_data:
                continue

            top_author, _top_count = top_author_data
            top_share = analysis.top_author_share

            if top_share >= self.CRITICAL_THRESHOLD:
                risk_level = "critical"
            elif top_share >= self.HIGH_THRESHOLD:
                risk_level = "high"
            elif top_share >= self.MEDIUM_THRESHOLD:
                risk_level = "medium"
            else:
                risk_level = "low"

            if risk_level in ("critical", "high"):
                suggestion = self._generate_suggestion(analysis, top_author)

                hotspots.append(Hotspot(
                    file_path=analysis.path,
                    risk_level=risk_level,
                    bus_factor=analysis.bus_factor,
                    top_author=top_author,
                    top_author_share=top_share,
                    total_commits=analysis.total_commits,
                    num_authors=analysis.num_authors,
                    module=analysis.module,
                    suggestion=suggestion
                ))

        # Explicit severity ranking.  Sorting on the raw strings only
        # worked by alphabetical accident ("critical" < "high") and would
        # silently misorder "low" before "medium" if those levels were
        # ever included.
        severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}
        hotspots.sort(key=lambda h: (severity_rank[h.risk_level], -h.bus_factor))

        return hotspots[:limit]

    def _generate_suggestion(
        self,
        analysis: FileAnalysis,
        top_author: str
    ) -> str:
        """Generate a diversification suggestion for a file.

        Args:
            analysis: FileAnalysis for the file.
            top_author: The primary author.

        Returns:
            Suggestion string.
        """
        if analysis.num_authors == 1:
            return (
                f"This file is entirely owned by {top_author}. "
                "Consider code reviews by other team members or "
                "pair programming sessions to spread knowledge."
            )
        elif analysis.top_author_share >= 0.8:
            return (
                f"This file is {analysis.top_author_share:.0%} owned by {top_author}. "
                "Encourage other developers to contribute to this file."
            )
        else:
            return (
                f"Primary ownership by {top_author} at {analysis.top_author_share:.0%}. "
                "Gradually increase contributions from other team members."
            )

    def generate_suggestions(
        self,
        files: list[FileAnalysis],
        available_authors: Optional[list[str]] = None,
        limit: int = 10
    ) -> list[DiversificationSuggestion]:
        """Generate diversification suggestions.

        Args:
            files: List of FileAnalysis objects.
            available_authors: List of available authors to suggest.
            limit: Maximum number of suggestions to return.

        Returns:
            List of DiversificationSuggestion objects, highest priority
            first, ties broken by file path.
        """
        suggestions = []

        for analysis in files:
            if analysis.total_commits == 0:
                continue

            top_author_data = analysis.top_author
            if not top_author_data:
                continue

            top_author, _ = top_author_data

            # Only files above the critical ownership share get suggestions.
            if analysis.top_author_share < self.CRITICAL_THRESHOLD:
                continue

            if available_authors:
                # Prefer authors who already touched the file.
                other_authors = [
                    a for a in available_authors
                    if a != top_author and a in analysis.author_commits
                ]
                if len(other_authors) < 2:
                    # Pad with remaining team members, skipping anyone
                    # already selected so no author is suggested twice.
                    fillers = [
                        a for a in available_authors
                        if a != top_author and a not in other_authors
                    ]
                    other_authors.extend(fillers[:2 - len(other_authors)])
            else:
                other_authors = [
                    a for a in analysis.author_commits.keys()
                    if a != top_author
                ][:3]

            if not other_authors:
                continue

            if analysis.top_author_share >= 0.9:
                priority = "critical"
            elif analysis.top_author_share >= 0.8:
                priority = "high"
            else:
                priority = "medium"

            reason = (
                f"File has {analysis.top_author_share:.0%} ownership by {top_author} "
                f"across {analysis.total_commits} commits with {analysis.num_authors} authors."
            )

            action = (
                f"Assign code reviews to {', '.join(other_authors[:2])} "
                f"for changes to {analysis.path}"
            )

            suggestions.append(DiversificationSuggestion(
                file_path=analysis.path,
                current_author=top_author,
                suggested_authors=other_authors,
                priority=priority,
                reason=reason,
                action=action
            ))

        suggestions.sort(key=lambda s: (
            {"critical": 0, "high": 1, "medium": 2}[s.priority],
            s.file_path
        ))

        return suggestions[:limit]

    def calculate_risk_summary(
        self,
        files: list[FileAnalysis]
    ) -> dict:
        """Calculate a summary of repository risk.

        Args:
            files: List of FileAnalysis objects.

        Returns:
            Dictionary with per-level counts, percentage_critical,
            percentage_high and an overall_risk label.
        """
        if not files:
            return {
                "critical": 0,
                "high": 0,
                "medium": 0,
                "low": 0,
                "unknown": 0,
                "overall_risk": "unknown"
            }

        risk_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0}

        for analysis in files:
            # Files that never went through risk assignment (or carry an
            # unexpected label) are tallied as "unknown" instead of
            # raising KeyError.
            level = analysis.risk_level
            if level not in risk_counts:
                level = "unknown"
            risk_counts[level] += 1

        total = len(files)

        # Overall label from the share of files at each severity.
        if risk_counts["critical"] >= total * 0.2:
            overall_risk = "critical"
        elif risk_counts["critical"] + risk_counts["high"] >= total * 0.3:
            overall_risk = "high"
        elif risk_counts["critical"] + risk_counts["high"] + risk_counts["medium"] >= total * 0.4:
            overall_risk = "medium"
        else:
            overall_risk = "low"

        risk_counts["percentage_critical"] = (
            risk_counts["critical"] / total * 100 if total > 0 else 0
        )
        risk_counts["percentage_high"] = (
            risk_counts["high"] / total * 100 if total > 0 else 0
        )
        risk_counts["overall_risk"] = overall_risk

        return risk_counts

    def analyze_module_risk(
        self,
        files: list[FileAnalysis]
    ) -> dict:
        """Analyze risk at the module level.

        Args:
            files: List of FileAnalysis objects.

        Returns:
            Dictionary mapping modules to risk statistics.
        """
        modules: dict[str, list[FileAnalysis]] = {}

        for analysis in files:
            # Files directly at the repository top level group under "root".
            module = analysis.module or "root"
            modules.setdefault(module, []).append(analysis)

        module_risk = {}

        for module, module_files in modules.items():
            avg_bus_factor = self.bus_factor_calculator.calculate_repository_bus_factor(
                module_files
            )

            risk_summary = self.calculate_risk_summary(module_files)

            module_risk[module] = {
                "bus_factor": avg_bus_factor,
                "file_count": len(module_files),
                "risk_summary": risk_summary,
                "hotspot_count": sum(
                    1 for f in module_files
                    if f.risk_level in ["critical", "high"]
                )
            }

        return module_risk
|
||||
Reference in New Issue
Block a user