# Source path (from the diff header):
#   repohealth-cli/src/repohealth/models/file_stats.py
from dataclasses import dataclass
from datetime import datetime
from typing import Optional


@dataclass
class FileAnalysis:
    """Analysis result for a single file.

    Holds the commit counts recorded for one path together with a few
    derived metrics. The convenience accessors below only read
    ``author_commits`` and ``total_commits``; nothing here validates that
    ``total_commits`` equals the sum of ``author_commits``.
    """

    # Path of the analysed file.
    path: str
    # Denominator used by the share calculations below.
    total_commits: int
    # Mapping of author name -> commit count for this file.
    author_commits: dict[str, int]
    # Optional timestamps; None when not supplied.
    first_commit: Optional[datetime] = None
    last_commit: Optional[datetime] = None
    # Metrics stored on the record; not computed in this class.
    gini_coefficient: float = 0.0
    bus_factor: float = 1.0
    risk_level: str = "unknown"
    module: str = ""
    extension: str = ""

    @property
    def num_authors(self) -> int:
        """Return the number of distinct authors recorded for this file."""
        return len(self.author_commits)

    @property
    def top_author(self) -> Optional[tuple[str, int]]:
        """Return ``(author, commit_count)`` for the author with most commits.

        Returns ``None`` when no authors are recorded. On a tie, the author
        that appears first in ``author_commits`` wins (``max`` keeps the
        first maximal item it encounters).
        """
        if not self.author_commits:
            return None
        return max(self.author_commits.items(), key=lambda item: item[1])

    @property
    def top_author_share(self) -> float:
        """Return the top author's commits as a fraction of ``total_commits``.

        The value is a plain ratio (it is not multiplied by 100). Returns
        ``0.0`` when there are no authors or ``total_commits`` is zero.
        """
        if self.total_commits == 0:
            return 0.0
        # Evaluate the property once: each access re-runs max() over the dict.
        top = self.top_author
        if top is None:
            return 0.0
        return top[1] / self.total_commits

    def get_author_share(self, author: str) -> float:
        """Return *author*'s commits as a fraction of ``total_commits``.

        The value is a plain ratio (it is not multiplied by 100). Returns
        ``0.0`` when there are no authors, ``total_commits`` is zero, or
        *author* has no entry in ``author_commits``.
        """
        if not self.author_commits or self.total_commits == 0:
            return 0.0
        return self.author_commits.get(author, 0) / self.total_commits