Initial upload: GitPulse - Developer Productivity Analyzer CLI tool
Some checks failed
CI / test (push) Has been cancelled
CI / release (push) Has been cancelled

This commit is contained in:
2026-02-04 15:45:23 +00:00
parent de78f9812f
commit 695946fd99

477
src/commands/analyze.rs Normal file
View File

@@ -0,0 +1,477 @@
use crate::cli::analyze::AnalyzeArgs;
use crate::config::Config;
use crate::git::{Commit, Repository, TimeFilter};
use crate::models::{
    AuthorStats, CodeChurn, CommitChange, CommitFrequency, ContributorStats,
    FileChange, RefactoringActivity, RenameInfo, RepositoryInfo, TimeRange,
};
use crate::utils::author::{normalize_author_identity, AuthorAggregator};
use anyhow::{Context, Result};
use chrono::{Datelike, Duration, Timelike, Utc};
use std::collections::HashMap;
use std::path::PathBuf;
/// Command-line arguments for the `analyze` subcommand.
///
/// NOTE(review): this module duplicates `crate::cli::analyze::AnalyzeArgs`,
/// which is already imported at the top of this file. The original also
/// re-exported the local copy (`pub use analyze_args::AnalyzeArgs;`), which
/// made two `AnalyzeArgs` names collide in this module and fail to compile.
/// The conflicting re-export has been dropped; confirm which of the two
/// definitions callers should use and delete the other.
pub mod analyze_args {
    use clap::Parser; // `Args` was imported but never used

    use std::path::PathBuf;

    /// Options controlling the scope and output of a repository analysis run.
    #[derive(Parser, Debug)]
    pub struct AnalyzeArgs {
        /// Start of the analysis window.
        #[arg(short, long, value_name = "PERIOD")]
        #[arg(help = "Time period (e.g., 7d, 2w, 1m, 1y, or YYYY-MM-DD:YYYY-MM-DD)")]
        pub since: Option<String>,

        /// End of the analysis window.
        #[arg(short, long, value_name = "DATE")]
        #[arg(help = "End date for analysis")]
        pub until: Option<String>,

        /// Restrict analysis to the most recent N commits.
        #[arg(long, value_name = "N")]
        #[arg(help = "Analyze last N commits")]
        pub commits: Option<usize>,

        /// Keep merge commits (they are filtered out by default).
        #[arg(long)]
        #[arg(help = "Include merge commits")]
        pub include_merges: bool,

        /// Skip the (diff-heavy) code-churn pass.
        #[arg(long)]
        #[arg(help = "Skip code churn analysis")]
        pub no_churn: bool,

        /// Skip the rename/copy detection pass.
        #[arg(long)]
        #[arg(help = "Skip refactoring detection")]
        pub no_refactor: bool,

        /// Emit machine-readable JSON instead of the terminal report.
        #[arg(long)]
        #[arg(help = "Output in JSON format")]
        pub json: bool,

        /// Include the commit history section in the report.
        #[arg(long)]
        #[arg(help = "Show commit history")]
        pub history: bool,

        /// Cap the contributor list at N entries.
        #[arg(long, value_name = "N")]
        #[arg(help = "Limit to top N contributors")]
        pub top: Option<usize>,

        /// Destination file for JSON/CSV export.
        #[arg(long, value_name = "PATH")]
        #[arg(help = "Output file path (for JSON/CSV)")]
        pub output: Option<PathBuf>,
    }
}
/// Entry point for the `analyze` subcommand.
///
/// Collects the commits matching the requested time window, runs the
/// individual analyses (frequency, churn, contributors, refactoring), and
/// renders the result as a terminal report, JSON, or CSV.
///
/// NOTE(review): `config` is accepted but currently unused; `export::` is not
/// brought into scope by any visible `use` — confirm it resolves elsewhere.
pub fn run(repo_path: Option<PathBuf>, args: AnalyzeArgs, config: &Config, verbose: bool) -> Result<()> {
    let repo = Repository::new(repo_path)?;
    if verbose {
        eprintln!("Analyzing repository at: {}", repo.path().display());
    }

    let time_filter = build_time_filter(&args)?;
    let mut commits = collect_commits(&repo, &time_filter, args.include_merges)?;

    // `collect_commits` returns commits sorted oldest -> newest, so the
    // "last N commits" live at the tail of the vector. (The original sliced
    // the head, which kept the *oldest* N instead.)
    if let Some(limit) = args.commits {
        if limit > 0 && commits.len() > limit {
            commits = commits.split_off(commits.len() - limit);
        }
    }

    if commits.is_empty() {
        anyhow::bail!("No commits found in the specified time range");
    }

    let repo_info = analyze_repository(&repo)?;
    let commit_frequency = analyze_commit_frequency(&commits);
    let code_churn = if args.no_churn {
        CodeChurn::default()
    } else {
        analyze_code_churn(&repo, &commits)?
    };
    let contributors = analyze_contributors(&commits, args.top)?;
    let refactoring = if args.no_refactor {
        RefactoringActivity::default()
    } else {
        analyze_refactoring(&repo, &commits)?
    };

    // Commits are ordered oldest -> newest: the range STARTS at the first
    // commit and ENDS at the last. (The original had these swapped, which
    // produced a negative day count.)
    let start_time = commits.first().map(|c| c.time()).unwrap_or_else(Utc::now);
    let end_time = commits.last().map(|c| c.time()).unwrap_or_else(Utc::now);
    let time_range = TimeRange {
        start: start_time.to_rfc3339(),
        end: end_time.to_rfc3339(),
        days: (end_time - start_time).num_days() as u32 + 1,
    };

    let result = crate::models::AnalysisResult {
        repository: repo_info,
        time_range,
        commit_frequency,
        code_churn,
        contributors,
        refactoring,
        time_series: Vec::new(),
    };

    // Output selection: explicit --json wins; otherwise an --output path's
    // extension picks CSV vs JSON; otherwise print the terminal report.
    if args.json {
        export::export_json(&result, args.output)?;
    } else if let Some(output) = args.output {
        if output.extension().map_or(false, |ext| ext == "csv") {
            export::export_csv(&result, &output)?;
        } else {
            export::export_json(&result, Some(output))?;
        }
    } else {
        print_analysis(&result, args.history);
    }

    let _ = config; // see NOTE above: parameter kept for interface stability
    Ok(())
}
/// Translate the CLI `--since` / `--until` strings into a built `TimeFilter`.
/// Either bound may be absent, in which case the filter is left open on that
/// side.
fn build_time_filter(args: &AnalyzeArgs) -> Result<TimeFilter> {
    let mut filter = TimeFilter::new();
    if let Some(raw) = args.since.as_ref() {
        filter = filter.since(crate::git::filter::parse_date(raw)?);
    }
    if let Some(raw) = args.until.as_ref() {
        filter = filter.until(crate::git::filter::parse_date(raw)?);
    }
    Ok(filter.build())
}
/// Walk the repository history and return the commits that pass the time
/// filter (and, unless `include_merges` is set, a merge-commit filter),
/// sorted oldest -> newest.
fn collect_commits(
    repo: &Repository,
    time_filter: &TimeFilter,
    include_merges: bool,
) -> Result<Vec<Commit>> {
    let walker = crate::git::CommitWalker::new(repo.raw())?;

    let mut selected = Vec::new();
    for commit in walker.filter_map(Result::ok) {
        // Merges are dropped by default; walk errors are silently skipped.
        if commit.is_merge() && !include_merges {
            continue;
        }
        if time_filter.contains(&commit) {
            selected.push(commit);
        }
    }

    // Oldest first, so callers can treat index order as chronological order.
    selected.sort_by(|lhs, rhs| lhs.time().cmp(&rhs.time()));
    Ok(selected)
}
/// Gather top-level repository facts: branch, remote, commit/author totals,
/// and repository age in days.
fn analyze_repository(repo: &Repository) -> Result<RepositoryInfo> {
    let branch = repo.branch_name()?;
    let remote_url = repo.remote_url()?;
    let total_commits = repo.commit_count()?;

    // Count distinct authors across the whole history, merging identities
    // that normalize to the same representation.
    let walker = crate::git::CommitWalker::new(repo.raw())?;
    let mut unique_authors = std::collections::HashSet::new();
    for commit in walker.filter_map(Result::ok) {
        unique_authors.insert(normalize_author_identity(
            &commit.author_name(),
            &commit.author_email(),
        ));
    }

    // Age is measured from the first commit to now; an empty history reports
    // zero days.
    let age_days = match repo.first_commit()? {
        Some(root) => (Utc::now() - root.time()).num_days(),
        None => 0,
    };

    Ok(RepositoryInfo {
        path: repo.path().to_string_lossy().to_string(),
        branch,
        remote_url,
        total_commits,
        total_authors: unique_authors.len(),
        repository_age_days: age_days,
    })
}
/// Summarize commit cadence: totals, per-day/week/month rates, and
/// day-of-week / hour-of-day distributions.
///
/// Rates are normalized by the number of *distinct* days that actually have
/// commits (minimum 1 to avoid division by zero), not by calendar span.
fn analyze_commit_frequency(commits: &[Commit]) -> CommitFrequency {
    let total = commits.len();
    let mut day_counts: HashMap<String, usize> = HashMap::new();
    let mut hour_counts: HashMap<u32, usize> = HashMap::new();
    let mut dates: Vec<String> = Vec::new();
    // Indexed by Weekday::num_days_from_sunday(), i.e. Sunday == 0.
    let day_names = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"];

    for commit in commits {
        let time = commit.time();
        // BUG FIX: `weekday() as usize` uses chrono's enum discriminants
        // (Monday == 0), which mislabelled every commit by one day against
        // the Sunday-first `day_names` table. `num_days_from_sunday()`
        // matches the table's ordering.
        let day_name = day_names[time.weekday().num_days_from_sunday() as usize].to_string();
        let hour = time.hour(); // needs `chrono::Timelike` in scope
        let date = time.format("%Y-%m-%d").to_string();
        *day_counts.entry(day_name).or_insert(0) += 1;
        *hour_counts.entry(hour).or_insert(0) += 1;
        dates.push(date);
    }

    // Ties are broken by HashMap iteration order (arbitrary), as before.
    let (busiest_day, busiest_count) = day_counts
        .iter()
        .max_by_key(|(_, v)| *v)
        .map(|(k, v)| (k.clone(), *v))
        .unwrap_or((String::new(), 0));
    let (quietest_day, quietest_count) = day_counts
        .iter()
        .min_by_key(|(_, v)| *v)
        .map(|(k, v)| (k.clone(), *v))
        .unwrap_or((String::new(), 0));

    let unique_days = dates.iter().collect::<std::collections::HashSet<_>>().len();
    let days = if unique_days > 0 { unique_days as f64 } else { 1.0 };
    let weeks = days / 7.0;
    let months = days / 30.0;

    CommitFrequency {
        total_commits: total,
        commits_per_day: total as f64 / days,
        commits_per_week: total as f64 / weeks,
        commits_per_month: total as f64 / months,
        busiest_day,
        busiest_day_count: busiest_count,
        quietest_day,
        quietest_day_count: quietest_count,
        day_distribution: day_counts,
        hour_distribution: hour_counts,
    }
}
fn analyze_code_churn(repo: &Repository, commits: &[Commit]) -> Result<CodeChurn> {
let mut total_added = 0;
let mut total_removed = 0;
let mut largest_commit = None;
let mut churn_files: HashMap<String, FileChange> = HashMap::new();
let mut additions_by_day: HashMap<String, usize> = HashMap::new();
let mut removals_by_day: HashMap<String, usize> = HashMap::new();
for commit in commits {
let diff = commit.diff_to_parent()?;
if let Some(mut diff) = diff {
let stats = repo.stats(&diff)?;
let added = stats.files_changed() as usize;
let removed = stats.deletions() as usize;
let net = added as i64 - removed as i64;
let date = commit.time().format("%Y-%m-%d").to_string();
*additions_by_day.entry(date.clone()).or_insert(0) += added;
*removals_by_day.entry(date).or_insert(0) += removed;
total_added += added;
total_removed += removed;
if largest_commit.as_ref().map(|c| c.lines_added).unwrap_or(0) < added {
largest_commit = Some(CommitChange {
oid: commit.hex(),
message: commit.message_short(80),
author: commit.author_name(),
timestamp: commit.time().to_rfc3339(),
lines_added: added,
lines_removed: removed,
net_change: net,
});
}
let diff_stats = diff.stats_with_context(git2::DiffStatsFormat::FULL, 0)?;
for i in 0..diff_stats.files_changed() {
if let Ok(path) = diff_stats.get_name(i) {
let stats_i = diff_stats.get_delta(i).unwrap_or_default();
let added = stats_i.additions() as usize;
let removed = stats_i.deletions() as usize;
let entry = churn_files.entry(path.to_string()).or_insert(FileChange {
path: path.to_string(),
lines_added: 0,
lines_removed: 0,
net_change: 0,
commit_count: 0,
});
entry.lines_added += added;
entry.lines_removed += removed;
entry.commit_count += 1;
}
}
}
}
let mut top_files: Vec<_> = churn_files.into_values().collect();
top_files.sort_by(|a, b| (a.lines_added + a.lines_removed).cmp(&(b.lines_added + b.lines_removed)));
top_files.reverse();
top_files.truncate(20);
let total_commits = commits.len() as f64;
let avg_size = if total_commits > 0.0 {
(total_added + total_removed) as f64 / total_commits
} else {
0.0
};
Ok(CodeChurn {
total_lines_added: total_added,
total_lines_removed: total_removed,
net_change: total_added as i64 - total_removed as i64,
average_commit_size: avg_size,
largest_commit,
top_churn_files: top_files,
additions_by_day,
removals_by_day,
})
}
/// Aggregate per-author statistics and return the top contributors by commit
/// count (default cap: 50).
fn analyze_contributors(commits: &[Commit], top: Option<usize>) -> Result<ContributorStats> {
    let mut aggregator = AuthorAggregator::new();
    for commit in commits {
        aggregator.add_commit(
            &commit.author_name(),
            &commit.author_email(),
            commit.time(),
            None,
        );
    }

    let mut ranked: Vec<_> = aggregator.stats.into_values().collect();
    // BUG FIX: the original took the first N values straight out of the
    // HashMap, whose iteration order is arbitrary — the "top" contributors
    // were effectively random. Sort by commit count, descending, first.
    ranked.sort_by(|a, b| b.commits.cmp(&a.commits));

    // BUG FIX: report the full contributor count, not the truncated list
    // length, as `total_contributors`.
    let total_contributors = ranked.len();
    let mut top_contributors = ranked;
    top_contributors.truncate(top.unwrap_or(50));

    let contributions: HashMap<String, usize> = top_contributors
        .iter()
        .map(|s| (s.name.clone(), s.commits))
        .collect();

    Ok(ContributorStats {
        total_contributors,
        top_contributors,
        contributions_by_author: contributions,
    })
}
/// Detect rename and copy activity across the commit range, identify files
/// that are renamed repeatedly ("hotspots"), and compute an overall
/// refactoring score (rename+copy events per commit, as a percentage).
fn analyze_refactoring(repo: &Repository, commits: &[Commit]) -> Result<RefactoringActivity> {
    let mut total_renames = 0;
    let mut total_copies = 0;
    let mut renamed_files = Vec::new();
    let mut rename_counts: HashMap<String, usize> = HashMap::new();

    for commit in commits {
        // Root commits (no parent diff) are skipped.
        let mut diff = match commit.diff_to_parent()? {
            Some(d) => d,
            None => continue,
        };
        // Rename/copy detection must run before per-file deltas are read.
        repo.diff_find_similar(&mut diff)?;

        let file_stats = diff.stats_with_context(git2::DiffStatsFormat::FULL, 0)?;
        for idx in 0..file_stats.files_changed() {
            let new_path = match file_stats.get_name(idx) {
                Ok(p) => p,
                Err(_) => continue,
            };

            // A differing old path marks a rename.
            if let Ok(old_path) = file_stats.get_oldname(idx) {
                if old_path != new_path {
                    total_renames += 1;
                    *rename_counts.entry(new_path.to_string()).or_insert(0) += 1;
                    renamed_files.push(RenameInfo {
                        old_path: old_path.to_string(),
                        new_path: new_path.to_string(),
                        commit_oid: commit.hex(),
                        commit_message: commit.message_short(80),
                        similarity: 0.0,
                        timestamp: commit.time().to_rfc3339(),
                    });
                }
            }

            // Copies are counted independently of renames.
            let status = file_stats
                .get_delta(idx)
                .map(|d| d.status())
                .unwrap_or_default();
            if status == git2::Delta::Copied {
                total_copies += 1;
            }
        }
    }

    // Files renamed at least twice become hotspots, most-renamed first.
    let mut hotspots: Vec<_> = rename_counts
        .into_iter()
        .filter(|&(_, count)| count >= 2)
        .map(|(path, count)| crate::models::RefactoringHotspot {
            file_path: path,
            rename_count: count,
            last_rename: String::new(),
        })
        .collect();
    hotspots.sort_by(|a, b| b.rename_count.cmp(&a.rename_count));

    let score = if commits.is_empty() {
        0.0
    } else {
        (total_renames as f64 + total_copies as f64) / commits.len() as f64 * 100.0
    };

    Ok(RefactoringActivity {
        total_renames,
        total_copies,
        refactoring_score: score,
        renamed_files,
        refactoring_hotspots: hotspots,
    })
}
/// Render the analysis result as a human-readable terminal report.
///
/// NOTE(review): `show_history` is accepted but never used (same as the
/// original); the commit-history section appears to be unimplemented.
fn print_analysis(result: &crate::models::AnalysisResult, show_history: bool) {
    let _ = show_history;
    let heavy_rule = "=".repeat(60);
    let light_rule = "-".repeat(60);
    // Prints a blank line, a light rule, the section title, and another rule.
    let section = |title: &str| {
        println!("\n{}", light_rule);
        println!("{}", title);
        println!("{}", light_rule);
    };

    // Report header.
    println!("\n{}", heavy_rule);
    println!(" GitPulse Analysis Report");
    println!("{}", heavy_rule);

    println!("\nRepository: {}", result.repository.path);
    if let Some(branch) = result.repository.branch.as_ref() {
        println!("Branch: {}", branch);
    }
    // RFC 3339 timestamps are trimmed to their date component at 'T'.
    let start_date = result.time_range.start.split('T').next().unwrap_or("");
    let end_date = result.time_range.end.split('T').next().unwrap_or("");
    println!("Time Range: {} to {} ({} days)", start_date, end_date, result.time_range.days);

    section("Commit Frequency");
    let freq = &result.commit_frequency;
    println!(" Total Commits: {}", freq.total_commits);
    println!(" Commits/day: {:.2}", freq.commits_per_day);
    println!(" Busiest Day: {} ({})", freq.busiest_day, freq.busiest_day_count);
    println!(" Quietest Day: {} ({})", freq.quietest_day, freq.quietest_day_count);

    // Churn section only appears when there was any measured churn.
    let churn = &result.code_churn;
    if churn.total_lines_added > 0 || churn.total_lines_removed > 0 {
        section("Code Churn");
        println!(" Lines Added: {}", churn.total_lines_added);
        println!(" Lines Removed: {}", churn.total_lines_removed);
        println!(" Net Change: {}", churn.net_change);
        println!(" Avg/Commit: {:.1}", churn.average_commit_size);
    }

    section("Top Contributors");
    for (rank, author) in result.contributors.top_contributors.iter().take(10).enumerate() {
        println!(" {}. {} ({})", rank + 1, author.name, author.commits);
    }

    // Refactoring section only appears when renames were detected.
    if result.refactoring.total_renames > 0 {
        section("Refactoring Activity");
        println!(" Renames: {}", result.refactoring.total_renames);
        println!(" Copies: {}", result.refactoring.total_copies);
        println!(" Refactoring Score: {:.1}", result.refactoring.refactoring_score);
    }

    println!("\n");
}