From 695946fd995281b262d823c863288cbc42ac2821 Mon Sep 17 00:00:00 2001
From: 7000pctAUTO
Date: Wed, 4 Feb 2026 15:45:23 +0000
Subject: [PATCH] Initial upload: GitPulse - Developer Productivity Analyzer CLI tool

---
 src/commands/analyze.rs | 477 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 477 insertions(+)
 create mode 100644 src/commands/analyze.rs

diff --git a/src/commands/analyze.rs b/src/commands/analyze.rs
new file mode 100644
index 0000000..28a77ee
--- /dev/null
+++ b/src/commands/analyze.rs
@@ -0,0 +1,477 @@
//! The `analyze` subcommand: collects commits, computes frequency/churn/
//! contributor/refactoring statistics, and prints or exports the report.

// NOTE(review): the original also had `use crate::cli::analyze::AnalyzeArgs;`,
// which conflicts (E0252) with the `pub use analyze_args::AnalyzeArgs;`
// re-export below. The local definition is kept; the duplicate import dropped.
use crate::config::Config;
use crate::git::{Commit, Repository, TimeFilter};
use crate::models::{
    AuthorStats, CodeChurn, CommitChange, CommitFrequency, ContributorStats,
    FileChange, RefactoringActivity, RenameInfo, RepositoryInfo, TimeRange,
};
use crate::utils::author::{normalize_author_identity, AuthorAggregator};
use anyhow::{Context, Result};
// `Timelike` added: `time.hour()` below requires it; `Datelike` alone is not enough.
use chrono::{Datelike, Duration, Timelike, Utc};
use std::collections::HashMap;
use std::path::PathBuf;

/// Command-line arguments for `gitpulse analyze`, parsed by clap.
pub mod analyze_args {
    use clap::{Args, Parser};
    use std::path::PathBuf;

    #[derive(Parser, Debug)]
    pub struct AnalyzeArgs {
        #[arg(short, long, value_name = "PERIOD")]
        #[arg(help = "Time period (e.g., 7d, 2w, 1m, 1y, or YYYY-MM-DD:YYYY-MM-DD)")]
        pub since: Option<String>,

        #[arg(short, long, value_name = "DATE")]
        #[arg(help = "End date for analysis")]
        pub until: Option<String>,

        #[arg(long, value_name = "N")]
        #[arg(help = "Analyze last N commits")]
        pub commits: Option<usize>,

        #[arg(long)]
        #[arg(help = "Include merge commits")]
        pub include_merges: bool,

        #[arg(long)]
        #[arg(help = "Skip code churn analysis")]
        pub no_churn: bool,

        #[arg(long)]
        #[arg(help = "Skip refactoring detection")]
        pub no_refactor: bool,

        #[arg(long)]
        #[arg(help = "Output in JSON format")]
        pub json: bool,

        #[arg(long)]
        #[arg(help = "Show commit history")]
        pub history: bool,

        #[arg(long, value_name = "N")]
        #[arg(help = "Limit to top N contributors")]
        pub top: Option<usize>,

        #[arg(long, value_name = "PATH")]
        #[arg(help = "Output file path (for JSON/CSV)")]
        pub output: Option<PathBuf>,
    }
}

pub use analyze_args::AnalyzeArgs;

/// Entry point for the `analyze` command.
///
/// Opens the repository at `repo_path` (or the current directory), collects
/// commits matching the requested time window, runs the individual analyses,
/// and either prints a report or exports it as JSON/CSV.
///
/// # Errors
/// Fails if the repository cannot be opened, a date argument cannot be
/// parsed, no commits match the filter, or export I/O fails.
pub fn run(repo_path: Option<PathBuf>, args: AnalyzeArgs, config: &Config, verbose: bool) -> Result<()> {
    // NOTE(review): `config` is currently unused; kept for signature parity
    // with the other subcommands. TODO: honor relevant config options here.
    let _ = config;

    let repo = Repository::new(repo_path).context("failed to open repository")?;

    if verbose {
        eprintln!("Analyzing repository at: {}", repo.path().display());
    }

    let time_filter = build_time_filter(&args)?;
    let mut commits = collect_commits(&repo, &time_filter, args.include_merges)?;

    // `--commits N` means the N most recent commits. `commits` is sorted
    // oldest-first, so keep the *tail*. (The original kept the head, i.e.
    // the N oldest commits.)
    if let Some(limit) = args.commits {
        if limit > 0 && commits.len() > limit {
            commits.drain(..commits.len() - limit);
        }
    }

    if commits.is_empty() {
        anyhow::bail!("No commits found in the specified time range");
    }

    let repo_info = analyze_repository(&repo)?;
    let commit_frequency = analyze_commit_frequency(&commits);
    let code_churn = if !args.no_churn {
        analyze_code_churn(&repo, &commits)?
    } else {
        CodeChurn::default()
    };
    let contributors = analyze_contributors(&commits, args.top)?;
    let refactoring = if !args.no_refactor {
        analyze_refactoring(&repo, &commits)?
    } else {
        RefactoringActivity::default()
    };

    // Commits are oldest-first, so the range runs first -> last. (The
    // original had these swapped, producing a negative duration that the
    // `as u32` cast wrapped into a huge day count.)
    let start_time = commits.first().map(|c| c.time()).unwrap_or_else(Utc::now);
    let end_time = commits.last().map(|c| c.time()).unwrap_or_else(Utc::now);
    let time_range = TimeRange {
        start: start_time.to_rfc3339(),
        end: end_time.to_rfc3339(),
        days: (end_time - start_time).num_days() as u32 + 1,
    };

    let result = crate::models::AnalysisResult {
        repository: repo_info,
        time_range,
        commit_frequency,
        code_churn,
        contributors,
        refactoring,
        time_series: Vec::new(),
    };

    if args.json {
        export::export_json(&result, args.output)?;
    } else if let Some(output) = args.output {
        // Route by file extension; anything that is not `.csv` gets JSON.
        if output.extension().map_or(false, |ext| ext == "csv") {
            export::export_csv(&result, &output)?;
        } else {
            export::export_json(&result, Some(output))?;
        }
    } else {
        print_analysis(&result, args.history);
    }

    Ok(())
}

/// Build the commit time filter from `--since` / `--until`.
///
/// Both bounds are optional; an empty filter matches every commit.
fn build_time_filter(args: &AnalyzeArgs) -> Result<TimeFilter> {
    let mut filter = TimeFilter::new();

    if let Some(since) = &args.since {
        let since_date = crate::git::filter::parse_date(since)?;
        filter = filter.since(since_date);
    }

    if let Some(until) = &args.until {
        let until_date = crate::git::filter::parse_date(until)?;
        filter = filter.until(until_date);
    }

    Ok(filter.build())
}

/// Walk the repository and return the commits that pass the time filter,
/// sorted oldest-first (the downstream analyses rely on this ordering).
/// Merge commits are skipped unless `include_merges` is set.
fn collect_commits(
    repo: &Repository,
    time_filter: &TimeFilter,
    include_merges: bool,
) -> Result<Vec<Commit>> {
    let walker = crate::git::CommitWalker::new(repo.raw())?;
    let mut commits: Vec<Commit> = walker
        .filter_map(|c| c.ok())
        .filter(|c| (include_merges || !c.is_merge()) && time_filter.contains(c))
        .collect();

    commits.sort_by_key(|c| c.time());
    Ok(commits)
}

/// Gather whole-repository metadata: branch, remote, totals, and age in days.
fn analyze_repository(repo: &Repository) -> Result<RepositoryInfo> {
    let branch = repo.branch_name()?;
    let remote_url = repo.remote_url()?;
    let total_commits = repo.commit_count()?;

    // Count distinct authors after identity normalization (same person,
    // different name/email spellings collapse to one entry).
    let total_authors = {
        let walker = crate::git::CommitWalker::new(repo.raw())?;
        let authors: std::collections::HashSet<String> = walker
            .filter_map(|c| c.ok())
            .map(|c| normalize_author_identity(&c.author_name(), &c.author_email()))
            .collect();
        authors.len()
    };

    let first_commit = repo.first_commit()?;
    let age_days = match first_commit {
        Some(commit) => (Utc::now() - commit.time()).num_days(),
        None => 0,
    };

    Ok(RepositoryInfo {
        path: repo.path().to_string_lossy().to_string(),
        branch,
        remote_url,
        total_commits,
        total_authors,
        repository_age_days: age_days,
    })
}

/// Compute commit-frequency statistics: per-day/week/month rates and the
/// busiest/quietest weekdays, plus weekday and hour-of-day distributions.
fn analyze_commit_frequency(commits: &[Commit]) -> CommitFrequency {
    let total = commits.len();
    let mut day_counts: HashMap<String, usize> = HashMap::new();
    let mut hour_counts: HashMap<u32, usize> = HashMap::new();
    let mut dates: Vec<String> = Vec::new();

    let day_names = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"];

    for commit in commits {
        let time = commit.time();
        // `day_names` is Sunday-first, so index with num_days_from_sunday()
        // (Sun = 0). The original cast `Weekday` directly (Mon = 0), which
        // shifted every label by one day.
        let day_name = day_names[time.weekday().num_days_from_sunday() as usize].to_string();
        let hour = time.hour();
        let date = time.format("%Y-%m-%d").to_string();

        *day_counts.entry(day_name).or_insert(0) += 1;
        *hour_counts.entry(hour).or_insert(0) += 1;
        dates.push(date);
    }

    let (busiest_day, busiest_count) = day_counts
        .iter()
        .max_by_key(|(_, v)| *v)
        .map(|(k, v)| (k.clone(), *v))
        .unwrap_or((String::new(), 0));

    let (quietest_day, quietest_count) = day_counts
        .iter()
        .min_by_key(|(_, v)| *v)
        .map(|(k, v)| (k.clone(), *v))
        .unwrap_or((String::new(), 0));

    // Rates are computed over *active* days (days with at least one commit),
    // clamped to 1 to avoid division by zero.
    let unique_days = dates.iter().collect::<std::collections::HashSet<_>>().len();
    let days = if unique_days > 0 { unique_days as f64 } else { 1.0 };
    let weeks = days / 7.0;
    let months = days / 30.0;

    CommitFrequency {
        total_commits: total,
        commits_per_day: total as f64 / days,
        commits_per_week: total as f64 / weeks,
        commits_per_month: total as f64 / months,
        busiest_day,
        busiest_day_count: busiest_count,
        quietest_day,
        quietest_day_count: quietest_count,
        day_distribution: day_counts,
        hour_distribution: hour_counts,
    }
}

/// Measure code churn: total lines added/removed, per-day series, the single
/// largest commit, and the 20 files with the most churn.
fn analyze_code_churn(repo: &Repository, commits: &[Commit]) -> Result<CodeChurn> {
    let mut total_added = 0;
    let mut total_removed = 0;
    let mut largest_commit = None;
    let mut churn_files: HashMap<String, FileChange> = HashMap::new();
    let mut additions_by_day: HashMap<String, usize> = HashMap::new();
    let mut removals_by_day: HashMap<String, usize> = HashMap::new();

    for commit in commits {
        // Root commits have no parent diff; skip them.
        if let Some(diff) = commit.diff_to_parent()? {
            let stats = repo.stats(&diff)?;
            // Lines added, not files changed — the original used
            // `files_changed()` here, drastically undercounting additions.
            let added = stats.insertions() as usize;
            let removed = stats.deletions() as usize;
            let net = added as i64 - removed as i64;

            let date = commit.time().format("%Y-%m-%d").to_string();
            *additions_by_day.entry(date.clone()).or_insert(0) += added;
            *removals_by_day.entry(date).or_insert(0) += removed;

            total_added += added;
            total_removed += removed;

            // "Largest" is ranked by lines added, matching the struct field
            // the comparison reads.
            if largest_commit.as_ref().map_or(0, |c: &CommitChange| c.lines_added) < added {
                largest_commit = Some(CommitChange {
                    oid: commit.hex(),
                    message: commit.message_short(80),
                    author: commit.author_name(),
                    timestamp: commit.time().to_rfc3339(),
                    lines_added: added,
                    lines_removed: removed,
                    net_change: net,
                });
            }

            // Per-file line counts come from git2's Patch API; the delta list
            // supplies the paths. (The original called nonexistent
            // `DiffStats::get_name`/`get_delta` accessors.)
            for (idx, delta) in diff.deltas().enumerate() {
                let path = match delta.new_file().path().and_then(|p| p.to_str()) {
                    Some(p) => p.to_string(),
                    None => continue, // non-UTF-8 path; skip rather than lossy-mangle the key
                };
                let (file_added, file_removed) = match git2::Patch::from_diff(&diff, idx)? {
                    Some(patch) => {
                        let (_context, adds, dels) = patch.line_stats()?;
                        (adds, dels)
                    }
                    None => (0, 0), // binary or unreadable delta: no line stats
                };
                let entry = churn_files.entry(path.clone()).or_insert_with(|| FileChange {
                    path,
                    lines_added: 0,
                    lines_removed: 0,
                    net_change: 0,
                    commit_count: 0,
                });
                entry.lines_added += file_added;
                entry.lines_removed += file_removed;
                // Keep net_change consistent (the original left it at 0).
                entry.net_change = entry.lines_added as i64 - entry.lines_removed as i64;
                entry.commit_count += 1;
            }
        }
    }

    // Highest total churn first, capped at 20 entries.
    let mut top_files: Vec<_> = churn_files.into_values().collect();
    top_files.sort_by_key(|f| std::cmp::Reverse(f.lines_added + f.lines_removed));
    top_files.truncate(20);

    let total_commits = commits.len() as f64;
    let avg_size = if total_commits > 0.0 {
        (total_added + total_removed) as f64 / total_commits
    } else {
        0.0
    };

    Ok(CodeChurn {
        total_lines_added: total_added,
        total_lines_removed: total_removed,
        net_change: total_added as i64 - total_removed as i64,
        average_commit_size: avg_size,
        largest_commit,
        top_churn_files: top_files,
        additions_by_day,
        removals_by_day,
    })
}

/// Aggregate per-author statistics and return the top `top` (default 50)
/// contributors ranked by commit count.
fn analyze_contributors(commits: &[Commit], top: Option<usize>) -> Result<ContributorStats> {
    let mut aggregator = AuthorAggregator::new();

    for commit in commits {
        aggregator.add_commit(
            &commit.author_name(),
            &commit.author_email(),
            commit.time(),
            None,
        );
    }

    let mut ranked: Vec<_> = aggregator.stats.into_values().collect();
    // Count everyone *before* truncating, and sort by commit count before
    // taking the top slice — the original `take`d straight from HashMap
    // iteration order, returning an arbitrary set of authors.
    let total_contributors = ranked.len();
    ranked.sort_by(|a, b| b.commits.cmp(&a.commits));
    ranked.truncate(top.unwrap_or(50));
    let top_contributors = ranked;

    let contributions: HashMap<String, usize> = top_contributors
        .iter()
        .map(|stats| (stats.name.clone(), stats.commits))
        .collect();

    Ok(ContributorStats {
        total_contributors,
        top_contributors,
        contributions_by_author: contributions,
    })
}

/// Detect renames and copies across the commit range and derive a simple
/// refactoring score (renames + copies per 100 commits).
fn analyze_refactoring(repo: &Repository, commits: &[Commit]) -> Result<RefactoringActivity> {
    let mut renames = 0;
    let mut copies = 0;
    let mut renamed_files = Vec::new();
    // path -> (rename count, timestamp of most recent rename)
    let mut rename_counts: HashMap<String, (usize, String)> = HashMap::new();

    for commit in commits {
        if let Some(mut diff) = commit.diff_to_parent()? {
            // Similarity analysis must run before inspecting deltas, or
            // renames show up as unrelated delete+add pairs.
            repo.diff_find_similar(&mut diff)?;

            for delta in diff.deltas() {
                match delta.status() {
                    git2::Delta::Copied => copies += 1,
                    git2::Delta::Renamed => {
                        let old_path = delta.old_file().path().and_then(|p| p.to_str());
                        let new_path = delta.new_file().path().and_then(|p| p.to_str());
                        if let (Some(old_path), Some(new_path)) = (old_path, new_path) {
                            renames += 1;
                            let ts = commit.time().to_rfc3339();
                            let entry = rename_counts
                                .entry(new_path.to_string())
                                .or_insert((0, String::new()));
                            entry.0 += 1;
                            // Commits are oldest-first, so the last write wins
                            // as "most recent rename".
                            entry.1 = ts.clone();
                            renamed_files.push(RenameInfo {
                                old_path: old_path.to_string(),
                                new_path: new_path.to_string(),
                                commit_oid: commit.hex(),
                                commit_message: commit.message_short(80),
                                // NOTE(review): git2's DiffDelta does not
                                // expose the similarity score; 0.0 is a
                                // placeholder. TODO: surface it if needed.
                                similarity: 0.0,
                                timestamp: ts,
                            });
                        }
                    }
                    _ => {}
                }
            }
        }
    }

    // A "hotspot" is a file renamed at least twice in the range.
    let mut hotspots: Vec<_> = rename_counts
        .into_iter()
        .filter(|(_, (count, _))| *count >= 2)
        .map(|(path, (count, last))| crate::models::RefactoringHotspot {
            file_path: path,
            rename_count: count,
            last_rename: last,
        })
        .collect();
    hotspots.sort_by(|a, b| b.rename_count.cmp(&a.rename_count));

    let score = if !commits.is_empty() {
        (renames as f64 + copies as f64) / commits.len() as f64 * 100.0
    } else {
        0.0
    };

    Ok(RefactoringActivity {
        total_renames: renames,
        total_copies: copies,
        refactoring_score: score,
        renamed_files,
        refactoring_hotspots: hotspots,
    })
}

/// Pretty-print the analysis report to stdout.
///
/// NOTE(review): `show_history` is accepted but never acted on —
/// `AnalysisResult` carries no per-commit list to render, so `--history`
/// is currently a no-op. TODO: implement history rendering or reject the flag.
fn print_analysis(result: &crate::models::AnalysisResult, show_history: bool) {
    let _ = show_history;

    println!("\n{}", "=".repeat(60));
    println!(" GitPulse Analysis Report");
    println!("{}", "=".repeat(60));
    println!("\nRepository: {}", result.repository.path);
    if let Some(branch) = &result.repository.branch {
        println!("Branch: {}", branch);
    }
    // RFC 3339 timestamps; keep only the date part before the 'T'.
    println!("Time Range: {} to {} ({} days)",
        result.time_range.start.split('T').next().unwrap_or(""),
        result.time_range.end.split('T').next().unwrap_or(""),
        result.time_range.days);

    println!("\n{}", "-".repeat(60));
    println!("Commit Frequency");
    println!("{}", "-".repeat(60));
    println!("  Total Commits:   {}", result.commit_frequency.total_commits);
    println!("  Commits/day:     {:.2}", result.commit_frequency.commits_per_day);
    println!("  Busiest Day:     {} ({})",
        result.commit_frequency.busiest_day,
        result.commit_frequency.busiest_day_count);
    println!("  Quietest Day:    {} ({})",
        result.commit_frequency.quietest_day,
        result.commit_frequency.quietest_day_count);

    if result.code_churn.total_lines_added > 0 || result.code_churn.total_lines_removed > 0 {
        println!("\n{}", "-".repeat(60));
        println!("Code Churn");
        println!("{}", "-".repeat(60));
        println!("  Lines Added:     {}", result.code_churn.total_lines_added);
        println!("  Lines Removed:   {}", result.code_churn.total_lines_removed);
        println!("  Net Change:      {}", result.code_churn.net_change);
        println!("  Avg/Commit:      {:.1}", result.code_churn.average_commit_size);
    }

    println!("\n{}", "-".repeat(60));
    println!("Top Contributors");
    println!("{}", "-".repeat(60));
    for (i, author) in result.contributors.top_contributors.iter().take(10).enumerate() {
        println!("  {}. {} ({})", i + 1, author.name, author.commits);
    }

    if result.refactoring.total_renames > 0 {
        println!("\n{}", "-".repeat(60));
        println!("Refactoring Activity");
        println!("{}", "-".repeat(60));
        println!("  Renames:         {}", result.refactoring.total_renames);
        println!("  Copies:          {}", result.refactoring.total_copies);
        println!("  Refactoring Score: {:.1}", result.refactoring.refactoring_score);
    }
    println!("\n");
}