From e32af63e27907ce058296f8b2dd7b97ed7373918 Mon Sep 17 00:00:00 2001 From: 7000pctAUTO Date: Thu, 29 Jan 2026 19:59:28 +0000 Subject: [PATCH] Initial commit: git-issue-commit CLI tool --- src/parser/markdown.rs | 142 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 src/parser/markdown.rs diff --git a/src/parser/markdown.rs b/src/parser/markdown.rs new file mode 100644 index 0000000..63a8d4e --- /dev/null +++ b/src/parser/markdown.rs @@ -0,0 +1,142 @@ +use regex::Regex; + +#[derive(Debug, Clone, Default)] +pub struct MarkdownParser; + +impl MarkdownParser { + pub fn new() -> Self { + MarkdownParser + } + + pub fn extract_title(&self, content: &str) -> Option { + let lines: Vec<&str> = content.lines().collect(); + extract_title(&lines) + } + + pub fn extract_code_blocks(&self, content: &str) -> Vec { + extract_code_blocks(content) + } + + pub fn extract_key_points(&self, content: &str) -> Vec { + let re = Regex::new(r"^[\s]*[-*+]\s+(.+)$").expect("Invalid regex pattern"); + re.captures_iter(content) + .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())) + .filter(|s| !s.starts_with('[')) + .collect() + } + + pub fn extract_breaking_change_section(&self, content: &str) -> Option { + let re = Regex::new(r"(?i)BREAKING\s*CHANGE[^\n]*\n*([^\n#][^\n]*)").expect("Invalid regex"); + re.captures(content) + .and_then(|cap| cap.get(1)) + .map(|m| m.as_str().trim().to_string()) + } + + pub fn clean_text(&self, content: &str) -> String { + content + .lines() + .map(|line| line.trim().trim_start_matches('#').trim()) + .filter(|line| !line.is_empty()) + .collect::>() + .join(" ") + .trim() + .to_string() + } +} + +pub fn parse_markdown(content: &str) -> MarkdownParseResult { + let lines: Vec<&str> = content.lines().collect(); + + let title = extract_title(&lines); + let body = extract_body(&lines); + let code_blocks = extract_code_blocks(content); + let links = extract_links(content); + let lists = extract_lists(content); + + MarkdownParseResult { + title, + body, + code_blocks, + links, + lists, + } +} + +fn extract_title(lines: &[&str]) -> Option { + for line in lines { + if line.starts_with("# ") { + return Some(line[2..].trim().to_string()); + } + } + None +} + +fn extract_body(lines: &[&str]) -> String { + let mut body_lines = Vec::new(); + let mut in_code_block = false; + + for line in lines { + if line.starts_with("```") { + in_code_block = !in_code_block; + continue; + } + if !in_code_block && !line.starts_with('#') { + body_lines.push(*line); + } + } + + body_lines.join("\n") +} + +fn extract_code_blocks(content: &str) -> Vec { + let re = Regex::new(r"```(\w*)\n([\s\S]*?)```").expect("Invalid regex pattern for code blocks"); + re.captures_iter(content) + .map(|cap| CodeBlock { + language: cap.get(1).map(|m| m.as_str().to_string()).unwrap_or_default(), + code: cap.get(2).map(|m| m.as_str().to_string()).unwrap_or_default(), + }) + .collect() +} + +fn extract_links(content: &str) -> Vec { + let re = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("Invalid regex pattern for links"); + re.captures_iter(content) + .map(|cap| Link { + text: cap.get(1).map(|m| m.as_str().to_string()).unwrap_or_default(), + url: cap.get(2).map(|m| m.as_str().to_string()).unwrap_or_default(), + }) + .collect() +} + +fn extract_lists(content: &str) -> Vec { + let re = Regex::new(r"^[\s]*[-*+]\s+(\[.*?\]\s+)?(.+)$").expect("Invalid regex pattern for lists"); + re.captures_iter(content) + .map(|cap| ListItem { + text: cap.get(2).map(|m| m.as_str().to_string()).unwrap_or_default(), + checked: cap.get(1).map(|m| !m.as_str().contains(' ')).unwrap_or(false), + }) + .collect() +} + +pub struct MarkdownParseResult { + pub title: Option, + pub body: String, + pub code_blocks: Vec, + pub links: Vec, + pub lists: Vec, +} + +pub struct CodeBlock { + pub language: String, + pub code: String, +} + +pub struct Link { + pub text: String, + pub url: String, +} + +pub struct ListItem { + pub text: String, + pub checked: bool, +}