This commit is contained in:
370
app/techdebt-tracker-cli/src/core/language.rs
Normal file
370
app/techdebt-tracker-cli/src/core/language.rs
Normal file
@@ -0,0 +1,370 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::models::{CommentType, FileLocation, PatternConfig, Priority, TechDebtItem};
|
||||
|
||||
/// Programming languages whose comment syntax the scanner knows about.
#[derive(Debug, Clone, PartialEq)]
pub enum Language {
    JavaScript,
    TypeScript,
    Python,
    Rust,
    Go,
    Java,
    C,
    Cpp,
    Ruby,
    /// Placeholder for an unrecognised language. It has no comment syntax and
    /// is never returned by [`Language::from_path`], which yields `None` instead.
    Unknown,
}

impl Language {
    /// Detects the language from the file extension of `path`.
    ///
    /// The extension is compared case-insensitively. Returns `None` when the
    /// path has no extension, the extension is not valid UTF-8, or the
    /// extension is not one of the recognised ones.
    pub fn from_path(path: &PathBuf) -> Option<Self> {
        let ext = path.extension()?.to_str()?.to_lowercase();
        match ext.as_str() {
            "js" | "jsx" => Some(Language::JavaScript),
            "ts" | "tsx" => Some(Language::TypeScript),
            "py" => Some(Language::Python),
            "rs" => Some(Language::Rust),
            "go" => Some(Language::Go),
            "java" => Some(Language::Java),
            "c" => Some(Language::C),
            // Headers are ambiguous between C and C++; they are classified as C++.
            "cpp" | "cc" | "cxx" | "h" | "hpp" => Some(Language::Cpp),
            "rb" => Some(Language::Ruby),
            _ => None,
        }
    }

    /// Returns the human-readable name of the language.
    pub fn as_str(&self) -> &'static str {
        match self {
            Language::JavaScript => "JavaScript",
            Language::TypeScript => "TypeScript",
            Language::Python => "Python",
            Language::Rust => "Rust",
            Language::Go => "Go",
            Language::Java => "Java",
            Language::C => "C",
            Language::Cpp => "C++",
            Language::Ruby => "Ruby",
            Language::Unknown => "Unknown",
        }
    }

    /// Returns the token that starts a single-line comment, if the language has one.
    pub fn single_line_comment(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript
            | Language::TypeScript
            | Language::Java
            | Language::C
            | Language::Cpp
            | Language::Rust
            | Language::Go => Some("//"),
            // Ruby line comments start with `#`, not `//`.
            Language::Python | Language::Ruby => Some("#"),
            Language::Unknown => None,
        }
    }

    /// Returns the token that opens a block comment, if the language has one.
    pub fn multi_line_comment_start(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript
            | Language::TypeScript
            | Language::Java
            | Language::C
            | Language::Cpp
            | Language::Rust
            | Language::Go => Some("/*"),
            Language::Ruby => Some("=begin"),
            // Python has no block-comment syntax. An empty delimiter must not be
            // returned here: `str::find("")` matches at offset 0 of every line.
            Language::Python | Language::Unknown => None,
        }
    }

    /// Returns the token that closes a block comment, if the language has one.
    ///
    /// This is `Some` exactly when [`Language::multi_line_comment_start`] is `Some`.
    pub fn multi_line_comment_end(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript
            | Language::TypeScript
            | Language::Java
            | Language::C
            | Language::Cpp
            | Language::Rust
            | Language::Go => Some("*/"),
            Language::Ruby => Some("=end"),
            Language::Python | Language::Unknown => None,
        }
    }

    /// Returns the token that starts a documentation comment, if one is
    /// configured for the language.
    pub fn doc_comment_start(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript | Language::TypeScript | Language::Java => Some("/**"),
            Language::Rust => Some("///"),
            Language::Python => Some("##"),
            Language::Go
            | Language::C
            | Language::Cpp
            | Language::Ruby
            | Language::Unknown => None,
        }
    }
}
|
||||
|
||||
pub struct LanguageParser {
|
||||
language: Language,
|
||||
}
|
||||
|
||||
impl LanguageParser {
|
||||
pub fn new(language: Language) -> Self {
|
||||
Self { language }
|
||||
}
|
||||
|
||||
pub fn parse(
|
||||
&self,
|
||||
content: &str,
|
||||
path: &PathBuf,
|
||||
patterns: &[PatternConfig],
|
||||
) -> Result<Vec<TechDebtItem>, anyhow::Error> {
|
||||
let mut items = Vec::new();
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
|
||||
let single_line_comment = self.language.single_line_comment();
|
||||
let multi_line_start = self.language.multi_line_comment_start();
|
||||
let multi_line_end = self.language.multi_line_comment_end();
|
||||
|
||||
let mut in_multi_line = false;
|
||||
let mut multi_line_start_line = 0;
|
||||
let mut multi_line_content = String::new();
|
||||
let mut multi_line_start_col = 0;
|
||||
|
||||
let single_patterns: Vec<&PatternConfig> =
|
||||
patterns.iter().filter(|p| !p.regex).collect();
|
||||
let regex_patterns: Vec<(regex::Regex, &PatternConfig)> = patterns
|
||||
.iter()
|
||||
.filter(|p| p.regex)
|
||||
.filter_map(|p| {
|
||||
regex::Regex::new(&p.keyword)
|
||||
.ok()
|
||||
.map(|re| (re, p))
|
||||
})
|
||||
.collect();
|
||||
|
||||
for (line_num, line) in lines.iter().enumerate() {
|
||||
let line_num = line_num + 1;
|
||||
|
||||
if let Some(slc) = single_line_comment {
|
||||
if let Some(comment_start) = line.find(slc) {
|
||||
let comment_text = &line[comment_start + slc.len()..];
|
||||
let col_start = comment_start + slc.len() + 1;
|
||||
|
||||
for pattern in &single_patterns {
|
||||
if let Some(pos) = comment_text.find(&pattern.keyword) {
|
||||
let item_content = &comment_text[pos..];
|
||||
let content_clean = item_content
|
||||
.lines()
|
||||
.next()
|
||||
.unwrap_or(item_content)
|
||||
.trim();
|
||||
|
||||
if self.matches_pattern(content_clean, &single_patterns)
|
||||
|| self.matches_regex(content_clean, ®ex_patterns)
|
||||
{
|
||||
let item = TechDebtItem::new(
|
||||
pattern.keyword.clone(),
|
||||
content_clean.to_string(),
|
||||
FileLocation {
|
||||
path: path.clone(),
|
||||
line: line_num,
|
||||
column: col_start + pos,
|
||||
end_line: None,
|
||||
end_column: None,
|
||||
},
|
||||
self.language.as_str().to_string(),
|
||||
CommentType::SingleLine,
|
||||
);
|
||||
items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (regex, pattern) in ®ex_patterns {
|
||||
if let Some(mat) = regex.find(comment_text) {
|
||||
let item = TechDebtItem::new(
|
||||
pattern.keyword.clone(),
|
||||
mat.as_str().to_string(),
|
||||
FileLocation {
|
||||
path: path.clone(),
|
||||
line: line_num,
|
||||
column: col_start + mat.start(),
|
||||
end_line: None,
|
||||
end_column: None,
|
||||
},
|
||||
self.language.as_str().to_string(),
|
||||
CommentType::SingleLine,
|
||||
);
|
||||
items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(mls) = multi_line_start {
|
||||
if !in_multi_line {
|
||||
if let Some(start_pos) = line.find(mls) {
|
||||
in_multi_line = true;
|
||||
multi_line_start_line = line_num;
|
||||
multi_line_start_col = start_pos + mls.len();
|
||||
if let Some(end_pos) = line.find(multi_line_end.unwrap_or("")) {
|
||||
let comment_content = &line[start_pos + mls.len()..end_pos];
|
||||
if let Some(content) = self.extract_comment_content(
|
||||
comment_content,
|
||||
&lines,
|
||||
line_num,
|
||||
start_pos + mls.len() + 1,
|
||||
&single_patterns,
|
||||
®ex_patterns,
|
||||
path,
|
||||
) {
|
||||
items.extend(content);
|
||||
}
|
||||
in_multi_line = false;
|
||||
} else {
|
||||
multi_line_content = line
|
||||
[start_pos + mls.len()..]
|
||||
.to_string();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if let Some(end_pos) = line.find(multi_line_end.unwrap_or("")) {
|
||||
multi_line_content.push('\n');
|
||||
multi_line_content.push_str(&line[..end_pos]);
|
||||
if let Some(content) = self.extract_comment_content(
|
||||
&multi_line_content,
|
||||
&lines,
|
||||
multi_line_start_line,
|
||||
multi_line_start_col,
|
||||
&single_patterns,
|
||||
®ex_patterns,
|
||||
path,
|
||||
) {
|
||||
items.extend(content);
|
||||
}
|
||||
in_multi_line = false;
|
||||
multi_line_content.clear();
|
||||
} else {
|
||||
multi_line_content.push('\n');
|
||||
multi_line_content.push_str(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(dls) = self.language.doc_comment_start() {
|
||||
if let Some(doc_start) = line.find(dls) {
|
||||
let is_block_comment = dls == "/**";
|
||||
let comment_text = if is_block_comment {
|
||||
if let Some(end_pos) = line.find("*/") {
|
||||
&line[doc_start + 3..end_pos]
|
||||
} else {
|
||||
&line[doc_start + 3..]
|
||||
}
|
||||
} else {
|
||||
&line[doc_start + 3..]
|
||||
};
|
||||
|
||||
for pattern in &single_patterns {
|
||||
if let Some(pos) = comment_text.find(&pattern.keyword) {
|
||||
let item_content = &comment_text[pos..];
|
||||
let content_clean = item_content
|
||||
.lines()
|
||||
.next()
|
||||
.unwrap_or(item_content)
|
||||
.trim();
|
||||
|
||||
if self.matches_pattern(content_clean, &single_patterns)
|
||||
|| self.matches_regex(content_clean, ®ex_patterns)
|
||||
{
|
||||
let item = TechDebtItem::new(
|
||||
pattern.keyword.clone(),
|
||||
content_clean.to_string(),
|
||||
FileLocation {
|
||||
path: path.clone(),
|
||||
line: line_num,
|
||||
column: doc_start + 3 + pos,
|
||||
end_line: None,
|
||||
end_column: None,
|
||||
},
|
||||
self.language.as_str().to_string(),
|
||||
CommentType::DocBlock,
|
||||
);
|
||||
items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn matches_pattern(&self, content: &str, patterns: &[&PatternConfig]) -> bool {
|
||||
patterns.iter().any(|p| content.contains(&p.keyword))
|
||||
}
|
||||
|
||||
fn matches_regex(
|
||||
&self,
|
||||
content: &str,
|
||||
regex_patterns: &[(regex::Regex, &PatternConfig)],
|
||||
) -> bool {
|
||||
regex_patterns.iter().any(|(re, _)| re.is_match(content))
|
||||
}
|
||||
|
||||
fn extract_comment_content(
|
||||
&self,
|
||||
content: &str,
|
||||
lines: &[&str],
|
||||
start_line: usize,
|
||||
start_col: usize,
|
||||
patterns: &[&PatternConfig],
|
||||
regex_patterns: &[(regex::Regex, &PatternConfig)],
|
||||
path: &PathBuf,
|
||||
) -> Option<Vec<TechDebtItem>> {
|
||||
let mut items = Vec::new();
|
||||
|
||||
for pattern in patterns {
|
||||
let regex = regex::Regex::new(&format!(r"(?i){}", pattern.keyword)).unwrap();
|
||||
for mat in regex.find_iter(content) {
|
||||
let line_in_content = content[..mat.start()].lines().count() + start_line;
|
||||
let col_in_content = content[..mat.start()].lines().last().map_or(0, |l| l.len());
|
||||
|
||||
let item = TechDebtItem::new(
|
||||
pattern.keyword.clone(),
|
||||
mat.as_str().to_string(),
|
||||
FileLocation {
|
||||
path: path.clone(),
|
||||
line: line_in_content,
|
||||
column: start_col + col_in_content,
|
||||
end_line: None,
|
||||
end_column: None,
|
||||
},
|
||||
self.language.as_str().to_string(),
|
||||
CommentType::MultiLine,
|
||||
);
|
||||
items.push(item);
|
||||
}
|
||||
}
|
||||
|
||||
for (regex, pattern) in regex_patterns {
|
||||
for mat in regex.find_iter(content) {
|
||||
let line_in_content = content[..mat.start()].lines().count() + start_line;
|
||||
let col_in_content = content[..mat.start()].lines().last().map_or(0, |l| l.len());
|
||||
|
||||
let item = TechDebtItem::new(
|
||||
pattern.keyword.clone(),
|
||||
mat.as_str().to_string(),
|
||||
FileLocation {
|
||||
path: path.clone(),
|
||||
line: line_in_content,
|
||||
column: start_col + col_in_content,
|
||||
end_line: None,
|
||||
end_column: None,
|
||||
},
|
||||
self.language.as_str().to_string(),
|
||||
CommentType::MultiLine,
|
||||
);
|
||||
items.push(item);
|
||||
}
|
||||
}
|
||||
|
||||
if items.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(items)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user