Initial upload with CI/CD workflow
Some checks failed
CI / test (push) Has been cancelled

This commit is contained in:
2026-02-05 14:40:57 +00:00
parent 3c62101ecf
commit 5c53f88a08

370
src/core/language.rs Normal file
View File

@@ -0,0 +1,370 @@
use std::path::PathBuf;
use crate::models::{CommentType, FileLocation, PatternConfig, Priority, TechDebtItem};
/// Programming languages whose comment syntax the scanner understands.
#[derive(Debug, Clone, PartialEq)]
pub enum Language {
    JavaScript,
    TypeScript,
    Python,
    Rust,
    Go,
    Java,
    C,
    Cpp,
    Ruby,
    /// A language without any known comment syntax; never produced by
    /// [`Language::from_path`].
    Unknown,
}

impl Language {
    /// Detects the language from the file extension of `path`.
    ///
    /// The extension is compared case-insensitively. Returns `None` when the
    /// path has no extension, the extension is not valid UTF-8, or it is not
    /// one of the recognised extensions. Header files (`.h`, `.hpp`) are
    /// classified as C++.
    pub fn from_path(path: &PathBuf) -> Option<Self> {
        let extension = path.extension()?.to_str()?.to_lowercase();
        let language = match extension.as_str() {
            "js" | "jsx" => Language::JavaScript,
            "ts" | "tsx" => Language::TypeScript,
            "py" => Language::Python,
            "rs" => Language::Rust,
            "go" => Language::Go,
            "java" => Language::Java,
            "c" => Language::C,
            "cpp" | "cc" | "cxx" | "h" | "hpp" => Language::Cpp,
            "rb" => Language::Ruby,
            _ => return None,
        };
        Some(language)
    }

    /// Returns the human-readable name of the language.
    pub fn as_str(&self) -> &'static str {
        match self {
            Language::JavaScript => "JavaScript",
            Language::TypeScript => "TypeScript",
            Language::Python => "Python",
            Language::Rust => "Rust",
            Language::Go => "Go",
            Language::Java => "Java",
            Language::C => "C",
            Language::Cpp => "C++",
            Language::Ruby => "Ruby",
            Language::Unknown => "Unknown",
        }
    }

    /// Returns the token that starts a single-line comment, if the language
    /// has one.
    pub fn single_line_comment(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript
            | Language::TypeScript
            | Language::Java
            | Language::C
            | Language::Cpp
            | Language::Rust
            | Language::Go => Some("//"),
            // Ruby comments start with `#`; `//` is an empty regex literal there.
            Language::Python | Language::Ruby => Some("#"),
            Language::Unknown => None,
        }
    }

    /// Returns the token that opens a block comment, if the language has one.
    ///
    /// Python has no block-comment syntax, so it yields `None` rather than an
    /// empty delimiter (an empty string would match at the start of every
    /// line).
    pub fn multi_line_comment_start(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript
            | Language::TypeScript
            | Language::Java
            | Language::C
            | Language::Cpp
            | Language::Rust
            | Language::Go => Some("/*"),
            Language::Ruby => Some("=begin"),
            Language::Python | Language::Unknown => None,
        }
    }

    /// Returns the token that closes a block comment, if the language has one.
    ///
    /// Always `Some` exactly when [`Language::multi_line_comment_start`] is
    /// `Some`.
    pub fn multi_line_comment_end(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript
            | Language::TypeScript
            | Language::Java
            | Language::C
            | Language::Cpp
            | Language::Rust
            | Language::Go => Some("*/"),
            Language::Ruby => Some("=end"),
            Language::Python | Language::Unknown => None,
        }
    }

    /// Returns the token that starts a documentation comment, if one is
    /// defined for the language.
    pub fn doc_comment_start(&self) -> Option<&'static str> {
        match self {
            Language::JavaScript | Language::TypeScript | Language::Java => Some("/**"),
            Language::Rust => Some("///"),
            Language::Python => Some("##"),
            Language::Go
            | Language::C
            | Language::Cpp
            | Language::Ruby
            | Language::Unknown => None,
        }
    }
}
/// Scans the source text of one [`Language`] for technical-debt markers
/// that appear inside comments.
pub struct LanguageParser {
/// Language whose comment delimiters are used while scanning.
language: Language,
}
impl LanguageParser {
pub fn new(language: Language) -> Self {
Self { language }
}
pub fn parse(
&self,
content: &str,
path: &PathBuf,
patterns: &[PatternConfig],
) -> Result<Vec<TechDebtItem>, std::io::Error> {
let mut items = Vec::new();
let lines: Vec<&str> = content.lines().collect();
let single_line_comment = self.language.single_line_comment();
let multi_line_start = self.language.multi_line_comment_start();
let multi_line_end = self.language.multi_line_comment_end();
let mut in_multi_line = false;
let mut multi_line_start_line = 0;
let mut multi_line_content = String::new();
let mut multi_line_start_col = 0;
let single_patterns: Vec<&PatternConfig> =
patterns.iter().filter(|p| !p.regex).collect();
let regex_patterns: Vec<(regex::Regex, &PatternConfig)> = patterns
.iter()
.filter(|p| p.regex)
.filter_map(|p| {
regex::Regex::new(&p.keyword)
.ok()
.map(|re| (re, p))
})
.collect();
for (line_num, line) in lines.iter().enumerate() {
let line_num = line_num + 1;
if let Some(slc) = single_line_comment {
if let Some(comment_start) = line.find(slc) {
let comment_text = &line[comment_start + slc.len()..];
let col_start = comment_start + slc.len() + 1;
for pattern in &single_patterns {
if let Some(pos) = comment_text.find(&pattern.keyword) {
let item_content = &comment_text[pos..];
let content_clean = item_content
.lines()
.next()
.unwrap_or(item_content)
.trim();
if self.matches_pattern(content_clean, &single_patterns)
|| self.matches_regex(content_clean, &regex_patterns)
{
let item = TechDebtItem::new(
pattern.keyword.clone(),
content_clean.to_string(),
FileLocation {
path: path.clone(),
line: line_num,
column: col_start + pos,
end_line: None,
end_column: None,
},
self.language.as_str().to_string(),
CommentType::SingleLine,
);
items.push(item);
}
}
}
for (regex, pattern) in &regex_patterns {
if let Some(mat) = regex.find(comment_text) {
let item = TechDebtItem::new(
pattern.keyword.clone(),
mat.as_str().to_string(),
FileLocation {
path: path.clone(),
line: line_num,
column: col_start + mat.start(),
end_line: None,
end_column: None,
},
self.language.as_str().to_string(),
CommentType::SingleLine,
);
items.push(item);
}
}
}
}
if let Some(mls) = multi_line_start {
if !in_multi_line {
if let Some(start_pos) = line.find(mls) {
in_multi_line = true;
multi_line_start_line = line_num;
multi_line_start_col = start_pos + mls.len();
if let Some(end_pos) = line.find(multi_line_end.unwrap_or("")) {
let comment_content = &line[start_pos + mls.len()..end_pos];
if let Some(content) = self.extract_comment_content(
comment_content,
&lines,
line_num,
start_pos + mls.len() + 1,
&single_patterns,
&regex_patterns,
path,
) {
items.extend(content);
}
in_multi_line = false;
} else {
multi_line_content = line
[start_pos + mls.len()..]
.to_string();
}
}
} else {
if let Some(end_pos) = line.find(multi_line_end.unwrap_or("")) {
multi_line_content.push('\n');
multi_line_content.push_str(&line[..end_pos]);
if let Some(content) = self.extract_comment_content(
&multi_line_content,
&lines,
multi_line_start_line,
multi_line_start_col,
&single_patterns,
&regex_patterns,
path,
) {
items.extend(content);
}
in_multi_line = false;
multi_line_content.clear();
} else {
multi_line_content.push('\n');
multi_line_content.push_str(line);
}
}
}
if let Some(dls) = self.language.doc_comment_start() {
if let Some(doc_start) = line.find(dls) {
let is_block_comment = dls == "/**";
let comment_text = if is_block_comment {
if let Some(end_pos) = line.find("*/") {
&line[doc_start + 3..end_pos]
} else {
&line[doc_start + 3..]
}
} else {
&line[doc_start + 3..]
};
for pattern in &single_patterns {
if let Some(pos) = comment_text.find(&pattern.keyword) {
let item_content = &comment_text[pos..];
let content_clean = item_content
.lines()
.next()
.unwrap_or(item_content)
.trim();
if self.matches_pattern(content_clean, &single_patterns)
|| self.matches_regex(content_clean, &regex_patterns)
{
let item = TechDebtItem::new(
pattern.keyword.clone(),
content_clean.to_string(),
FileLocation {
path: path.clone(),
line: line_num,
column: doc_start + 3 + pos,
end_line: None,
end_column: None,
},
self.language.as_str().to_string(),
CommentType::DocBlock,
);
items.push(item);
}
}
}
}
}
}
Ok(items)
}
fn matches_pattern(&self, content: &str, patterns: &[&PatternConfig]) -> bool {
patterns.iter().any(|p| content.contains(&p.keyword))
}
fn matches_regex(
&self,
content: &str,
regex_patterns: &[(regex::Regex, &PatternConfig)],
) -> bool {
regex_patterns.iter().any(|(re, _)| re.is_match(content))
}
fn extract_comment_content(
&self,
content: &str,
lines: &[&str],
start_line: usize,
start_col: usize,
patterns: &[&PatternConfig],
regex_patterns: &[(regex::Regex, &PatternConfig)],
path: &PathBuf,
) -> Option<Vec<TechDebtItem>> {
let mut items = Vec::new();
for pattern in patterns {
let regex = regex::Regex::new(&format!(r"(?i){}", pattern.keyword)).unwrap();
for mat in regex.find_iter(content) {
let line_in_content = content[..mat.start()].lines().count() + start_line;
let col_in_content = content[..mat.start()].lines().last().map_or(0, |l| l.len());
let item = TechDebtItem::new(
pattern.keyword.clone(),
mat.as_str().to_string(),
FileLocation {
path: path.clone(),
line: line_in_content,
column: start_col + col_in_content,
end_line: None,
end_column: None,
},
self.language.as_str().to_string(),
CommentType::MultiLine,
);
items.push(item);
}
}
for (regex, pattern) in regex_patterns {
for mat in regex.find_iter(content) {
let line_in_content = content[..mat.start()].lines().count() + start_line;
let col_in_content = content[..mat.start()].lines().last().map_or(0, |l| l.len());
let item = TechDebtItem::new(
pattern.keyword.clone(),
mat.as_str().to_string(),
FileLocation {
path: path.clone(),
line: line_in_content,
column: start_col + col_in_content,
end_line: None,
end_column: None,
},
self.language.as_str().to_string(),
CommentType::MultiLine,
);
items.push(item);
}
}
if items.is_empty() {
None
} else {
Some(items)
}
}
}