Add Java parser

This commit is contained in:
2026-02-02 21:34:12 +00:00
parent 9212f95417
commit 7dd2d6c6b7

148
depaudit/parsers/java.py Normal file
View File

@@ -0,0 +1,148 @@
from __future__ import annotations
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Any
from depaudit.parsers import Parser, ParsedManifest, Dependency
class JavaParser(Parser):
    """Parse Java build manifests: Maven ``pom.xml`` and Gradle build scripts.

    Maven files are read with ``xml.etree.ElementTree``; Gradle files
    (``build.gradle`` / ``build.gradle.kts``) are scanned with regular
    expressions, so only ``"group:name:version"`` string coordinates are
    recognised there.

    Dependencies are named ``"group:artifact"``; the project itself is named
    ``"group/artifact"``.  Dependency objects are built by
    ``self._create_dependency``, which is not defined in this class
    (presumably inherited from ``Parser`` -- confirm against the base class).
    """

    language = "java"

    # File names this parser claims in ``can_parse``.
    _MANIFEST_NAMES = ("pom.xml", "build.gradle", "build.gradle.kts")

    # Maven scopes that mark a dependency as a development dependency.
    _POM_DEV_SCOPES = frozenset({"test", "provided"})

    # Gradle configurations treated like the Maven scopes above:
    # test* ~ "test", compileOnly ~ "provided".
    _GRADLE_DEV_CONFIGURATIONS = frozenset(
        {"compileOnly", "testImplementation", "testCompileOnly"}
    )

    # Groups: 1 = configuration, 2 = group, 3 = artifact, 4 = version.
    # An optional "(" followed by an optional quote is accepted so that both
    # Groovy (`implementation 'g:a:v'`) and parenthesised / Kotlin DSL
    # (`implementation("g:a:v")`) declarations match.
    _GRADLE_DEP_PATTERN = (
        r"\b(implementation|api|compileOnly|runtimeOnly"
        r"|testImplementation|testCompileOnly)"
        r"\s*\(?\s*[\"']?\s*"
        r"([^\s:'\")]+)\s*:\s*([^\s:'\")]+)\s*:\s*([^\s:'\")]+)"
    )

    def can_parse(self, file_path: Path) -> bool:
        """Return True when ``file_path`` is named like a Maven or Gradle manifest."""
        return file_path.name in self._MANIFEST_NAMES

    def parse(self, file_path: Path) -> ParsedManifest:
        """Parse ``file_path`` and return a manifest populated from it.

        Files whose name is neither ``pom.xml`` nor starts with
        ``build.gradle`` yield a manifest with only ``language`` and
        ``file_path`` set.

        Raises:
            xml.etree.ElementTree.ParseError: the POM is not well-formed XML.
            OSError: the file cannot be read.
            UnicodeDecodeError: a Gradle file is not valid UTF-8.
        """
        manifest = ParsedManifest(
            language=self.language,
            file_path=file_path,
        )
        if file_path.name == "pom.xml":
            self._parse_pom_xml(file_path, manifest)
        elif file_path.name.startswith("build.gradle"):
            self._parse_gradle(file_path, manifest)
        return manifest

    # ------------------------------------------------------------------
    # Maven
    # ------------------------------------------------------------------

    def _pom_find(
        self, parent: ET.Element, tag: str, ns: dict[str, str]
    ) -> ET.Element | None:
        """Return the first direct child ``tag``, namespaced or not."""
        found = parent.find("maven:" + tag, ns)
        # Compare against None explicitly: an Element without children is
        # falsy, so a truthiness test would discard valid leaf elements.
        if found is None:
            found = parent.find(tag)
        return found

    def _pom_findall(
        self, parent: ET.Element, tag: str, ns: dict[str, str]
    ) -> list[ET.Element]:
        """Return all direct children ``tag``, falling back to un-namespaced."""
        return parent.findall("maven:" + tag, ns) or parent.findall(tag)

    def _pom_text(
        self, parent: ET.Element, tag: str, ns: dict[str, str]
    ) -> str | None:
        """Return the stripped text of child ``tag``; None if absent or empty."""
        elem = self._pom_find(parent, tag, ns)
        if elem is None or elem.text is None:
            return None
        return elem.text.strip() or None

    def _parse_pom_xml(self, file_path: Path, manifest: ParsedManifest) -> None:
        """Fill ``manifest`` with project metadata and dependencies from a POM.

        ``groupId`` and ``version`` fall back to the ``<parent>`` element when
        the project does not declare its own, following Maven inheritance.
        ``project_name`` is None when no group or artifact id can be found.
        """
        # NOTE(review): ElementTree is not hardened against maliciously
        # constructed XML (see the "XML vulnerabilities" section of the
        # Python docs).  If manifests come from untrusted repositories,
        # consider a hardened parser -- confirm the threat model.
        root = ET.parse(file_path).getroot()
        ns = {"maven": "http://maven.apache.org/POM/4.0.0"}

        group = self._pom_text(root, "groupId", ns)
        artifact = self._pom_text(root, "artifactId", ns)
        version = self._pom_text(root, "version", ns)

        parent = self._pom_find(root, "parent", ns)
        if parent is not None:
            if group is None:
                group = self._pom_text(parent, "groupId", ns)
            if version is None:
                version = self._pom_text(parent, "version", ns)

        if group is not None and artifact is not None:
            manifest.project_name = group + "/" + artifact
        else:
            manifest.project_name = None
        manifest.project_version = version

        self._parse_dependencies(root, ns, file_path, manifest)
        self._parse_dependency_management(root, ns, file_path, manifest)

    def _append_pom_dependencies(
        self,
        deps_elem: ET.Element,
        ns: dict[str, str],
        file_path: Path,
        manifest: ParsedManifest,
        *,
        honor_scope: bool,
    ) -> None:
        """Append every ``<dependency>`` under ``deps_elem`` to ``manifest``.

        Entries lacking a group or artifact id are skipped.  A missing or
        empty version becomes ``""``.  When ``honor_scope`` is true the
        ``dev`` flag is derived from ``<scope>``; otherwise ``dev`` is not
        passed and ``_create_dependency`` uses its own default.
        """
        for dep in self._pom_findall(deps_elem, "dependency", ns):
            group = self._pom_text(dep, "groupId", ns)
            artifact = self._pom_text(dep, "artifactId", ns)
            if group is None or artifact is None:
                continue
            name = group + ":" + artifact
            version = self._pom_text(dep, "version", ns) or ""
            if honor_scope:
                is_dev = self._pom_text(dep, "scope", ns) in self._POM_DEV_SCOPES
                dependency = self._create_dependency(
                    file_path, name, version, dev=is_dev
                )
            else:
                dependency = self._create_dependency(file_path, name, version)
            manifest.dependencies.append(dependency)

    def _parse_dependencies(
        self, root: ET.Element, ns: dict[str, str], file_path: Path, manifest: ParsedManifest
    ) -> None:
        """Collect the project's direct ``<dependencies>``.

        Dependencies whose scope is ``test`` or ``provided`` are flagged as
        development dependencies.
        """
        deps_elem = self._pom_find(root, "dependencies", ns)
        if deps_elem is None:
            return
        self._append_pom_dependencies(
            deps_elem, ns, file_path, manifest, honor_scope=True
        )

    def _parse_dependency_management(
        self, root: ET.Element, ns: dict[str, str], file_path: Path, manifest: ParsedManifest
    ) -> None:
        """Collect entries under ``<dependencyManagement>/<dependencies>``.

        Scope is ignored for these entries.
        """
        dm_elem = self._pom_find(root, "dependencyManagement", ns)
        if dm_elem is None:
            return
        deps_elem = self._pom_find(dm_elem, "dependencies", ns)
        if deps_elem is None:
            return
        self._append_pom_dependencies(
            deps_elem, ns, file_path, manifest, honor_scope=False
        )

    # ------------------------------------------------------------------
    # Gradle
    # ------------------------------------------------------------------

    def _parse_gradle(self, file_path: Path, manifest: ParsedManifest) -> None:
        """Fill ``manifest`` from a Gradle build script using regex scanning.

        ``project_name`` / ``project_version`` are only assigned when a
        ``name = ...`` / ``version = ...`` assignment is found.  Only
        dependencies written as a ``group:name:version`` coordinate under one
        of the recognised configurations are collected.
        """
        import re

        content = file_path.read_text(encoding="utf-8")

        project_name = self._extract_gradle_value(content, "name")
        if project_name:
            manifest.project_name = project_name
        project_version = self._extract_gradle_value(content, "version")
        if project_version:
            manifest.project_version = project_version

        for match in re.finditer(self._GRADLE_DEP_PATTERN, content):
            configuration, group, artifact, version = match.groups()
            is_dev = configuration in self._GRADLE_DEV_CONFIGURATIONS
            manifest.dependencies.append(
                self._create_dependency(
                    file_path, group + ":" + artifact, version, dev=is_dev
                )
            )

    def _extract_gradle_value(self, content: str, key: str) -> str | None:
        """Return the value assigned to ``key`` in ``content``, or None.

        A quoted assignment (``key = "value"`` / ``key = 'value'``) anywhere
        in the text wins over an unquoted one (``key = value``).  ``key`` is
        matched literally and must start at a word boundary, so ``name`` does
        not match inside ``filename``.
        """
        import re

        escaped_key = re.escape(key)
        patterns = (
            r"\b" + escaped_key + r"\s*=\s*[\"']([^\"']+)[\"']",
            r"\b" + escaped_key + r"\s*=\s*(\S+)",
        )
        for pattern in patterns:
            match = re.search(pattern, content)
            if match:
                return match.group(1).strip()
        return None