Add source code files

This commit is contained in:
2026-02-01 02:55:40 +00:00
parent 5ebf92ac70
commit a8b62c9ab3

View File

@@ -0,0 +1,71 @@
"""Language detection module."""
from pathlib import Path
from typing import Optional
from codeguard.core.models import Language
class LanguageDetector:
    """Detect the programming language of a source file.

    Detection works either from the file extension (``detect``) or from a
    shebang line at the start of the file content (``detect_from_content``).
    The class keeps no per-instance state; every lookup table is a class
    attribute.
    """

    # Lower-cased file suffix (including the leading dot) -> language.
    EXTENSION_MAP = {
        ".py": Language.PYTHON,
        ".pyw": Language.PYTHON,
        ".js": Language.JAVASCRIPT,
        ".jsx": Language.JAVASCRIPT,
        ".ts": Language.TYPESCRIPT,
        ".tsx": Language.TYPESCRIPT,
        ".go": Language.GO,
        ".rs": Language.RUST,
    }

    # Interpreter family -> byte substrings that identify it in a shebang.
    SHEBANG_PATTERNS = {
        "python": [b"python", b"python3", b"python2"],
        "bash": [b"bash", b"sh"],
    }

    # Interpreter family -> language reported for it. Families without an
    # entry here (currently "bash") are recognised but yield no language.
    _SHEBANG_LANGUAGES = {
        "python": Language.PYTHON,
    }

    def __init__(self):
        """Create a detector; there is no state to initialise."""

    def detect(self, file_path: Path) -> Optional[Language]:
        """Return the language mapped to *file_path*'s extension.

        The suffix is compared case-insensitively against ``EXTENSION_MAP``.

        Args:
            file_path: Path whose final suffix is inspected. The file is not
                opened.

        Returns:
            The matching language, or ``None`` for an unknown or missing
            extension.
        """
        ext = file_path.suffix.lower()
        return self.EXTENSION_MAP.get(ext)

    @staticmethod
    def _shebang_interpreter(first_line: bytes) -> Optional[bytes]:
        """Return the interpreter name from a shebang line, else ``None``.

        ``#!/usr/bin/python3`` gives ``b"python3"``. When the program is
        ``env``, the first following argument that is neither an option
        (``-S``) nor a ``VAR=value`` assignment is used instead.
        """
        if not first_line.startswith(b"#!"):
            return None
        tokens = first_line[2:].split()
        if not tokens:
            return None
        program = tokens[0].rsplit(b"/", 1)[-1]
        if program != b"env":
            return program
        for token in tokens[1:]:
            if token.startswith(b"-") or b"=" in token:
                continue
            return token.rsplit(b"/", 1)[-1]
        return None

    def detect_from_content(self, content: bytes) -> Optional[Language]:
        """Return the language named by the shebang on the first line.

        Only a first line that actually starts with ``#!`` is considered, and
        the patterns in ``SHEBANG_PATTERNS`` are matched against the
        interpreter name only, so ordinary code or comments that merely
        mention "python" are not misreported.

        Args:
            content: Raw file bytes; only the first line is examined.

        Returns:
            The detected language, or ``None`` when the content is empty, has
            no shebang, or names an interpreter with no mapped language.
        """
        if not content:
            return None
        # strip() also drops the "\r" left behind by CRLF line endings.
        first_line = content.split(b"\n", 1)[0].strip().lower()
        interpreter = self._shebang_interpreter(first_line)
        if interpreter is None:
            return None
        for family, patterns in self.SHEBANG_PATTERNS.items():
            language = self._SHEBANG_LANGUAGES.get(family)
            if language is None:
                # Known interpreter family without a language to report.
                continue
            if any(pattern in interpreter for pattern in patterns):
                return language
        return None

    def is_supported(self, file_path: Path) -> bool:
        """Return ``True`` when ``detect`` recognises *file_path*'s extension."""
        return self.detect(file_path) is not None

    def get_language_by_name(self, name: str) -> Optional[Language]:
        """Return the ``Language`` member whose value equals *name*.

        The name is stripped of surrounding whitespace and lower-cased before
        being compared with each member's ``value``.

        Args:
            name: Language name to look up, in any letter case.

        Returns:
            The matching member, or ``None`` if no member has that value.
        """
        wanted = name.strip().lower()
        return next((lang for lang in Language if lang.value == wanted), None)

    def chunk_code(self, code: str, max_tokens: int = 8000) -> list[str]:
        """Split *code* into consecutive chunks of whole lines.

        Despite the parameter name, the budget is measured in characters:
        lines are added to a chunk while the summed length of its lines
        (newline separators not counted) stays within *max_tokens*. A single
        line longer than the budget becomes a chunk of its own rather than
        being cut. Joining the returned chunks with ``"\\n"`` reproduces
        *code* exactly.

        Args:
            code: Source text to split.
            max_tokens: Maximum summed line length per chunk, in characters.

        Returns:
            A non-empty list of chunks; ``[""]`` for empty input.
        """
        chunks: list[str] = []
        current: list[str] = []
        current_size = 0  # running total, avoids re-joining on every line
        for line in code.split("\n"):
            # Flush only when there is something to flush; otherwise an
            # oversized line would produce a spurious empty chunk.
            if current and current_size + len(line) > max_tokens:
                chunks.append("\n".join(current))
                current = []
                current_size = 0
            current.append(line)
            current_size += len(line)
        # split() always yields at least one element, so current is non-empty.
        chunks.append("\n".join(current))
        return chunks