"""Unit tests for language detection module."""

from collections import Counter
from pathlib import Path

from codesnap.core.language_detector import (
    EXTENSION_TO_LANGUAGE,
    detect_language,
    detect_language_by_extension,
    detect_language_by_shebang,
    get_language_info,
    get_supported_extensions,
    get_supported_languages,
)


class TestDetectLanguageByExtension:
    """Tests for extension-based language detection."""

    def test_python_extension_py(self):
        """A ``.py`` file is detected as Python."""
        assert detect_language_by_extension(Path("test.py")) == "python"

    def test_python_extension_pyi(self):
        """A ``.pyi`` file is detected as Python."""
        assert detect_language_by_extension(Path("test.pyi")) == "python"

    def test_javascript_extension_js(self):
        """A ``.js`` file is detected as JavaScript."""
        assert detect_language_by_extension(Path("test.js")) == "javascript"

    def test_typescript_extension_ts(self):
        """A ``.ts`` file is detected as TypeScript."""
        assert detect_language_by_extension(Path("test.ts")) == "typescript"

    def test_go_extension(self):
        """A ``.go`` file is detected as Go."""
        assert detect_language_by_extension(Path("main.go")) == "go"

    def test_rust_extension(self):
        """A ``.rs`` file is detected as Rust."""
        assert detect_language_by_extension(Path("main.rs")) == "rust"

    def test_java_extension(self):
        """A ``.java`` file is detected as Java."""
        assert detect_language_by_extension(Path("Main.java")) == "java"

    def test_cpp_extension(self):
        """Both C++ source (``.cpp``) and header (``.hpp``) map to ``cpp``."""
        assert detect_language_by_extension(Path("test.cpp")) == "cpp"
        assert detect_language_by_extension(Path("test.hpp")) == "cpp"

    def test_ruby_extension(self):
        """A ``.rb`` file is detected as Ruby."""
        assert detect_language_by_extension(Path("script.rb")) == "ruby"

    def test_php_extension(self):
        """A ``.php`` file is detected as PHP."""
        assert detect_language_by_extension(Path("script.php")) == "php"

    def test_unknown_extension(self):
        """An unrecognised extension yields ``None`` rather than raising."""
        assert detect_language_by_extension(Path("test.xyz")) is None

    def test_case_insensitive(self):
        """Upper-case extensions resolve like their lower-case forms."""
        assert detect_language_by_extension(Path("test.PY")) == "python"
        assert detect_language_by_extension(Path("test.JS")) == "javascript"


class TestDetectLanguageByShebang:
    """Tests for shebang-based language detection."""

    def test_python_shebang(self):
        """An ``env python3`` shebang is detected as Python."""
        content = "#!/usr/bin/env python3\nprint('hello')"
        assert detect_language_by_shebang(content) == "python"

    def test_python_shebang_alt(self):
        """An unversioned ``env python`` shebang is detected as Python."""
        content = "#!/usr/bin/env python\nprint('hello')"
        assert detect_language_by_shebang(content) == "python"

    def test_node_shebang(self):
        """An ``env node`` shebang is detected as JavaScript."""
        content = "#!/usr/bin/env node\nconsole.log('hello')"
        assert detect_language_by_shebang(content) == "javascript"

    def test_ruby_shebang(self):
        """An ``env ruby`` shebang is detected as Ruby."""
        content = "#!/usr/bin/env ruby\nputs 'hello'"
        assert detect_language_by_shebang(content) == "ruby"

    def test_php_shebang(self):
        """An ``env php`` shebang is detected as PHP."""
        content = "#!/usr/bin/env php\necho 'hello';"
        assert detect_language_by_shebang(content) == "php"

    def test_no_shebang(self):
        """Content without a shebang line yields ``None``."""
        content = "print('hello')"
        assert detect_language_by_shebang(content) is None

    def test_empty_content(self):
        """Empty content yields ``None``."""
        assert detect_language_by_shebang("") is None


class TestDetectLanguage:
    """Tests for combined language detection."""

    def test_detection_by_extension(self):
        """With only a path given, the extension decides the language."""
        assert detect_language(Path("test.py")) == "python"
        assert detect_language(Path("test.js")) == "javascript"

    def test_detection_fallback_to_shebang(self):
        """An extensionless path is identified from the supplied content."""
        file_path = Path("script")
        assert detect_language(file_path, "#!/usr/bin/env python") == "python"
        assert detect_language(file_path, "#!/usr/bin/env node") == "javascript"

    def test_unknown_file_no_content(self):
        """An unknown extension with no content yields ``None``."""
        assert detect_language(Path("unknown.xyz")) is None


class TestGetLanguageInfo:
    """Tests for language info retrieval."""

    def test_get_python_info(self):
        """The Python entry exposes its name and lists ``.py``."""
        info = get_language_info("python")
        assert info is not None
        assert info.name == "python"
        assert ".py" in info.extensions

    def test_get_unknown_language(self):
        """An unregistered language name yields ``None``."""
        info = get_language_info("unknown")
        assert info is None


class TestGetSupportedExtensions:
    """Tests for supported extensions."""

    def test_returns_set(self):
        """The supported extensions are returned as a ``set``."""
        extensions = get_supported_extensions()
        assert isinstance(extensions, set)

    def test_includes_common_extensions(self):
        """Common source extensions are reported as supported."""
        extensions = get_supported_extensions()
        assert ".py" in extensions
        assert ".js" in extensions
        assert ".ts" in extensions
        assert ".go" in extensions


class TestGetSupportedLanguages:
    """Tests for supported programming languages."""

    def test_returns_list(self):
        """The supported languages are returned as a ``list``."""
        languages = get_supported_languages()
        assert isinstance(languages, list)

    def test_includes_main_languages(self):
        """The main programming languages are reported as supported."""
        languages = get_supported_languages()
        assert "python" in languages
        assert "javascript" in languages
        assert "typescript" in languages
        assert "go" in languages
        assert "rust" in languages
        assert "java" in languages

    def test_excludes_config_formats(self):
        """Data and markup formats are not listed as programming languages."""
        languages = get_supported_languages()
        assert "json" not in languages
        assert "yaml" not in languages
        assert "markdown" not in languages


class TestExtensionToLanguage:
    """Tests for extension to language mapping."""

    # Every language name the mapping is allowed to point at. Built once at
    # class-definition time instead of on every loop iteration.
    KNOWN_LANGUAGES = frozenset({
        "python", "javascript", "typescript", "go", "rust",
        "java", "c", "cpp", "ruby", "php", "shell",
        "json", "yaml", "markdown",
    })

    def test_mapping_completeness(self):
        """Every mapped extension points at one of the known languages."""
        for ext, lang in EXTENSION_TO_LANGUAGE.items():
            assert lang in self.KNOWN_LANGUAGES, (
                f"extension {ext!r} maps to unexpected language {lang!r}"
            )

    def test_no_duplicate_extensions(self):
        """No two extensions collide once letter case is ignored.

        Dict keys are unique by construction, so comparing the raw keys with
        their own set can never fail. Extension lookup ignores case (see
        ``TestDetectLanguageByExtension.test_case_insensitive``), so the
        duplicates worth catching are keys that differ only by case.
        """
        folded_counts = Counter(ext.lower() for ext in EXTENSION_TO_LANGUAGE)
        clashes = sorted(ext for ext, count in folded_counts.items() if count > 1)
        assert not clashes, (
            f"extensions registered more than once ignoring case: {clashes}"
        )