82 lines
2.2 KiB
Python
82 lines
2.2 KiB
Python
"""Base indexer interface for documentation parsing."""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from pathlib import Path
|
|
from typing import Generator, List
|
|
|
|
from local_api_docs_search.models.document import Document, SourceType
|
|
|
|
|
|
class BaseIndexer(ABC):
    """Abstract base class for document indexers.

    Concrete indexers implement :meth:`index`, :meth:`get_documents` and
    :meth:`_is_supported_file`. File discovery (:meth:`_find_files`) and
    ID generation (:meth:`_generate_id`) are provided here.
    """

    # Only declared here, never assigned; presumably each concrete indexer
    # sets it to the SourceType it produces (confirm against subclasses).
    source_type: SourceType

    @abstractmethod
    def index(self, path: Path, recursive: bool = False) -> List[Document]:
        """Index documents from the given path.

        Args:
            path: Path to file or directory to index
            recursive: Whether to search directories recursively

        Returns:
            List of indexed Document objects
        """

    @abstractmethod
    def get_documents(self) -> List[Document]:
        """Get all indexed documents.

        Returns:
            List of Document objects
        """

    def _find_files(self, path: Path, recursive: bool = False) -> Generator[Path, None, None]:
        """Find files to index in the given path.

        A file path is yielded as-is when the indexer supports it. A
        directory is scanned (one level, or the whole tree when
        ``recursive`` is true) and its supported files are yielded in
        sorted path order. A path that is neither an existing file nor an
        existing directory yields nothing.

        Args:
            path: Path to file or directory
            recursive: Whether to search recursively

        Yields:
            Path objects for each supported file found
        """
        if path.is_file():
            if self._is_supported_file(path):
                yield path
            return

        if not path.is_dir():
            return

        pattern = "**/*" if recursive else "*"
        # The order glob() returns entries in depends on the filesystem.
        # Sorting keeps the discovery order (and therefore the order of
        # indexed documents) reproducible across runs and machines.
        for candidate in sorted(path.glob(pattern)):
            if candidate.is_file() and self._is_supported_file(candidate):
                yield candidate

    @abstractmethod
    def _is_supported_file(self, path: Path) -> bool:
        """Check if the file is supported by this indexer.

        Args:
            path: Path to the file

        Returns:
            True if the file is supported
        """

    def _generate_id(self, file_path: Path, suffix: str = "") -> str:
        """Generate a document ID from the file name.

        The ID is the file's stem (name without its final extension),
        lower-cased, with spaces replaced by underscores. When ``suffix``
        is non-empty it is appended after an underscore.

        Note:
            Only the stem is used, so files that share a stem (for example
            in different directories, or with different extensions) produce
            the same ID unless the caller supplies distinguishing suffixes.

        Args:
            file_path: Path to the source file
            suffix: Optional suffix to add to the ID

        Returns:
            Document ID string
        """
        stem = file_path.stem.replace(" ", "_").lower()
        return f"{stem}_{suffix}" if suffix else stem
|