Add JSON, syslog, and Apache parsers
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / test (3.9) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-02 10:06:58 +00:00
parent 4f6f5e0370
commit f2ca3181ee


@@ -1,70 +1,190 @@
"""JSON log parser."""

from datetime import datetime
from typing import Any, Optional

import orjson

from loglens.parsers.base import LogParser, ParsedLogEntry


class JSONParser(LogParser):
    """Parser for JSON-formatted logs."""

    format_name = "json"

    def __init__(self):
        self.timestamp_fields = [
            "@timestamp",
            "timestamp",
            "time",
            "date",
            "datetime",
            "created_at",
            "updated_at",
            "log_time",
            "event_time",
        ]
        self.level_fields = ["level", "severity", "log_level", "priority", "levelname"]
        self.message_fields = ["message", "msg", "log", "text", "content"]
        self.logger_fields = ["logger", "logger_name", "name", "source"]

    def can_parse(self, line: str) -> bool:
        """Check if line is valid JSON."""
        line = line.strip()
        if not line:
            return False
        if line.startswith("[") or line.startswith("{"):
            try:
                orjson.loads(line)
                return True
            except orjson.JSONDecodeError:
                pass
        return False

    def parse(self, line: str, line_number: int = 0) -> Optional[ParsedLogEntry]:
        """Parse a JSON log line."""
        line = line.strip()
        if not line:
            return None
        try:
            data = orjson.loads(line)
        except orjson.JSONDecodeError as e:
            return ParsedLogEntry(
                raw_line=line,
                message=f"JSON parse error: {str(e)}",
                line_number=line_number,
                severity="error",
            )

        entry = ParsedLogEntry(raw_line=line, line_number=line_number)

        if isinstance(data, dict):
            entry.timestamp = self._extract_timestamp(data)
            entry.level = self._extract_field(data, self.level_fields)
            entry.message = self._extract_field(data, self.message_fields)
            entry.logger = self._extract_field(data, self.logger_fields)
            entry.extra = {
                k: v
                for k, v in data.items()
                if k not in self.timestamp_fields
                and k not in self.level_fields
                and k not in self.message_fields
                and k not in self.logger_fields
                and not k.startswith("_")
            }
        elif isinstance(data, list):
            entry.message = str(data)
            entry.extra = {"array_length": len(data)}

        return entry

    def _extract_timestamp(self, data: dict[str, Any]) -> Optional[datetime]:
        """Extract timestamp from data dict."""
        for field in self.timestamp_fields:
            if field in data:
                value = data[field]
                if isinstance(value, (int, float)):
                    return datetime.fromtimestamp(value)
                elif isinstance(value, str):
                    try:
                        return datetime.fromisoformat(value.replace("Z", "+00:00"))
                    except ValueError:
                        pass
        return None

    def _extract_field(self, data: dict[str, Any], fields: list[str]) -> Optional[str]:
        """Extract first matching field from data."""
        for field in fields:
            if field in data and data[field] is not None:
                value = data[field]
                if isinstance(value, str):
                    return value
                return str(value)
        return None
    def parse_batch(self, lines: list[str]) -> list[ParsedLogEntry]:
        """Parse multiple lines, handling multi-line JSON."""
        results = []
        buffer = ""
        line_number = 0

        for line in lines:
            line_number += 1
            line_stripped = line.strip()

            if not line_stripped:
                continue

            # Accumulate lines so pretty-printed (multi-line) JSON objects can
            # be reassembled before decoding.
            if buffer:
                buffer += line_stripped
            else:
                buffer = line_stripped

            try:
                data = orjson.loads(buffer)
                entry = self._create_entry_from_data(data, line, line_number)
                results.append(entry)
                buffer = ""
            except orjson.JSONDecodeError:
                if line_stripped.startswith("{") or line_stripped.startswith("["):
                    # A line that looks like a complete object or array but
                    # still fails to decode is reported as an error;
                    # otherwise keep buffering.
                    if line_stripped.endswith("}") or line_stripped.endswith("]"):
                        results.append(
                            ParsedLogEntry(
                                raw_line=line,
                                message="Invalid JSON",
                                line_number=line_number,
                                severity="error",
                            )
                        )
                        buffer = ""
                elif buffer.endswith("}") or buffer.endswith("]"):
                    # The buffered multi-line value appears complete; try once
                    # more and report an error if it still fails to decode.
                    try:
                        data = orjson.loads(buffer)
                        entry = self._create_entry_from_data(data, buffer, line_number)
                        results.append(entry)
                    except orjson.JSONDecodeError:
                        results.append(
                            ParsedLogEntry(
                                raw_line=buffer,
                                message="Invalid JSON",
                                line_number=line_number,
                                severity="error",
                            )
                        )
                    buffer = ""
                elif len(buffer) > 10000:
                    # Give up on runaway buffers to avoid unbounded growth.
                    results.append(
                        ParsedLogEntry(
                            raw_line=buffer[:100] + "...",
                            message="JSON too large to parse",
                            line_number=line_number,
                            severity="error",
                        )
                    )
                    buffer = ""

        return results

    def _create_entry_from_data(self, data: Any, raw_line: str, line_number: int) -> ParsedLogEntry:
        """Create ParsedLogEntry from parsed JSON data."""
        entry = ParsedLogEntry(raw_line=raw_line, line_number=line_number)

        if isinstance(data, dict):
            entry.timestamp = self._extract_timestamp(data)
            entry.level = self._extract_field(data, self.level_fields)
            entry.message = self._extract_field(data, self.message_fields)
            entry.logger = self._extract_field(data, self.logger_fields)
            entry.extra = {
                k: v
                for k, v in data.items()
                if k not in self.timestamp_fields
                and k not in self.level_fields
                and k not in self.message_fields
                and k not in self.logger_fields
            }
        else:
            entry.message = str(data)

        return entry
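
For context, a minimal usage sketch of the new parser. The sample log line and the import path are illustrative assumptions (the diff view does not show the file name); only attributes actually set in the code above (timestamp, level, message, extra) are used.

# Hypothetical import path; adjust to wherever this file lives in loglens.
from loglens.parsers.json import JSONParser

parser = JSONParser()

line = '{"@timestamp": "2026-02-02T10:06:58+00:00", "level": "ERROR", "message": "disk full", "host": "web-1"}'

if parser.can_parse(line):
    entry = parser.parse(line, line_number=1)
    print(entry.timestamp, entry.level, entry.message)  # extracted via the field lookup lists
    print(entry.extra)                                  # leftover keys, e.g. {"host": "web-1"}

# parse_batch() reassembles a pretty-printed object spanning several lines:
entries = parser.parse_batch(['{', '"level": "info",', '"message": "started"', '}'])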