Add parsers: JSON, Syslog, Apache, and factory
Some checks failed
Some checks failed
This commit is contained in:
207
loglens/parsers/syslog_parser.py
Normal file
207
loglens/parsers/syslog_parser.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Syslog parser for RFC 3164 and RFC 5424 formats."""
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Match, Optional
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
from loglens.parsers.base import LogParser, ParsedLogEntry
|
||||
|
||||
|
||||
class SyslogParser(LogParser):
    """Parser for syslog lines in RFC 3164 (BSD) and RFC 5424 formats.

    Lines may carry a leading ``<PRI>`` value; when present it is decoded
    into a facility name and a severity level. When no severity can be
    decoded, the level is guessed from keywords in the message text.
    """

    format_name = "syslog"

    # Leading "<PRI>" value, e.g. "<34>". Used to strip and decode the
    # priority in front of an RFC 3164 line.
    _PRI_PREFIX_PATTERN = re.compile(r'^<(?P<pri>\d+)>')

    # "Mmm dd hh:mm:ss host tag: message". The process/tag class also allows
    # '.', '/' and '-' so tags such as "systemd-logind[1]" or
    # "postfix/smtpd[2]" are accepted.
    SYSLOG_RFC3164_PATTERN = re.compile(
        r'^(?P<month>[A-Z][a-z]{2})\s+(?P<day>\d{1,2})\s+'
        r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})\s+'
        r'(?P<hostname>[\w.-]+)\s+(?P<process>[\w\[\]./-]+):\s*(?P<message>.*)$'
    )

    # "<PRI>VERSION TIMESTAMP HOST APP PROCID MSGID SD MSG". The structured
    # data group accepts either the nil value "-" or one or more bracketed
    # elements; inside an element a backslash escapes the next character.
    SYSLOG_RFC5424_PATTERN = re.compile(
        r'^(?P<pri><\d+>)?(?P<version>\d+)\s+(?P<timestamp>\S+)\s+'
        r'(?P<hostname>\S+)\s+(?P<process>\S+)\s+(?P<pid>\S+)\s+'
        r'(?P<msgid>\S+)?\s*'
        r'(?P<struct_data>-|(?:\[(?:[^\]\\]|\\.)*\])+)'
        r'\s*(?P<message>.*)$'
    )

    # Severity number (low three bits of PRI) -> level name.
    PRIORITY_MAP = {
        0: "emergency",
        1: "alert",
        2: "critical",
        3: "error",
        4: "warning",
        5: "notice",
        6: "info",
        7: "debug",
    }

    # Facility number (PRI shifted right by three bits) -> facility name.
    FACILITY_MAP = {
        0: "kernel",
        1: "user",
        2: "mail",
        3: "daemon",
        4: "auth",
        5: "syslog",
        6: "lpr",
        7: "news",
        8: "uucp",
        9: "clock",
        10: "authpriv",
        11: "ftp",
        12: "ntp",
        13: "logaudit",
        14: "logalert",
        15: "cron",
        16: "local0",
        17: "local1",
        18: "local2",
        19: "local3",
        20: "local4",
        21: "local5",
        22: "local6",
        23: "local7",
    }

    # Ordered (level, keywords) pairs used by _infer_level. Order matters:
    # the first level with a keyword contained in the message wins.
    _LEVEL_KEYWORDS = (
        ("emergency", ("emerg", "panic", "critical system")),
        ("alert", ("alert", "immediate action")),
        ("critical", ("critical", "fatal", "segfault")),
        ("error", ("error", "exception", "failed", "failure")),
        ("warning", ("warning", "warn", "deprecation")),
        ("notice", ("notice",)),
        ("info", ("info", "information")),
        ("debug", ("debug", "trace")),
    )

    def __init__(self):
        """Set up the month-abbreviation lookup used for RFC 3164 dates."""
        self.month_map = {
            "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
            "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
        }

    def can_parse(self, line: str) -> bool:
        """Return True if ``line`` looks like a syslog line.

        A line qualifies when it starts with a numeric ``<PRI>`` value, or
        when it matches the RFC 3164 layout with a known month abbreviation.
        """
        line = line.strip()
        if not line:
            return False

        if line.startswith("<") and ">" in line:
            pri_text = line.split(">", 1)[0][1:]
            if pri_text.isdigit():
                return True

        match = self.SYSLOG_RFC3164_PATTERN.match(line)
        return bool(match) and match.group("month") in self.month_map

    def parse(self, line: str, line_number: int = 0) -> Optional[ParsedLogEntry]:
        """Parse one syslog line into a ``ParsedLogEntry``.

        Args:
            line: Raw log line; surrounding whitespace is ignored.
            line_number: Line number recorded on the returned entry.

        Returns:
            ``None`` for a blank line. Otherwise an entry holding the
            stripped raw line and line number; the remaining fields are
            filled in only when the line matches one of the two formats.
        """
        line = line.strip()
        if not line:
            return None

        entry = ParsedLogEntry(
            raw_line=line,
            line_number=line_number
        )

        parsed = None
        if line.startswith("<"):
            parsed = self._parse_rfc5424(line)
        if parsed is None:
            # Also reached for "<PRI>"-prefixed lines that are not RFC 5424:
            # the RFC 3164 parser strips and decodes the priority itself.
            parsed = self._parse_rfc3164(line)

        if parsed:
            entry.timestamp = parsed.get("timestamp")
            entry.host = parsed.get("hostname")
            entry.level = parsed.get("level")
            entry.message = parsed.get("message", "")
            entry.facility = parsed.get("facility")
            entry.logger = parsed.get("process")

        return entry

    def _decode_pri(self, pri_num: int) -> Dict[str, Optional[str]]:
        """Split a PRI number into ``facility`` and ``level`` names.

        Either value is ``None`` when the number has no entry in the
        corresponding map.
        """
        return {
            "facility": self.FACILITY_MAP.get(pri_num >> 3),
            "level": self.PRIORITY_MAP.get(pri_num & 0x07),
        }

    def _rfc3164_timestamp(
        self, month: int, day: int, hour: int, minute: int, second: int
    ) -> Optional[datetime]:
        """Build a naive datetime for an RFC 3164 timestamp, which has no year.

        The current year is assumed. Returns ``None`` when the fields do not
        form a valid date/time in that year.
        """
        now = datetime.now()
        try:
            timestamp = datetime(now.year, month, day, hour, minute, second)
        except ValueError:
            return None

        # A result well in the future most likely belongs to the previous
        # year (e.g. a December line read in January).
        if (timestamp - now).total_seconds() > 86400:
            try:
                timestamp = timestamp.replace(year=now.year - 1)
            except ValueError:
                # Feb 29 has no counterpart in the previous year; keep as is.
                pass
        return timestamp

    def _parse_rfc3164(self, line: str) -> Optional[Dict[str, Any]]:
        """Parse an RFC 3164 line, with or without a leading ``<PRI>``.

        Returns:
            A dict with ``timestamp``, ``hostname``, ``process``,
            ``message``, ``level`` and ``facility`` keys, or ``None`` when
            the line does not match or the month abbreviation is unknown.
            ``timestamp`` is ``None`` when the date fields are not valid.
        """
        facility = None
        level = None
        pri_match = self._PRI_PREFIX_PATTERN.match(line)
        if pri_match:
            decoded = self._decode_pri(int(pri_match.group("pri")))
            facility = decoded["facility"]
            level = decoded["level"]
            line = line[pri_match.end():].lstrip()

        match = self.SYSLOG_RFC3164_PATTERN.match(line)
        if not match:
            return None

        # The pattern accepts any capitalised three-letter token, so the
        # month still has to be validated to avoid a KeyError.
        month_number = self.month_map.get(match.group("month"))
        if month_number is None:
            return None

        message = match.group("message")
        if not level:
            level = self._infer_level(message)

        return {
            "timestamp": self._rfc3164_timestamp(
                month_number,
                int(match.group("day")),
                int(match.group("hour")),
                int(match.group("minute")),
                int(match.group("second")),
            ),
            "hostname": match.group("hostname"),
            "process": match.group("process"),
            "message": message,
            "level": level,
            "facility": facility,
        }

    def _parse_rfc5424(self, line: str) -> Optional[Dict[str, Any]]:
        """Parse an RFC 5424 line.

        Header fields equal to the nil value ``-`` are reported as ``None``.

        Returns:
            A dict with ``timestamp``, ``hostname``, ``process``,
            ``message``, ``level`` and ``facility`` keys, or ``None`` when
            the line does not match. ``timestamp`` is ``None`` when the
            field is nil or cannot be parsed as ISO 8601.
        """
        match = self.SYSLOG_RFC5424_PATTERN.match(line)
        if not match:
            return None

        def nil_to_none(value: Optional[str]) -> Optional[str]:
            return None if value == "-" else value

        message = match.group("message")

        timestamp = None
        timestamp_str = nil_to_none(match.group("timestamp"))
        if timestamp_str is not None:
            try:
                timestamp = date_parser.isoparse(timestamp_str)
            except ValueError:
                # Leave the timestamp unset rather than inventing "now",
                # which would misdate the entry.
                timestamp = None

        facility = None
        level = None
        raw_pri = match.group("pri")
        if raw_pri:
            decoded = self._decode_pri(int(raw_pri[1:-1]))
            facility = decoded["facility"]
            level = decoded["level"]

        if not level:
            level = self._infer_level(message)

        app_name = nil_to_none(match.group("process"))
        proc_id = nil_to_none(match.group("pid"))
        process = app_name
        if app_name is not None and proc_id is not None:
            process = f"{app_name}[{proc_id}]"

        return {
            "timestamp": timestamp,
            "hostname": nil_to_none(match.group("hostname")),
            "process": process,
            "message": message,
            "level": level,
            "facility": facility,
        }

    def _infer_level(self, message: str) -> Optional[str]:
        """Guess a level from keywords contained in ``message``.

        Matching is a case-insensitive substring test. Levels are checked
        from most to least severe and the first hit is returned; ``None``
        means no keyword was found.
        """
        if not message:
            return None

        message_lower = message.lower()
        for level, keywords in self._LEVEL_KEYWORDS:
            if any(kw in message_lower for kw in keywords):
                return level
        return None
|
||||
Reference in New Issue
Block a user