Add NLP, scheduler, generator, and describer modules
Some checks failed
CI / test (push) Has been cancelled
Some checks failed
CI / test (push) Has been cancelled
This commit is contained in:
200
src/cronparse/nlp.py
Normal file
200
src/cronparse/nlp.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""Natural language to cron expression conversion module."""
|
||||
|
||||
import re
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
# Lookup from a lowercase weekday name or abbreviation to the number used
# in the day-of-week field of the generated cron expressions (Sunday is 0,
# Saturday is 6). Each inner tuple lists every accepted spelling of one day;
# its position in the outer tuple is that day's number.
DAYS = {
    alias: number
    for number, aliases in enumerate((
        ("sunday", "sun"),
        ("monday", "mon"),
        ("tuesday", "tue", "tues"),
        ("wednesday", "wed"),
        ("thursday", "thu", "thur", "thurs"),
        ("friday", "fri"),
        ("saturday", "sat"),
    ))
    for alias in aliases
}
|
||||
|
||||
|
||||
def text_to_cron(text: str) -> Dict[str, Any]:
    """Convert natural language text to a cron expression.

    The text is lowercased and stripped, then searched against a list of
    schedule patterns. Patterns are ordered from most specific to most
    general and the first one found anywhere in the text wins, so the bare
    "at <time>" form is tried last and cannot shadow the weekly or monthly
    forms that also contain "at <time>".

    Args:
        text: Natural language description of a schedule.

    Returns:
        Dict with 'cron' and optionally 'description'.

    Raises:
        ValueError: If the text cannot be parsed.
    """
    text = text.lower().strip()

    # A clock time: hour, optional ":MM" or ".MM", optional am/pm. The
    # separator is only required when minutes are present, so "9am", "9",
    # "9:30" and "9.30 pm" are all accepted. It always contributes exactly
    # three groups in the order (hour, minute, am/pm), which is the layout
    # parse_match reads.
    clock = r"(\d{1,2})(?:[:.](\d{2}))?\s*(am|pm)?"

    # Every spelling that has an entry in DAYS.
    weekday = (
        r"(monday|tuesday|wednesday|thursday|friday|saturday|sunday"
        r"|mon|tues|tue|wed|thurs|thur|thu|fri|sat|sun)"
    )

    patterns = [
        # Fixed intervals.
        (r"every\s+(\d+)\s+minutes?", "every_n_minutes"),
        (r"every\s+(\d+)\s+hours?", "every_n_hours"),
        (r"every\s+(\d+)\s+days?", "every_n_days"),
        (r"every\s+minute", "every_minute"),
        (r"every\s+hour", "every_hour"),
        # Monthly: day of month followed by a clock time.
        (
            r"on\s+the\s+(\d{1,2})(?:st|nd|rd|th)?\s+of\s+(?:every|each)"
            r"\s+month\s+at\s+" + clock,
            "monthly_at",
        ),
        (
            r"on\s+day\s+(\d{1,2})\s+of\s+(?:every|each)\s+month\s+at\s+"
            + clock,
            "monthly_at",
        ),
        (
            r"every\s+month\s+on\s+day\s+(\d{1,2})\s+at\s+" + clock,
            "monthly_at",
        ),
        # Weekly: named weekday followed by a clock time.
        (r"every\s+" + weekday + r"\s+at\s+" + clock, "weekly_at"),
        (r"on\s+" + weekday + r"\s+at\s+" + clock, "weekly_at"),
        (r"every\s+week\s+at\s+" + clock, "weekly_at_simple"),
        # Daily.
        (r"daily\s+at\s+" + clock, "daily_at"),
        (r"every\s+day\s+at\s+" + clock, "daily_at"),
        # Most general form; must stay last because every pattern above
        # that ends in "at <time>" would otherwise be matched here first.
        (r"at\s+" + clock, "at_time"),
    ]

    for pattern, pattern_type in patterns:
        match = re.search(pattern, text)
        if match:
            return parse_match(pattern_type, match, text)

    raise ValueError(f"Could not parse: '{text}'. Try patterns like 'every Monday at 9am' or 'daily at 14:30'")
|
||||
|
||||
|
||||
def parse_match(pattern_type: str, match, original_text: str) -> Dict[str, Any]:
    """Build the cron result for a regex match of a known pattern type.

    Args:
        pattern_type: The type of pattern matched.
        match: The regex match object. Its capture groups are read
            positionally; for every type that carries a clock time the
            three time groups are, in order, hour, minute and am/pm
            (except 'daily_at_simple', which has only hour and am/pm).
        original_text: The original input text. Not used by this function.

    Returns:
        Dict with 'cron' and optionally 'description'.

    Raises:
        ValueError: If the pattern type is unsupported, an interval is
            zero, a minute is greater than 59, or a day of month is
            outside 1-31.
    """
    # pattern type -> (cron template, description template)
    interval_schedules = {
        "every_n_minutes": ("*/{n} * * * *", "Every {n} minutes"),
        "every_n_hours": ("0 */{n} * * *", "Every {n} hours"),
        "every_n_days": ("0 0 */{n} * *", "Every {n} days"),
    }
    if pattern_type in interval_schedules:
        step = int(match.group(1))
        if step < 1:
            # "*/0" is not a valid cron step.
            raise ValueError(f"Interval must be at least 1, got {step}")
        cron_template, description_template = interval_schedules[pattern_type]
        return {
            "cron": cron_template.format(n=step),
            "description": description_template.format(n=step),
        }

    if pattern_type == "every_minute":
        return {
            "cron": "* * * * *",
            "description": "Every minute"
        }

    if pattern_type == "every_hour":
        return {
            "cron": "0 * * * *",
            "description": "Every hour"
        }

    if pattern_type in ("daily_at", "at_time"):
        hour, minute = _time_fields(match, 1)
        return {
            "cron": f"{minute} {hour} * * *",
            "description": f"Daily at {format_time(hour, minute)}"
        }

    if pattern_type == "daily_at_simple":
        # Only two groups here: hour and am/pm, no minutes.
        hour = parse_hour(match.group(1), match.group(2))
        return {
            "cron": f"0 {hour} * * *",
            "description": f"Daily at {format_time(hour, '0')}"
        }

    if pattern_type == "weekly_at":
        day = DAYS.get(match.group(1).lower(), "*")
        day_name = match.group(1).title()
        hour, minute = _time_fields(match, 2)
        return {
            "cron": f"{minute} {hour} * * {day}",
            "description": f"{day_name} at {format_time(hour, minute)}"
        }

    if pattern_type == "weekly_at_simple":
        # No weekday was named, so the schedule runs on Sunday (0).
        hour, minute = _time_fields(match, 1)
        return {
            "cron": f"{minute} {hour} * * 0",
            "description": f"Weekly on Sunday at {format_time(hour, minute)}"
        }

    if pattern_type == "monthly_at":
        day = match.group(1)
        if not 1 <= int(day) <= 31:
            raise ValueError(f"Day of month out of range (1-31): {day}")
        hour, minute = _time_fields(match, 2)
        return {
            "cron": f"{minute} {hour} {day} * *",
            "description": f"Monthly on day {day} at {format_time(hour, minute)}"
        }

    raise ValueError(f"Unsupported pattern type: {pattern_type}")


def _time_fields(match, hour_group: int) -> tuple:
    """Return (hour, minute) cron fields from three consecutive time groups.

    The groups starting at hour_group must be hour, minute and am/pm, in
    that order. A missing minute defaults to "0".
    """
    minute = match.group(hour_group + 1) or "0"
    if int(minute) > 59:
        raise ValueError(f"Minute out of range (0-59): {minute}")
    hour = parse_hour(match.group(hour_group), match.group(hour_group + 2))
    return hour, minute
|
||||
|
||||
|
||||
def parse_hour(hour_str: str, ampm: Optional[str]) -> str:
    """Parse hour string with AM/PM conversion.

    Args:
        hour_str: Hour as string.
        ampm: AM or PM suffix (any letter case), or None/empty when the
            hour is already on the 24-hour clock.

    Returns:
        24-hour format hour string.

    Raises:
        ValueError: If hour_str is not an integer, is greater than 12 when
            an AM/PM suffix is given, or is greater than 23 otherwise.
    """
    hour = int(hour_str)

    if not ampm:
        if not 0 <= hour <= 23:
            raise ValueError(f"Hour out of range (0-23): {hour_str}")
        return str(hour)

    suffix = ampm.lower()
    if not 0 <= hour <= 12:
        # e.g. "13pm" would otherwise become hour 25.
        raise ValueError(f"Hour out of range for {suffix}: {hour_str}")

    if suffix == "am" and hour == 12:
        # 12am is midnight.
        hour = 0
    elif suffix == "pm" and hour != 12:
        # 12pm is noon and stays 12; every other pm hour shifts by 12.
        hour += 12
    return str(hour)
|
||||
|
||||
|
||||
def format_time(hour: str, minute: str) -> str:
    """Render a 24-hour time as a 12-hour clock string such as "2:30 PM".

    Args:
        hour: Hour in 24-hour format, as a string of digits.
        minute: Minute, as a string of digits.

    Returns:
        "<hour>:<two-digit minute> <AM|PM>", with the hour not zero-padded.
    """
    hour_24 = int(hour)
    minute_value = int(minute)

    suffix = "PM" if hour_24 >= 12 else "AM"

    # Hours past noon wrap back onto the 12-hour dial; an hour of 0
    # (midnight) is shown as 12.
    hour_12 = hour_24 - 12 if hour_24 > 12 else hour_24
    if hour_12 == 0:
        hour_12 = 12

    return f"{hour_12}:{minute_value:02d} {suffix}"
|
||||
Reference in New Issue
Block a user