Add NLP, scheduler, generator, and describer modules
Some checks failed
CI / test (push) Has been cancelled

This commit is contained in:
2026-02-01 15:08:38 +00:00
parent c33bc1128f
commit bc76b19745

200
src/cronparse/nlp.py Normal file
View File

@@ -0,0 +1,200 @@
"""Natural language to cron expression conversion module."""
import re
from typing import Dict, Any, Optional
from datetime import datetime
# Maps every accepted weekday spelling to its cron day-of-week number
# (Sunday is 0, Saturday is 6). Each inner tuple lists the spellings of one
# day; the tuple's position in the outer sequence is that day's number.
DAYS = {
    spelling: day_number
    for day_number, spellings in enumerate(
        (
            ("sunday", "sun"),
            ("monday", "mon"),
            ("tuesday", "tue", "tues"),
            ("wednesday", "wed"),
            ("thursday", "thu", "thur", "thurs"),
            ("friday", "fri"),
            ("saturday", "sat"),
        )
    )
    for spelling in spellings
}
def text_to_cron(text: str) -> Dict[str, Any]:
    """Convert natural language text to a cron expression.

    The text is lower-cased, stripped and searched against an ordered list
    of phrase patterns. The first pattern found anywhere in the text decides
    the result, so the specific weekly/monthly phrases are listed before the
    generic "at <time>" fallback (which would otherwise match inside them).

    Args:
        text: Natural language description of a schedule.

    Returns:
        Dict with 'cron' and 'description'.

    Raises:
        ValueError: If the text cannot be parsed, or if it names a value
            that cannot appear in a cron field (for example minute 75).
    """
    text = text.lower().strip()

    # Includes the "tues", "thur" and "thurs" spellings so that every alias
    # known to the module-level DAYS table can actually be matched.
    weekday = (
        r"(monday|tuesday|wednesday|thursday|friday|saturday|sunday"
        r"|thurs|thur|tues|mon|tue|wed|thu|fri|sat|sun)"
    )
    # Captures (hour, minute, am/pm). The minutes part is optional so that
    # "9am" works as well as "9:30"; the lookahead stops "930" from being
    # read as hour 9 or 93.
    clock = r"(\d{1,2})(?!\d)(?:[:.](\d{2})?)?\s*(am|pm)?"

    patterns = [
        (r"every\s+(\d+)\s+minutes?", "every_n_minutes"),
        (r"every\s+(\d+)\s+hours?", "every_n_hours"),
        (r"every\s+(\d+)\s+days?", "every_n_days"),
        (r"every\s+minute", "every_minute"),
        (r"every\s+hour", "every_hour"),
        (r"daily\s+at\s+(\d{1,2})[:.](\d{2})?\s*(am|pm)?", "daily_at"),
        (r"daily\s+at\s+(\d{1,2})\s*(am|pm)?", "daily_at_simple"),
        (r"every\s+day\s+at\s+(\d{1,2})[:.](\d{2})?\s*(am|pm)?", "daily_at"),
        (r"every\s+day\s+at\s+(\d{1,2})\s*(am|pm)?", "daily_at_simple"),
        (r"every\s+" + weekday + r"\s+at\s+" + clock, "weekly_at"),
        (r"on\s+" + weekday + r"\s+at\s+" + clock, "weekly_at"),
        (r"on\s+the\s+(\d{1,2})(?:st|nd|rd|th)?\s+of\s+every\s+month\s+at\s+" + clock, "monthly_at"),
        (r"on\s+the\s+(\d{1,2})(?:st|nd|rd|th)?\s+of\s+each\s+month\s+at\s+" + clock, "monthly_at"),
        (r"on\s+day\s+(\d{1,2})\s+of\s+every\s+month\s+at\s+" + clock, "monthly_at"),
        (r"every\s+month\s+on\s+day\s+(\d{1,2})\s+at\s+" + clock, "monthly_at"),
        (r"every\s+week\s+at\s+" + clock, "weekly_at_simple"),
        # Generic fallback; must stay last because "at <h>:<m>" occurs inside
        # every weekly/monthly phrase above.
        (r"at\s+(\d{1,2})[:.](\d{2})?\s*(am|pm)?", "at_time"),
    ]
    for pattern, pattern_type in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return parse_match(pattern_type, match, text)
    raise ValueError(f"Could not parse: '{text}'. Try patterns like 'every Monday at 9am' or 'daily at 14:30'")


def _positive_interval(value: str, unit: str) -> str:
    """Return value unchanged after checking it is a usable cron step (>= 1)."""
    if int(value) < 1:
        raise ValueError(f"Interval must be at least 1, got {value} {unit}")
    return value


def _day_of_month(value: str) -> str:
    """Return value unchanged after checking it is a day-of-month (1-31)."""
    if not 1 <= int(value) <= 31:
        raise ValueError(f"Day of month out of range (1-31): {value}")
    return value


def _time_of_day(match, hour_group: int, ampm_group: int, minute_group: Optional[int] = None) -> tuple:
    """Read (hour, minute) strings from match groups, rejecting impossible times."""
    minute = "0"
    if minute_group is not None:
        # A missing minutes capture means "on the hour".
        minute = match.group(minute_group) or "0"
    if int(minute) > 59:
        raise ValueError(f"Minute out of range (0-59): {minute}")
    hour = parse_hour(match.group(hour_group), match.group(ampm_group))
    if not 0 <= int(hour) <= 23:
        raise ValueError(f"Hour out of range (0-23): {hour}")
    return hour, minute


def parse_match(pattern_type: str, match, original_text: str) -> Dict[str, Any]:
    """Parse a regex match based on pattern type.

    Expected capture-group layout per pattern type:
        every_n_minutes / every_n_hours / every_n_days: (count)
        daily_at / at_time / weekly_at_simple: (hour, minute, am/pm)
        daily_at_simple: (hour, am/pm)
        weekly_at: (weekday, hour, minute, am/pm)
        monthly_at: (day of month, hour, minute, am/pm)

    Args:
        pattern_type: The type of pattern matched.
        match: The regex match object.
        original_text: The original input text (currently unused).

    Returns:
        Dict with 'cron' and 'description'.

    Raises:
        ValueError: If pattern_type is not supported, or a captured value is
            outside what the corresponding cron field allows.
    """
    if pattern_type == "every_n_minutes":
        minutes = _positive_interval(match.group(1), "minutes")
        return {
            "cron": f"*/{minutes} * * * *",
            "description": f"Every {minutes} minutes"
        }
    if pattern_type == "every_n_hours":
        hours = _positive_interval(match.group(1), "hours")
        return {
            "cron": f"0 */{hours} * * *",
            "description": f"Every {hours} hours"
        }
    if pattern_type == "every_n_days":
        days = _positive_interval(match.group(1), "days")
        return {
            "cron": f"0 0 */{days} * *",
            "description": f"Every {days} days"
        }
    if pattern_type == "every_minute":
        return {
            "cron": "* * * * *",
            "description": "Every minute"
        }
    if pattern_type == "every_hour":
        return {
            "cron": "0 * * * *",
            "description": "Every hour"
        }
    if pattern_type in ("daily_at", "at_time"):
        hour, minute = _time_of_day(match, 1, 3, 2)
        return {
            "cron": f"{minute} {hour} * * *",
            "description": f"Daily at {format_time(hour, minute)}"
        }
    if pattern_type == "daily_at_simple":
        hour, minute = _time_of_day(match, 1, 2)
        return {
            "cron": f"{minute} {hour} * * *",
            "description": f"Daily at {format_time(hour, minute)}"
        }
    if pattern_type == "weekly_at":
        hour, minute = _time_of_day(match, 2, 4, 3)
        day = DAYS.get(match.group(1).lower(), "*")
        day_name = match.group(1).title()
        return {
            "cron": f"{minute} {hour} * * {day}",
            "description": f"{day_name} at {format_time(hour, minute)}"
        }
    if pattern_type == "weekly_at_simple":
        # Groups are (hour, minute, am/pm), the same layout as "daily_at".
        # "Every week" carries no weekday, so Sunday (0) is used.
        hour, minute = _time_of_day(match, 1, 3, 2)
        return {
            "cron": f"{minute} {hour} * * 0",
            "description": f"Weekly on Sunday at {format_time(hour, minute)}"
        }
    if pattern_type == "monthly_at":
        day = _day_of_month(match.group(1))
        hour, minute = _time_of_day(match, 2, 4, 3)
        return {
            "cron": f"{minute} {hour} {day} * *",
            "description": f"Monthly on day {day} at {format_time(hour, minute)}"
        }
    raise ValueError(f"Unsupported pattern type: {pattern_type}")
def parse_hour(hour_str: str, ampm: Optional[str]) -> str:
    """Parse hour string with AM/PM conversion.

    Args:
        hour_str: Hour as string, e.g. "9" or "14".
        ampm: "am" or "pm" in any letter case, or None/empty when the hour
            is already on a 24-hour clock. Any other marker is ignored.

    Returns:
        24-hour format hour string without zero padding ("0" to "23").

    Raises:
        ValueError: If hour_str is not an integer, if an hour above 12 is
            combined with am/pm, or if the resulting hour is outside 0-23.
    """
    hour = int(hour_str)
    meridiem = ampm.lower() if ampm else None
    if meridiem in ("am", "pm"):
        if hour > 12:
            # "13pm" used to become hour 25, which no cron field accepts.
            raise ValueError(f"Hour {hour_str} is not valid with {meridiem}")
        if meridiem == "am":
            if hour == 12:
                hour = 0  # 12am is midnight
        elif hour != 12:
            hour += 12  # 12pm stays noon; 1pm-11pm shift to 13-23
    if not 0 <= hour <= 23:
        raise ValueError(f"Hour out of range (0-23): {hour_str}")
    return str(hour)
def format_time(hour: str, minute: str) -> str:
    """Render a 24-hour time as a 12-hour clock string such as "2:05 PM".

    Args:
        hour: Hour in 24-hour format, as integer text.
        minute: Minute, as integer text.

    Returns:
        "<hour>:<minute> <AM|PM>" with the minute zero-padded to two digits
        and the hour left unpadded.

    Raises:
        ValueError: If hour or minute is not integer text.
    """
    hour_24 = int(hour)
    minute_value = int(minute)
    suffix = "PM" if hour_24 >= 12 else "AM"
    # Hours past noon drop by 12; a resulting 0 (midnight) is shown as 12.
    hour_12 = (hour_24 - 12 if hour_24 > 12 else hour_24) or 12
    return "{}:{:02d} {}".format(hour_12, minute_value, suffix)