"""Natural language to cron expression conversion module."""

import re
from typing import Any, Dict, Optional, Tuple
from datetime import datetime


DAYS = {
    "sunday": 0, "sun": 0,
    "monday": 1, "mon": 1,
    "tuesday": 2, "tue": 2, "tues": 2,
    "wednesday": 3, "wed": 3,
    "thursday": 4, "thu": 4, "thur": 4, "thurs": 4,
    "friday": 5, "fri": 5,
    "saturday": 6, "sat": 6,
}

# Time of day: hour, optional ":MM" / ".MM", optional am/pm suffix.
# Always contributes exactly three groups in the order (hour, minute, ampm);
# the separator is only required when minutes are present, so "9am" and
# "9:30am" are both accepted.
_TIME = r"(\d{1,2})(?:[:.](\d{2}))?\s*(am|pm)?"

# Built from DAYS so every spelling the table knows is recognised.  Longest
# names come first so "tuesday" is preferred over its prefix "tue".
_DAY_NAME = "(" + "|".join(sorted(DAYS, key=len, reverse=True)) + ")"

# Day of month with an optional ordinal suffix ("15", "15th", "1st").
_ORDINAL_DAY = r"(\d{1,2})(?:st|nd|rd|th)?"

# Order matters: patterns are tried top to bottom with ``search``, so the
# specific weekly/monthly phrasings must precede the generic "at <time>"
# fallback, which would otherwise match the tail of every one of them.
_PATTERNS = tuple(
    (re.compile(pattern), pattern_type)
    for pattern, pattern_type in (
        (r"every\s+(\d+)\s+minutes?", "every_n_minutes"),
        (r"every\s+(\d+)\s+hours?", "every_n_hours"),
        (r"every\s+(\d+)\s+days?", "every_n_days"),
        (r"every\s+minute", "every_minute"),
        (r"every\s+hour", "every_hour"),
        (r"(?:every|on)\s+" + _DAY_NAME + r"\s+at\s+" + _TIME, "weekly_at"),
        (
            r"on\s+the\s+" + _ORDINAL_DAY
            + r"\s+of\s+(?:every|each)\s+month\s+at\s+" + _TIME,
            "monthly_at",
        ),
        (
            r"on\s+day\s+(\d{1,2})\s+of\s+every\s+month\s+at\s+" + _TIME,
            "monthly_at",
        ),
        (
            r"every\s+month\s+on\s+day\s+(\d{1,2})\s+at\s+" + _TIME,
            "monthly_at",
        ),
        (r"every\s+week\s+at\s+" + _TIME, "weekly_at_simple"),
        (r"(?:daily|every\s+day)\s+at\s+" + _TIME, "daily_at"),
        (r"at\s+" + _TIME, "at_time"),
    )
)


def text_to_cron(text: str) -> Dict[str, Any]:
    """Convert natural language text to a cron expression.

    The text is lower-cased and stripped, then matched against the known
    phrasings from most specific to least specific; the first hit wins.

    Args:
        text: Natural language description of a schedule.

    Returns:
        Dict with 'cron' and optionally 'description'.

    Raises:
        ValueError: If the text cannot be parsed, or if it names a value
            outside the range of its cron field (e.g. hour 25).
    """
    text = text.lower().strip()

    for pattern, pattern_type in _PATTERNS:
        match = pattern.search(text)
        if match:
            return parse_match(pattern_type, match, text)

    raise ValueError(f"Could not parse: '{text}'. Try patterns like 'every Monday at 9am' or 'daily at 14:30'")


def _bounded(value: str, low: int, high: int, label: str) -> str:
    """Return *value* as a normalised decimal string, checking its range.

    Normalising through ``int`` also drops leading zeros ("05" -> "5").

    Raises:
        ValueError: If the number is outside ``low..high`` inclusive.
    """
    number = int(value)
    if not low <= number <= high:
        raise ValueError(
            f"{label} must be between {low} and {high}, got {number}"
        )
    return str(number)


def _clock_fields(
    hour_str: str, minute_str: Optional[str], ampm: Optional[str]
) -> Tuple[str, str]:
    """Turn captured time pieces into validated (hour, minute) cron fields."""
    hour = _bounded(parse_hour(hour_str, ampm), 0, 23, "Hour")
    # A missing minute group means "on the hour".
    minute = _bounded(minute_str or "0", 0, 59, "Minute")
    return hour, minute


def parse_match(pattern_type: str, match, original_text: str) -> Dict[str, Any]:
    """Parse a regex match based on pattern type.

    Group layout expected per pattern type:
        every_n_*:         (count)
        daily_at, at_time: (hour, minute, ampm)
        daily_at_simple:   (hour, ampm)
        weekly_at:         (day name, hour, minute, ampm)
        weekly_at_simple:  (hour, minute, ampm)
        monthly_at:        (day of month, hour, minute, ampm)

    Args:
        pattern_type: The type of pattern matched.
        match: The regex match object.
        original_text: The original input text.

    Returns:
        Dict with 'cron' and optionally 'description'.

    Raises:
        ValueError: If the pattern type is unsupported or a captured value
            is outside the range of its cron field.
    """
    # Interval steps must be at least 1 and stay within the field's range.
    if pattern_type == "every_n_minutes":
        minutes = _bounded(match.group(1), 1, 59, "Minute interval")
        return {
            "cron": f"*/{minutes} * * * *",
            "description": f"Every {minutes} minutes"
        }

    if pattern_type == "every_n_hours":
        hours = _bounded(match.group(1), 1, 23, "Hour interval")
        return {
            "cron": f"0 */{hours} * * *",
            "description": f"Every {hours} hours"
        }

    if pattern_type == "every_n_days":
        days = _bounded(match.group(1), 1, 31, "Day interval")
        return {
            "cron": f"0 0 */{days} * *",
            "description": f"Every {days} days"
        }

    if pattern_type == "every_minute":
        return {
            "cron": "* * * * *",
            "description": "Every minute"
        }

    if pattern_type == "every_hour":
        return {
            "cron": "0 * * * *",
            "description": "Every hour"
        }

    if pattern_type in ("daily_at", "at_time"):
        hour, minute = _clock_fields(
            match.group(1), match.group(2), match.group(3)
        )
        return {
            "cron": f"{minute} {hour} * * *",
            "description": f"Daily at {format_time(hour, minute)}"
        }

    if pattern_type == "daily_at_simple":
        hour, minute = _clock_fields(match.group(1), None, match.group(2))
        return {
            "cron": f"{minute} {hour} * * *",
            "description": f"Daily at {format_time(hour, minute)}"
        }

    if pattern_type == "weekly_at":
        day_key = match.group(1).lower()
        if day_key not in DAYS:
            # Never fall back to "*": that would silently turn a weekly
            # schedule into a daily one.
            raise ValueError(f"Unknown day of week: {match.group(1)}")
        day = DAYS[day_key]
        hour, minute = _clock_fields(
            match.group(2), match.group(3), match.group(4)
        )
        day_name = match.group(1).title()
        return {
            "cron": f"{minute} {hour} * * {day}",
            "description": f"{day_name} at {format_time(hour, minute)}"
        }

    if pattern_type == "weekly_at_simple":
        # No weekday was given; the schedule is pinned to Sunday (0).
        hour, minute = _clock_fields(
            match.group(1), match.group(2), match.group(3)
        )
        return {
            "cron": f"{minute} {hour} * * 0",
            "description": f"Weekly on Sunday at {format_time(hour, minute)}"
        }

    if pattern_type == "monthly_at":
        day = _bounded(match.group(1), 1, 31, "Day of month")
        hour, minute = _clock_fields(
            match.group(2), match.group(3), match.group(4)
        )
        return {
            "cron": f"{minute} {hour} {day} * *",
            "description": f"Monthly on day {day} at {format_time(hour, minute)}"
        }

    raise ValueError(f"Unsupported pattern type: {pattern_type}")


def parse_hour(hour_str: str, ampm: Optional[str]) -> str:
    """Parse hour string with AM/PM conversion.

    "12am" maps to 0 and "12pm" stays 12; any other pm hour gains 12.
    Without a suffix the hour is returned as given. No range check is
    performed here.

    Args:
        hour_str: Hour as string.
        ampm: AM or PM suffix (any case), or None.

    Returns:
        24-hour format hour string.
    """
    hour = int(hour_str)
    suffix = ampm.lower() if ampm else None
    if suffix == "am" and hour == 12:
        hour = 0
    elif suffix == "pm" and hour != 12:
        hour += 12
    return str(hour)


def format_time(hour: str, minute: str) -> str:
    """Format time in human-readable format.

    Args:
        hour: Hour in 24-hour format.
        minute: Minute.

    Returns:
        Formatted time string, e.g. "2:05 PM".
    """
    h = int(hour)
    m = int(minute)
    ampm = "PM" if h >= 12 else "AM"
    display_hour = h - 12 if h > 12 else h
    if display_hour == 0:
        # Midnight is shown as 12 on a 12-hour clock.
        display_hour = 12
    return f"{display_hour}:{m:02d} {ampm}"