"""Smart type inference utilities for converting string values to Python types."""

import re
from typing import Any, Optional, Union


BOOLEAN_TRUE_VALUES = {"true", "yes", "on", "1"}
BOOLEAN_FALSE_VALUES = {"false", "no", "off", "0"}

# Spellings (after stripping and lower-casing) that denote a null value.
_NULL_VALUES = frozenset({"null", "~", ""})

# Compiled once at import time; always applied with ``fullmatch``.
_INTEGER_RE = re.compile(r"-?\d+")
_FLOAT_RE = re.compile(r"-?\d+\.\d+|-?\d+\.?\d*[eE][+-]?\d+")


def detect_type(value: str) -> str:
    """Detect the type of a string value.

    Surrounding whitespace is ignored for every check, so ``" 42 "`` is
    classified the same way as ``"42"``.

    Checks run in this order; the first match wins:

    1. ``"null"``    -- ``null``, ``~`` or an empty/blank string.
    2. ``"integer"`` -- optional ``-`` followed by digits.
    3. ``"float"``   -- decimal (``1.5``) or exponent (``1e3``) notation.
    4. ``"boolean"`` -- a member of ``BOOLEAN_TRUE_VALUES`` or
       ``BOOLEAN_FALSE_VALUES`` (case-insensitive).
    5. ``"array"``   -- starts with ``[`` and ends with ``]``.
    6. ``"string"``  -- anything else.

    Because the integer check precedes the boolean check, ``"1"`` and
    ``"0"`` are reported as ``"integer"`` even though they also appear in
    the boolean sets.

    Args:
        value: The raw string to classify.

    Returns:
        One of ``"null"``, ``"integer"``, ``"float"``, ``"boolean"``,
        ``"array"`` or ``"string"``.
    """
    text = value.strip()
    lowered = text.lower()

    if lowered in _NULL_VALUES:
        return "null"
    if _INTEGER_RE.fullmatch(text):
        return "integer"
    if _FLOAT_RE.fullmatch(text):
        return "float"
    if lowered in BOOLEAN_TRUE_VALUES or lowered in BOOLEAN_FALSE_VALUES:
        return "boolean"
    if text.startswith("[") and text.endswith("]"):
        return "array"
    return "string"


def convert_value(value: str) -> Any:
    """Convert a string value to its inferred Python type.

    The type is chosen by ``detect_type``. Typed values are converted from
    the whitespace-stripped text, so ``" true "`` yields ``True``. Values
    classified as plain strings are returned unchanged, including any
    surrounding whitespace.

    Args:
        value: The raw string to convert.

    Returns:
        ``None``, ``bool``, ``int``, ``float``, ``list`` or the original
        string, depending on the detected type.
    """
    kind = detect_type(value)
    # Convert from the same normalized text that detect_type classified.
    text = value.strip()

    if kind == "null":
        return None
    if kind == "boolean":
        return text.lower() in BOOLEAN_TRUE_VALUES
    if kind == "integer":
        return int(text)
    if kind == "float":
        return float(text)
    if kind == "array":
        return parse_array(text)
    return value


def _convert_array_item(item: str) -> Any:
    """Unquote a double-quoted array item, or infer the type of a bare one."""
    # Require two characters so a lone '"' is not treated as a quoted string.
    if len(item) >= 2 and item[0] == '"' and item[-1] == '"':
        return item[1:-1]
    return convert_value(item)


def parse_array(value: str) -> list:
    """Parse a string representation of an array.

    The first and last characters of the stripped input are assumed to be
    the enclosing brackets and are discarded without validation. The
    remaining content is split on commas that are outside double-quoted
    strings and outside nested brackets.

    Double-quoted items are returned as strings with the quotes removed
    (no escape sequences are processed). Every other item goes through
    ``convert_value``, so nested arrays are parsed recursively and an
    empty item between two commas becomes ``None``.

    Args:
        value: Text such as ``'[1, "a,b", [2, 3]]'``.

    Returns:
        The parsed items as a list; an empty list when the brackets
        contain nothing but whitespace.
    """
    content = value.strip()[1:-1].strip()
    if not content:
        return []

    items = []
    current = []  # characters of the item being accumulated
    in_string = False
    depth = 0

    for char in content:
        if char == '"':
            in_string = not in_string
        elif not in_string:
            # Commas and brackets are structural only outside quoted text.
            if char == "," and depth == 0:
                items.append("".join(current).strip())
                current = []
                continue
            if char == "[":
                depth += 1
            elif char == "]":
                depth -= 1
        current.append(char)

    # A trailing comma (with or without whitespace) adds no extra item.
    tail = "".join(current).strip()
    if tail:
        items.append(tail)

    return [_convert_array_item(item) for item in items]


def infer_types(data: Any) -> Any:
    """Recursively infer types in nested structures.

    Dict values and list items are processed recursively, strings are
    passed through ``convert_value``, and any other object is returned
    as-is. Dict keys are left untouched. New containers are built; the
    input is not mutated.

    Args:
        data: A dict, list, string or any other value.

    Returns:
        A structure of the same shape with string leaves converted.
    """
    if isinstance(data, dict):
        return {key: infer_types(value) for key, value in data.items()}
    if isinstance(data, list):
        return [infer_types(item) for item in data]
    if isinstance(data, str):
        return convert_value(data)
    return data