"""TestDataProvider - Custom Faker provider for pattern-based and schema-based generation."""

import math
import random
import string
from typing import Any, Callable, Dict, List, Optional, Tuple

from faker.providers import BaseProvider

# Longest string ``_generate_from_pattern`` will return.
_MAX_PATTERN_OUTPUT = 100
# Length of the random fallback used when a pattern expands to nothing.
_FALLBACK_LENGTH = 10
# Repetition cap applied to the unbounded quantifiers ``*``, ``+`` and ``{n,}``.
_MAX_REPEAT = 3
# Extra draws allowed per array slot before giving up on ``uniqueItems``.
_MAX_UNIQUE_RETRIES = 100
# Faker's ``text()`` raises ValueError when asked for fewer characters.
_MIN_FAKER_TEXT_CHARS = 5
# Width of the integer range used when a schema leaves a side unbounded.
_DEFAULT_INT_SPAN = 10000

_PRINTABLE = string.ascii_letters + string.digits + string.punctuation + " "

# Candidate characters for the regex shorthand classes.
_ESCAPE_CLASSES = {
    "d": string.digits,
    "w": string.ascii_letters + string.digits + "_",
    "s": " ",
    "D": string.ascii_letters,
    # "_" is a word character, so it must not be offered for \W.
    "W": string.punctuation.replace("_", "") + " ",
    "S": string.ascii_letters + string.digits,
}
# Escapes that stand for one specific control character.
_ESCAPE_LITERALS = {"n": "\n", "t": "\t", "r": "\r"}
# Escapes that match a position rather than a character.
_ZERO_WIDTH_ESCAPES = "bBAZ"

# JSON Schema ``format`` value -> name of the Faker method that produces it.
_FORMAT_METHODS = {
    "email": "email",
    "date-time": "iso8601",
    "date": "date",
    "time": "time",
    "uuid": "uuid4",
    "uri": "uri",
    "hostname": "hostname",
    "ipv4": "ipv4",
    "ipv6": "ipv6",
    "json": "json",
    "password": "password",
    "firstName": "first_name",
    "lastName": "last_name",
    "fullName": "name",
    "phoneNumber": "phone_number",
    "address": "address",
    "city": "city",
    "country": "country",
    "company": "company",
    "job": "job",
    "url": "url",
    "userName": "user_name",
}

# An atom is a zero-argument callable producing one fresh sample, or None
# for constructs that contribute no characters (anchors, lookarounds, ...).
_Atom = Optional[Callable[[], str]]


def _is_number(value: Any) -> bool:
    """Return True for ints and floats, excluding booleans."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _random_letters(length: int) -> str:
    """Return ``length`` random ASCII letters."""
    return "".join(random.choices(string.ascii_letters, k=length))


def _find_class_end(pattern: str, start: int) -> int:
    """Return the index of the ``]`` closing the class opened at ``start``, or -1."""
    i = start + 1
    while i < len(pattern):
        if pattern[i] == "\\":
            i += 2  # an escaped character can never close the class
            continue
        if pattern[i] == "]":
            return i
        i += 1
    return -1


def _find_group_end(pattern: str, start: int) -> int:
    """Return the index of the ``)`` matching the ``(`` at ``start``, or -1."""
    depth = 0
    i = start
    while i < len(pattern):
        ch = pattern[i]
        if ch == "\\":
            i += 2
            continue
        if ch == "[":
            # Parentheses inside a character class are plain characters.
            class_end = _find_class_end(pattern, i)
            if class_end != -1:
                i = class_end + 1
                continue
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def _split_alternatives(pattern: str) -> List[str]:
    """Split ``pattern`` on ``|`` characters that sit outside groups and classes."""
    branches = []
    branch_start = 0
    i = 0
    while i < len(pattern):
        ch = pattern[i]
        if ch == "\\":
            i += 2
            continue
        if ch == "[":
            end = _find_class_end(pattern, i)
            if end != -1:
                i = end + 1
                continue
        elif ch == "(":
            end = _find_group_end(pattern, i)
            if end != -1:
                i = end + 1
                continue
        elif ch == "|":
            branches.append(pattern[branch_start:i])
            branch_start = i + 1
        i += 1
    branches.append(pattern[branch_start:])
    return branches


def _class_members(body: str) -> str:
    """Expand the text between ``[`` and ``]`` into its candidate characters.

    Handles ranges (``a-z``), shorthand escapes (``\\d``), escaped literals
    and a leading ``^`` (negation, resolved against printable ASCII).
    """
    negated = body.startswith("^")
    if negated:
        body = body[1:]

    members: List[str] = []
    i = 0
    while i < len(body):
        ch = body[i]
        if ch == "\\" and i + 1 < len(body):
            escaped = body[i + 1]
            if escaped in _ESCAPE_CLASSES:
                members.extend(_ESCAPE_CLASSES[escaped])
            else:
                members.append(_ESCAPE_LITERALS.get(escaped, escaped))
            i += 2
            continue
        is_range = (
            i + 2 < len(body)
            and body[i + 1] == "-"
            and body[i + 2] != "\\"
            and ch <= body[i + 2]
        )
        if is_range:
            members.extend(
                chr(code) for code in range(ord(ch), ord(body[i + 2]) + 1)
            )
            i += 3
            continue
        members.append(ch)
        i += 1

    if negated:
        excluded = set(members)
        return "".join(c for c in _PRINTABLE if c not in excluded)
    return "".join(members)


def _group_body(inner: str) -> Optional[str]:
    """Strip a group's ``(?...)`` prefix; return None for lookarounds."""
    if not inner.startswith("?"):
        return inner
    if inner.startswith("?:"):
        return inner[2:]
    if inner.startswith(("?=", "?!", "?<=", "?<!")):
        return None
    if inner.startswith(("?P<", "?<")):
        name_end = inner.find(">")
        if name_end != -1:
            return inner[name_end + 1:]
    return inner


def _parse_bounds(text: str) -> Optional[Tuple[int, int]]:
    """Parse the inside of ``{...}``; return None when it is not a quantifier."""
    parts = text.split(",")
    if len(parts) > 2:
        return None
    try:
        if len(parts) == 1:
            low = high = int(parts[0])
        else:
            low = int(parts[0]) if parts[0].strip() else 0
            high = int(parts[1]) if parts[1].strip() else low + _MAX_REPEAT
    except ValueError:
        return None
    if low < 0 or high < low:
        return None
    return low, high


def _parse_quantifier(pattern: str, index: int) -> Tuple[int, int, int]:
    """Read an optional quantifier at ``index``.

    Returns:
        ``(min_count, max_count, next_index)``; ``(1, 1, index)`` when there
        is no quantifier at ``index``.
    """
    if index >= len(pattern):
        return 1, 1, index

    ch = pattern[index]
    if ch == "*":
        low, high, next_index = 0, _MAX_REPEAT, index + 1
    elif ch == "+":
        low, high, next_index = 1, _MAX_REPEAT, index + 1
    elif ch == "?":
        low, high, next_index = 0, 1, index + 1
    elif ch == "{":
        end = pattern.find("}", index)
        bounds = _parse_bounds(pattern[index + 1:end]) if end != -1 else None
        if bounds is None:
            return 1, 1, index
        low, high = bounds
        next_index = end + 1
    else:
        return 1, 1, index

    # A trailing "?" only makes the quantifier lazy; it produces no output.
    if next_index < len(pattern) and pattern[next_index] == "?":
        next_index += 1
    return low, high, next_index


def _parse_atom(pattern: str, index: int) -> Tuple[_Atom, int]:
    """Read one atom starting at ``index``.

    Returns:
        ``(atom, next_index)``. ``next_index`` is always greater than
        ``index`` so the caller's loop is guaranteed to make progress.
    """
    ch = pattern[index]

    if ch == "\\":
        if index + 1 >= len(pattern):
            return None, index + 1  # dangling backslash: nothing to emit
        escaped = pattern[index + 1]
        if escaped in _ZERO_WIDTH_ESCAPES:
            return None, index + 2
        if escaped in _ESCAPE_CLASSES:
            pool = _ESCAPE_CLASSES[escaped]
            return (lambda: random.choice(pool)), index + 2
        literal = _ESCAPE_LITERALS.get(escaped, escaped)
        return (lambda: literal), index + 2

    if ch == "[":
        end = _find_class_end(pattern, index)
        if end != -1:
            members = _class_members(pattern[index + 1:end])
            return (lambda: random.choice(members) if members else ""), end + 1

    if ch == "(":
        end = _find_group_end(pattern, index)
        if end != -1:
            body = _group_body(pattern[index + 1:end])
            if body is None:
                return None, end + 1
            return (lambda: _expand_pattern(body)), end + 1

    if ch == ".":
        any_chars = string.ascii_letters + string.digits
        return (lambda: random.choice(any_chars)), index + 1

    # Anchors, a stray ")" and quantifiers with nothing to repeat emit nothing.
    if ch in "^$)*+?":
        return None, index + 1

    if ch == "{":
        _, _, after = _parse_quantifier(pattern, index)
        if after != index:
            return None, after  # well-formed quantifier with nothing to repeat

    # Everything else (including an unmatched "[", "(" or "{") is a literal.
    return (lambda: ch), index + 1


def _expand_pattern(pattern: str) -> str:
    """Expand ``pattern`` into one sample string, without length post-processing."""
    branch = random.choice(_split_alternatives(pattern))
    pieces: List[str] = []
    i = 0
    while i < len(branch):
        atom, i = _parse_atom(branch, i)
        low, high, i = _parse_quantifier(branch, i)
        if atom is None:
            continue
        # Output is truncated to _MAX_PATTERN_OUTPUT later, so generating
        # more repetitions than that would be wasted work.
        repeats = min(random.randint(low, high), _MAX_PATTERN_OUTPUT)
        pieces.extend(atom() for _ in range(repeats))
    return "".join(pieces)


class TestDataProvider(BaseProvider):
    """Custom Faker provider for JSON Schema-based test data generation."""

    def json_schema_type(
        self,
        schema: Dict[str, Any],
        faker_instance: Any = None
    ) -> Any:
        """Generate data based on JSON Schema type definition.

        Keywords are honoured in this order: ``const``, ``enum``,
        ``anyOf``/``oneOf``, ``allOf``, then ``type``.

        Args:
            schema: JSON Schema definition
            faker_instance: Faker instance to use for generation. Defaults to
                the generator this provider is attached to.

        Returns:
            Generated data matching the schema, or None when the schema gives
            no usable instruction.

        Raises:
            ValueError: If an integer schema's bounds admit no integer.
        """
        if faker_instance is None:
            # Methods such as pybool() and email() live on the generator, not
            # on BaseProvider, so ``self`` is only a last-resort fallback.
            faker_instance = getattr(self, "generator", None)
            if faker_instance is None:
                faker_instance = self

        if not isinstance(schema, dict):
            return None

        # const/enum pin the value regardless of any accompanying "type".
        if "const" in schema:
            return schema["const"]
        if schema.get("enum"):
            return random.choice(schema["enum"])

        alternatives = schema.get("anyOf") or schema.get("oneOf")
        if alternatives:
            return self.json_schema_type(
                random.choice(alternatives), faker_instance
            )

        if "allOf" in schema:
            merged: Dict[str, Any] = {}
            for subschema in schema["allOf"]:
                partial = self.json_schema_type(subschema, faker_instance)
                if isinstance(partial, dict):
                    merged.update(partial)
            return merged

        json_type = schema.get("type")
        if isinstance(json_type, list):
            # "type" may list several permitted types; pick one of them.
            json_type = random.choice(json_type) if json_type else None

        if json_type == "null":
            return None

        if json_type == "boolean":
            return faker_instance.pybool()

        if json_type == "integer":
            min_val, max_val = self._integer_bounds(schema)
            return faker_instance.random_int(min=min_val, max=max_val)

        if json_type == "number":
            return faker_instance.pyfloat(
                min_value=schema.get("minimum"),
                max_value=schema.get("maximum")
            )

        if json_type == "string":
            return self._generate_string(schema, faker_instance)

        if json_type == "array":
            return self._generate_array(schema, faker_instance)

        if json_type == "object":
            return self._generate_object(schema, faker_instance)

        return None

    @staticmethod
    def _integer_bounds(schema: Dict[str, Any]) -> Tuple[int, int]:
        """Derive an inclusive integer range from a schema's bound keywords.

        Numeric ``exclusiveMinimum``/``exclusiveMaximum`` are combined with
        ``minimum``/``maximum``; the boolean form (``exclusiveMinimum: true``
        next to ``minimum``) is also accepted. A side left unbounded defaults
        to 0 / 10000, shifted when that would contradict the explicit side.

        Raises:
            ValueError: If the explicit bounds admit no integer.
        """
        minimum = schema.get("minimum")
        maximum = schema.get("maximum")
        exclusive_minimum = schema.get("exclusiveMinimum")
        exclusive_maximum = schema.get("exclusiveMaximum")

        lower: Optional[int] = None
        if minimum is not None:
            lower = math.ceil(minimum)
            if exclusive_minimum is True and lower == minimum:
                lower += 1
        if _is_number(exclusive_minimum):
            bound = math.floor(exclusive_minimum) + 1
            lower = bound if lower is None else max(lower, bound)

        upper: Optional[int] = None
        if maximum is not None:
            upper = math.floor(maximum)
            if exclusive_maximum is True and upper == maximum:
                upper -= 1
        if _is_number(exclusive_maximum):
            bound = math.ceil(exclusive_maximum) - 1
            upper = bound if upper is None else min(upper, bound)

        if lower is None and upper is None:
            return 0, _DEFAULT_INT_SPAN
        if lower is None:
            lower = 0 if upper >= 0 else upper - _DEFAULT_INT_SPAN
        elif upper is None:
            upper = (
                _DEFAULT_INT_SPAN if lower <= _DEFAULT_INT_SPAN
                else lower + _DEFAULT_INT_SPAN
            )
        elif lower > upper:
            raise ValueError(
                f"integer schema bounds admit no value: {lower} > {upper}"
            )
        return lower, upper

    def _generate_string(
        self,
        schema: Dict[str, Any],
        faker_instance: Any
    ) -> str:
        """Generate a string based on string-specific schema constraints.

        Precedence: a recognised ``format``, then ``pattern``, then the
        ``minLength``/``maxLength`` constraints (defaults 0 and 100).

        Args:
            schema: JSON Schema definition of the string
            faker_instance: Faker instance to use for generation

        Returns:
            Generated string
        """
        format_type = schema.get("format", "")

        if format_type == "regex":
            return self._generate_from_pattern(schema.get("pattern", ".*"))

        method_name = _FORMAT_METHODS.get(format_type)
        if method_name is not None:
            return getattr(faker_instance, method_name)()

        pattern = schema.get("pattern")
        if pattern:
            return self._generate_from_pattern(pattern)

        min_length = schema.get("minLength", 0)
        # A minLength above the (default or explicit) maximum wins, so the
        # length range is never empty.
        max_length = max(schema.get("maxLength", 100), min_length)

        if min_length == max_length:
            return _random_letters(min_length)

        if max_length < _MIN_FAKER_TEXT_CHARS:
            # Too short for Faker's text(); fall back to random letters.
            return _random_letters(random.randint(min_length, max_length))

        text = faker_instance.text(max_nb_chars=max_length)[:max_length]
        if len(text) < min_length:
            text += _random_letters(min_length - len(text))
        return text

    def _generate_from_pattern(self, pattern: str) -> str:
        """Generate a string that matches the given regex pattern.

        Supports a practical subset of regex syntax: literals, ``.``,
        shorthand escapes (``\\d \\w \\s \\D \\W \\S``), character classes
        with ranges and negation, groups (nested, non-capturing and named),
        alternation, and the quantifiers ``* + ? {n} {n,} {n,m}``. Anchors
        and lookarounds produce no output. Unbounded quantifiers repeat at
        most 3 extra times.

        Args:
            pattern: Regular expression pattern

        Returns:
            String generated from the pattern, truncated to 100 characters.
            When the pattern expands to an empty string, 10 random ASCII
            letters are returned instead.
        """
        generated = _expand_pattern(pattern)[:_MAX_PATTERN_OUTPUT]
        if generated:
            return generated
        return _random_letters(_FALLBACK_LENGTH)

    def _generate_array(
        self,
        schema: Dict[str, Any],
        faker_instance: Any
    ) -> List[Any]:
        """Generate an array based on array schema definition.

        The length is drawn from ``minItems``..``maxItems`` (defaults 1 and
        10, adjusted so the range is never empty). With ``uniqueItems`` the
        array may come out shorter than drawn when the item schema cannot
        yield enough distinct values.

        Args:
            schema: JSON Schema definition of the array
            faker_instance: Faker instance to use for generation

        Returns:
            List of generated items
        """
        items_schema = schema.get("items", {})
        min_items = schema.get("minItems")
        max_items = schema.get("maxItems")

        if min_items is None:
            min_items = 1 if max_items is None else min(1, max_items)
        if max_items is None:
            max_items = max(10, min_items)
        # Contradictory explicit bounds: honour minItems rather than crash.
        max_items = max(max_items, min_items)

        count = random.randint(min_items, max_items)

        if not schema.get("uniqueItems", False):
            return [
                self.json_schema_type(items_schema, faker_instance)
                for _ in range(count)
            ]

        results: List[Any] = []
        seen = set()
        for _ in range(count):
            for _attempt in range(_MAX_UNIQUE_RETRIES + 1):
                item = self.json_schema_type(items_schema, faker_instance)
                # repr() keeps 1 and "1" distinct and works for unhashable items.
                item_key = repr(item)
                if item_key not in seen:
                    seen.add(item_key)
                    results.append(item)
                    break
            else:
                # No unseen value turned up; stop rather than emit a duplicate.
                break

        return results

    def _generate_object(
        self,
        schema: Dict[str, Any],
        faker_instance: Any
    ) -> Dict[str, Any]:
        """Generate an object based on object schema definition.

        Every entry under ``properties`` is generated; a missing or null
        ``properties`` yields an empty object.

        Args:
            schema: JSON Schema definition of the object
            faker_instance: Faker instance to use for generation

        Returns:
            Dict mapping each property name to its generated value
        """
        properties = schema.get("properties") or {}
        return {
            prop_name: self.json_schema_type(prop_schema, faker_instance)
            for prop_name, prop_schema in properties.items()
        }