Initial upload: testdata-cli with CI/CD workflow
Some checks failed
CI / test (push) Failing after 12s
CI / build (push) Has been skipped

This commit is contained in:
2026-03-22 19:45:34 +00:00
parent 02c8de2055
commit dc534716af

View File

@@ -0,0 +1,336 @@
"""TestDataProvider - Custom Faker provider for pattern-based and schema-based generation."""
import string
from typing import Any, Dict, List
from faker.providers import BaseProvider
class TestDataProvider(BaseProvider):
"""Custom Faker provider for JSON Schema-based test data generation."""
def json_schema_type(
    self,
    schema: Dict[str, Any],
    faker_instance: Any = None
) -> Any:
    """Generate a value that conforms to a JSON Schema fragment.

    Keywords are honoured in this order: ``const``, ``enum``,
    ``anyOf``/``oneOf``, ``allOf`` and finally ``type``.  ``const`` and
    ``enum`` come first because they pin the exact permitted values even
    when a ``type`` is declared next to them.

    Args:
        schema: JSON Schema definition.
        faker_instance: Object providing the Faker generation methods
            (``pybool``, ``random_int``, ``pyfloat``, ...).  When omitted,
            the provider's ``generator`` attribute is used if present,
            otherwise the provider itself.

    Returns:
        Generated data matching the schema, or ``None`` when the schema
        carries no keyword this method understands.
    """
    import random

    if faker_instance is None:
        # Methods such as pybool()/email() are exposed by the generator the
        # provider is attached to, not by a bare provider instance.
        generator = getattr(self, "generator", None)
        faker_instance = self if generator is None else generator

    if "const" in schema:
        return schema["const"]
    enum_values = schema.get("enum")
    if enum_values:
        return random.choice(list(enum_values))

    # An empty anyOf/oneOf list offers nothing to choose from; fall through
    # to the remaining keywords instead of failing in random.choice().
    alternatives = schema.get("anyOf") or schema.get("oneOf")
    if alternatives:
        return self.json_schema_type(random.choice(alternatives), faker_instance)

    if "allOf" in schema:
        merged = {}
        for subschema in schema["allOf"]:
            partial = self.json_schema_type(subschema, faker_instance)
            # Only object results can be combined; other results are ignored.
            if isinstance(partial, dict):
                merged.update(partial)
        return merged

    json_type = schema.get("type")
    if isinstance(json_type, (list, tuple)):
        # "type" may list several permitted types; generate one of them.
        json_type = random.choice(json_type) if json_type else None

    if json_type == "null":
        return None
    if json_type == "boolean":
        return faker_instance.pybool()
    if json_type == "integer":
        low = schema.get("minimum")
        high = schema.get("maximum")
        exclusive_low = schema.get("exclusiveMinimum")
        exclusive_high = schema.get("exclusiveMaximum")
        if low is None and exclusive_low is not None:
            low = exclusive_low + 1
        if high is None and exclusive_high is not None:
            high = exclusive_high - 1
        # Defaults are 0..10000, shifted when the one bound that was given
        # would otherwise leave an empty range (e.g. maximum=-5).
        if low is None:
            low = 0 if high is None or high >= 0 else high - 10000
        if high is None:
            high = 10000 if low <= 10000 else low + 10000
        return faker_instance.random_int(min=low, max=high)
    if json_type == "number":
        return faker_instance.pyfloat(
            min_value=schema.get("minimum"),
            max_value=schema.get("maximum")
        )
    if json_type == "string":
        return self._generate_string(schema, faker_instance)
    if json_type == "array":
        return self._generate_array(schema, faker_instance)
    if json_type == "object":
        return self._generate_object(schema, faker_instance)
    return None
def _generate_string(
self,
schema: Dict[str, Any],
faker_instance: Any
) -> str:
"""Generate a string based on string-specific schema constraints."""
format_type = schema.get("format", "")
if format_type == "email":
return faker_instance.email()
if format_type == "date-time" or format_type == "date":
return faker_instance.iso8601()
if format_type == "time":
return faker_instance.time()
if format_type == "uuid":
return faker_instance.uuid4()
if format_type == "uri":
return faker_instance.uri()
if format_type == "hostname":
return faker_instance.hostname()
if format_type == "ipv4":
return faker_instance.ipv4()
if format_type == "ipv6":
return faker_instance.ipv6()
if format_type == "regex":
pattern = schema.get("pattern", ".*")
return self._generate_from_pattern(pattern)
if format_type == "json":
return faker_instance.json()
if format_type == "password":
return faker_instance.password()
if format_type == "firstName":
return faker_instance.first_name()
if format_type == "lastName":
return faker_instance.last_name()
if format_type == "fullName":
return faker_instance.name()
if format_type == "phoneNumber":
return faker_instance.phone_number()
if format_type == "address":
return faker_instance.address()
if format_type == "city":
return faker_instance.city()
if format_type == "country":
return faker_instance.country()
if format_type == "company":
return faker_instance.company()
if format_type == "job":
return faker_instance.job()
if format_type == "url":
return faker_instance.url()
if format_type == "userName":
return faker_instance.user_name()
pattern = schema.get("pattern")
if pattern:
return self._generate_from_pattern(pattern)
min_length = schema.get("minLength", 0)
max_length = schema.get("maxLength", 100)
if min_length == max_length and min_length > 0:
import random
return ''.join(random.choices(string.ascii_letters, k=min_length))
return faker_instance.text(max_nb_chars=max_length)
def _generate_from_pattern(self, pattern: str) -> str:
"""Generate a string that matches the given regex pattern.
Args:
pattern: Regular expression pattern
Returns:
String matching the pattern
"""
import random
result = []
i = 0
while i < len(pattern):
if pattern[i] == '\\' and i + 1 < len(pattern):
char = pattern[i + 1]
if char in 'd':
result.append(str(random.randint(0, 9)))
elif char in 'w':
result.append(
random.choice(string.ascii_letters + string.digits + '_')
)
elif char in 's':
result.append(' ')
elif char in 'D':
result.append(random.choice(string.ascii_letters))
elif char in 'W':
result.append(random.choice(string.punctuation + ' '))
elif char in 'n':
result.append('\n')
elif char in 't':
result.append('\t')
else:
result.append(char)
i += 2
elif pattern[i] == '[':
end = pattern.find(']', i)
if end != -1:
char_class = pattern[i + 1:end]
result.append(random.choice(char_class))
i = end + 1
else:
result.append(pattern[i])
i += 1
elif pattern[i] == '*':
if result and isinstance(result[-1], str):
last = result[-1]
if len(last) > 0:
result[-1] = last * random.randint(0, 3)
i += 1
elif pattern[i] == '+':
if result and isinstance(result[-1], str):
last = result[-1]
if len(last) > 0:
result[-1] = last * random.randint(1, 3)
i += 1
elif pattern[i] == '?':
if result and random.random() > 0.5:
if isinstance(result[-1], str) and len(result[-1]) > 0:
result[-1] = result[-1][:-1]
i += 1
elif pattern[i] == '(':
end = pattern.find(')', i)
if end != -1:
group_content = pattern[i + 1:end]
if '|' in group_content:
options = group_content.split('|')
result.append(random.choice(options))
else:
result.append(self._generate_from_pattern(group_content))
i = end + 1
else:
result.append(pattern[i])
i += 1
elif pattern[i] == '{':
end = pattern.find('}', i)
if end != -1:
count_str = pattern[i + 1:end]
if ',' in count_str:
min_count, max_count = count_str.split(',')
min_c = int(min_count) if min_count else 0
max_c = int(max_count) if max_count else min_c
else:
min_c = max_c = int(count_str)
if result and isinstance(result[-1], str):
result[-1] = result[-1] * random.randint(min_c, max_c)
i = end + 1
else:
result.append(pattern[i])
i += 1
elif pattern[i] == '.':
result.append(random.choice(string.ascii_letters + string.digits))
i += 1
elif pattern[i] in string.ascii_letters:
result.append(pattern[i])
i += 1
elif pattern[i] in string.digits:
result.append(pattern[i])
i += 1
else:
i += 1
final_result = ''.join(result)
if len(final_result) > 100:
final_result = final_result[:100]
return final_result if final_result else ''.join(
random.choices(string.ascii_letters, k=10)
)
def _generate_array(
self,
schema: Dict[str, Any],
faker_instance: Any
) -> List[Any]:
"""Generate an array based on array schema definition."""
import random
items_schema = schema.get("items", {})
min_items = schema.get("minItems", 1)
max_items = schema.get("maxItems", 10)
count = random.randint(min_items, max_items)
unique_items = schema.get("uniqueItems", False)
results = []
seen = set()
for _ in range(count):
item = self.json_schema_type(items_schema, faker_instance)
if unique_items:
item_key = str(item)
attempts = 0
while item_key in seen and attempts < 100:
item = self.json_schema_type(items_schema, faker_instance)
item_key = str(item)
attempts += 1
seen.add(item_key)
results.append(item)
return results
def _generate_object(
self,
schema: Dict[str, Any],
faker_instance: Any
) -> Dict[str, Any]:
"""Generate an object based on object schema definition."""
result = {}
properties = schema.get("properties", {})
for prop_name, prop_schema in properties.items():
result[prop_name] = self.json_schema_type(prop_schema, faker_instance)
return result