Initial upload: testdata-cli with CI/CD workflow
This commit is contained in:
336
src/testdatagen/providers/testdata_provider.py
Normal file
336
src/testdatagen/providers/testdata_provider.py
Normal file
@@ -0,0 +1,336 @@
|
||||
"""TestDataProvider - Custom Faker provider for pattern-based and schema-based generation."""
|
||||
|
||||
import string
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from faker.providers import BaseProvider
|
||||
|
||||
|
||||
class TestDataProvider(BaseProvider):
    """Custom Faker provider for JSON Schema-based test data generation.

    The public entry point is :meth:`json_schema_type`; the ``_generate_*``
    helpers handle the ``string``, ``array`` and ``object`` types and a small
    subset of regular-expression syntax for ``pattern`` constraints.
    """

    # Maps a JSON Schema ``format`` value to the name of the zero-argument
    # faker method that produces a matching value.
    _FORMAT_METHODS = {
        "email": "email",
        "date-time": "iso8601",
        "date": "date",
        "time": "time",
        "uuid": "uuid4",
        "uri": "uri",
        "hostname": "hostname",
        "ipv4": "ipv4",
        "ipv6": "ipv6",
        "json": "json",
        "password": "password",
        "firstName": "first_name",
        "lastName": "last_name",
        "fullName": "name",
        "phoneNumber": "phone_number",
        "address": "address",
        "city": "city",
        "country": "country",
        "company": "company",
        "job": "job",
        "url": "url",
        "userName": "user_name",
    }

    # Characters used when a pattern asks for "anything printable".
    _PRINTABLE_CHARS = string.ascii_letters + string.digits + string.punctuation + ' '

    # Regex shorthand classes -> characters a sample may be drawn from.
    # ``\W`` must not yield '_' because '_' is a word character.
    _ESCAPE_CHARSETS = {
        'd': string.digits,
        'w': string.ascii_letters + string.digits + '_',
        's': ' ',
        'D': string.ascii_letters,
        'W': string.punctuation.replace('_', '') + ' ',
        'S': string.ascii_letters + string.digits,
    }

    # Escapes that stand for one specific control character.
    _LITERAL_ESCAPES = {'n': '\n', 't': '\t', 'r': '\r'}

    # Zero-width assertions: they match a position, so they emit nothing.
    _ZERO_WIDTH_ESCAPES = frozenset('bBAZ')

    def json_schema_type(
        self,
        schema: Dict[str, Any],
        faker_instance: Any = None
    ) -> Any:
        """Generate data based on JSON Schema type definition.

        Keywords are honoured in this order: ``anyOf``/``oneOf`` (one
        alternative is picked at random), ``allOf`` (dict results of all
        subschemas are merged), ``const``, ``enum``, then ``type``.

        Args:
            schema: JSON Schema definition
            faker_instance: Faker instance to use for generation. When
                omitted, the generator this provider is attached to is used.

        Returns:
            Generated data matching the schema, or ``None`` when the schema
            has no keyword this method understands.
        """
        import random

        if faker_instance is None:
            # Methods such as pybool()/email() live on the generator, not on
            # this provider object, so prefer the generator when available.
            faker_instance = getattr(self, "generator", self)

        # An empty anyOf/oneOf list falls through instead of crashing.
        alternatives = schema.get("anyOf") or schema.get("oneOf")
        if alternatives:
            return self.json_schema_type(random.choice(alternatives), faker_instance)

        if "allOf" in schema:
            merged: Dict[str, Any] = {}
            for subschema in schema["allOf"]:
                generated = self.json_schema_type(subschema, faker_instance)
                if isinstance(generated, dict):
                    merged.update(generated)
            return merged

        # const/enum restrict the value regardless of "type", so they must be
        # consulted before the type dispatch below.
        if "const" in schema:
            return schema["const"]
        if schema.get("enum"):
            return random.choice(schema["enum"])

        json_type = schema.get("type")
        if isinstance(json_type, list) and json_type:
            # "type": ["string", "null"] allows any of the listed types.
            json_type = random.choice(json_type)

        if json_type == "null":
            return None

        if json_type == "boolean":
            return faker_instance.pybool()

        if json_type == "integer":
            low, high = self._integer_bounds(schema)
            return faker_instance.random_int(min=low, max=high)

        if json_type == "number":
            return faker_instance.pyfloat(
                min_value=schema.get("minimum"),
                max_value=schema.get("maximum")
            )

        if json_type == "string":
            return self._generate_string(schema, faker_instance)

        if json_type == "array":
            return self._generate_array(schema, faker_instance)

        if json_type == "object":
            return self._generate_object(schema, faker_instance)

        return None

    @staticmethod
    def _integer_bounds(schema: Dict[str, Any]):
        """Return the inclusive ``(low, high)`` range for an integer schema.

        ``minimum``/``maximum`` take precedence over their exclusive
        counterparts. A missing bound defaults to 0 / 10000, shifted when
        that default would fall on the wrong side of the bound that was
        given (so the range is never empty because of a default).
        """
        minimum = schema.get("minimum")
        maximum = schema.get("maximum")
        exclusive_minimum = schema.get("exclusiveMinimum")
        exclusive_maximum = schema.get("exclusiveMaximum")

        low = None
        if minimum is not None:
            low = minimum
        elif exclusive_minimum is not None:
            low = exclusive_minimum + 1

        high = None
        if maximum is not None:
            high = maximum
        elif exclusive_maximum is not None:
            high = exclusive_maximum - 1

        if low is None and high is None:
            return 0, 10000
        if low is None:
            return (0 if high >= 0 else high - 10000), high
        if high is None:
            return low, (10000 if low <= 10000 else low + 10000)
        return low, high

    def _generate_string(
        self,
        schema: Dict[str, Any],
        faker_instance: Any
    ) -> str:
        """Generate a string based on string-specific schema constraints.

        Precedence: a recognised ``format``, then ``pattern``, then free
        text sized by ``minLength``/``maxLength`` (defaults 0 and 100).
        """
        import random

        format_type = schema.get("format", "")

        if format_type == "regex":
            return self._generate_from_pattern(schema.get("pattern", ".*"))

        method_name = self._FORMAT_METHODS.get(format_type)
        if method_name is not None:
            return getattr(faker_instance, method_name)()

        pattern = schema.get("pattern")
        if pattern:
            return self._generate_from_pattern(pattern)

        min_length = schema.get("minLength", 0)
        max_length = schema.get("maxLength", max(100, min_length))
        if min_length > max_length:
            # Contradictory bounds: honour the upper limit.
            min_length = max_length

        # faker's text() rejects max_nb_chars below 5, and a fixed length is
        # easiest to hit exactly with random letters.
        if max_length < 5 or min_length == max_length:
            length = random.randint(min_length, max_length)
            return ''.join(random.choices(string.ascii_letters, k=length))

        text = faker_instance.text(max_nb_chars=max_length)[:max_length]
        shortfall = min_length - len(text)
        if shortfall > 0:
            # Pad so that minLength is respected.
            text += ''.join(random.choices(string.ascii_letters, k=shortfall))
        return text

    def _generate_from_pattern(self, pattern: str) -> str:
        """Generate a string that matches the given regex pattern.

        Only a subset of regex syntax is interpreted: shorthand escapes,
        character classes (ranges and negation), groups, alternation and the
        ``*``, ``+``, ``?``, ``{m}``, ``{m,n}`` quantifiers. Anything else is
        emitted literally.

        Args:
            pattern: Regular expression pattern

        Returns:
            Generated string, truncated to 100 characters. If expansion
            yields an empty string, 10 random ASCII letters are returned
            instead.
        """
        import random

        generated = self._expand_pattern(pattern)[:100]
        if generated:
            return generated
        return ''.join(random.choices(string.ascii_letters, k=10))

    def _expand_pattern(self, pattern: str) -> str:
        """Expand *pattern* into one sample string (may be empty)."""
        import random

        branches = self._split_alternatives(pattern)
        if len(branches) > 1:
            pattern = random.choice(branches)

        pieces: List[str] = []
        # True right after a quantifier, so a following '?' is recognised as
        # the lazy modifier ("+?", "*?") rather than a new quantifier.
        after_quantifier = False
        size = len(pattern)
        i = 0

        while i < size:
            ch = pattern[i]
            quantified = False

            if ch == '\\' and i + 1 < size:
                pieces.append(self._expand_escape(pattern[i + 1]))
                i += 2
            elif ch == '[':
                end = self._find_closing(pattern, i)
                if end == -1:
                    pieces.append(ch)
                    i += 1
                else:
                    members = self._expand_char_class(pattern[i + 1:end])
                    pieces.append(random.choice(members) if members else '')
                    i = end + 1
            elif ch == '(':
                end = self._find_closing(pattern, i)
                if end == -1:
                    pieces.append(ch)
                    i += 1
                else:
                    inner = pattern[i + 1:end]
                    if inner.startswith('?:'):
                        # Non-capturing marker; other "(?...)" forms are not
                        # interpreted and are expanded as written.
                        inner = inner[2:]
                    pieces.append(self._expand_pattern(inner))
                    i = end + 1
            elif ch in '*+?':
                if ch == '?' and after_quantifier:
                    pass  # lazy modifier: does not change what is matched
                elif pieces:
                    low = 1 if ch == '+' else 0
                    high = 1 if ch == '?' else 3
                    pieces[-1] = pieces[-1] * random.randint(low, high)
                quantified = True
                i += 1
            elif ch == '{':
                end = pattern.find('}', i)
                bounds = self._parse_repeat(pattern[i + 1:end]) if end != -1 else None
                if bounds is None:
                    # Not a valid quantifier, so '{' is an ordinary character.
                    pieces.append(ch)
                    i += 1
                else:
                    if pieces:
                        count = random.randint(bounds[0], bounds[1])
                        # Output is capped at 100 chars, so more repeats
                        # would only waste memory.
                        pieces[-1] = pieces[-1] * min(count, 100)
                    quantified = True
                    i = end + 1
            elif ch == '.':
                pieces.append(random.choice(string.ascii_letters + string.digits))
                i += 1
            elif ch in '^$\\':
                # Anchors (and a dangling trailing backslash) emit nothing.
                i += 1
            else:
                # Every other character, punctuation included, is a literal.
                pieces.append(ch)
                i += 1

            after_quantifier = quantified

        return ''.join(pieces)

    def _expand_escape(self, code: str) -> str:
        """Return the text produced by the escape sequence ``\\<code>``."""
        import random

        charset = self._ESCAPE_CHARSETS.get(code)
        if charset is not None:
            return random.choice(charset)
        if code in self._LITERAL_ESCAPES:
            return self._LITERAL_ESCAPES[code]
        if code in self._ZERO_WIDTH_ESCAPES:
            return ''
        # Escaped metacharacter such as "\." or "\(": the character itself.
        return code

    def _expand_char_class(self, body: str) -> List[str]:
        """Return the characters allowed by a class body (text between [ ])."""
        negated = body.startswith('^')
        if negated:
            body = body[1:]

        members: List[str] = []
        size = len(body)
        j = 0
        while j < size:
            ch = body[j]
            if ch == '\\' and j + 1 < size:
                code = body[j + 1]
                members.extend(
                    self._ESCAPE_CHARSETS.get(code)
                    or self._LITERAL_ESCAPES.get(code, code)
                )
                j += 2
            elif j + 2 < size and body[j + 1] == '-':
                first, last = sorted((ord(ch), ord(body[j + 2])))
                members.extend(chr(point) for point in range(first, last + 1))
                j += 3
            else:
                members.append(ch)
                j += 1

        if negated:
            excluded = set(members)
            members = [c for c in self._PRINTABLE_CHARS if c not in excluded]
        return members

    @staticmethod
    def _find_closing(pattern: str, start: int) -> int:
        """Return the index closing the '[' or '(' at *start*, or -1.

        Escaped characters are skipped; parentheses are matched by depth so
        nested groups resolve to the correct closing parenthesis.
        """
        opener = pattern[start]
        depth = 0
        j = start
        while j < len(pattern):
            ch = pattern[j]
            if ch == '\\':
                j += 2
                continue
            if opener == '[':
                if ch == ']':
                    return j
            elif ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    return j
            j += 1
        return -1

    @staticmethod
    def _split_alternatives(pattern: str) -> List[str]:
        """Split *pattern* on '|' that is not escaped, grouped or in a class."""
        parts: List[str] = []
        current: List[str] = []
        depth = 0
        in_class = False
        size = len(pattern)
        j = 0
        while j < size:
            ch = pattern[j]
            if ch == '\\' and j + 1 < size:
                current.append(pattern[j:j + 2])
                j += 2
                continue
            if in_class:
                if ch == ']':
                    in_class = False
            elif ch == '[':
                in_class = True
            elif ch == '(':
                depth += 1
            elif ch == ')':
                depth = max(depth - 1, 0)
            elif ch == '|' and depth == 0:
                parts.append(''.join(current))
                current = []
                j += 1
                continue
            current.append(ch)
            j += 1
        parts.append(''.join(current))
        return parts

    @staticmethod
    def _parse_repeat(spec: str):
        """Parse the inside of ``{...}`` into ``(low, high)``, or ``None``.

        ``{m}`` gives ``(m, m)``; ``{m,n}`` gives ``(m, n)``; ``{,n}`` gives
        ``(0, n)``; ``{m,}`` is treated as exactly ``m`` repetitions.
        Reversed bounds are swapped; non-numeric content returns ``None``.
        """
        low_text, comma, high_text = spec.partition(',')
        low_text = low_text.strip()
        high_text = high_text.strip()

        if not comma:
            if not low_text.isdigit():
                return None
            exact = int(low_text)
            return exact, exact

        if not low_text and not high_text:
            return None
        if low_text and not low_text.isdigit():
            return None
        if high_text and not high_text.isdigit():
            return None

        low = int(low_text) if low_text else 0
        high = int(high_text) if high_text else low
        if high < low:
            low, high = high, low
        return low, high

    def _generate_array(
        self,
        schema: Dict[str, Any],
        faker_instance: Any
    ) -> List[Any]:
        """Generate an array based on array schema definition.

        The length is drawn from ``minItems``..``maxItems`` (defaults 1 and
        10; a default is adjusted when the other bound makes it impossible).
        With ``uniqueItems``, a colliding item is regenerated up to 100
        times and dropped if it still collides, so the result may be shorter
        than requested but never contains duplicates.
        """
        import random

        items_schema = schema.get("items", {})
        min_items = schema.get("minItems")
        max_items = schema.get("maxItems")

        if min_items is None and max_items is None:
            min_items, max_items = 1, 10
        elif min_items is None:
            min_items = min(1, max_items)
        elif max_items is None:
            max_items = max(10, min_items)

        count = random.randint(min_items, max_items)
        unique_items = schema.get("uniqueItems", False)

        results: List[Any] = []
        seen = set()

        for _ in range(count):
            item = self.json_schema_type(items_schema, faker_instance)
            if unique_items:
                # repr() keeps 1 and "1" distinct, unlike str().
                item_key = repr(item)
                attempts = 0
                while item_key in seen and attempts < 100:
                    item = self.json_schema_type(items_schema, faker_instance)
                    item_key = repr(item)
                    attempts += 1
                if item_key in seen:
                    continue
                seen.add(item_key)
            results.append(item)

        return results

    def _generate_object(
        self,
        schema: Dict[str, Any],
        faker_instance: Any
    ) -> Dict[str, Any]:
        """Generate an object with one value per entry in ``properties``."""
        properties = schema.get("properties", {})
        return {
            prop_name: self.json_schema_type(prop_schema, faker_instance)
            for prop_name, prop_schema in properties.items()
        }
|
||||
Reference in New Issue
Block a user