// Source file: src/type-inferrer.ts
// Recovered from patch 12a1cb8da13235a41f6497f28b785117f6e43a97
// ("Initial upload with CI/CD workflow").
import type {
  TypeNode,
  PrimitiveType,
  ArrayType,
  ObjectType,
  UnionType,
  Property,
  TypeDefinition,
  TypeInferenceOptions,
  ParseResult,
} from './types.js';

/**
 * Infers a structural type description (`TypeNode` tree) from runtime values,
 * either from a single value ({@link TypeInferrer.infer}) or by combining
 * several object samples ({@link TypeInferrer.inferFromMultiple}).
 *
 * Notes on current behavior:
 * - Non-empty strings that `Number()` can parse are reported as the union
 *   `string | number`; all other strings are `string`.
 * - Reference cycles are cut by reporting the repeated object as `unknown`.
 * - `detectUnions` and `literalThreshold` are stored in the resolved options
 *   but are not consulted by any code in this class.
 */
export class TypeInferrer {
  /** Objects on the *current* recursion path; used only to cut reference cycles. */
  private readonly visitedObjects: Set<object>;
  /** Reset on every run; not read by any code in this class. */
  private readonly literalValues: Map<string, Set<unknown>>;
  /** Reset on every run; not read by any code in this class. */
  private readonly typeCounts: Map<string, Map<string, number>>;
  /** Caller options with defaults applied. */
  private readonly options: Required<TypeInferenceOptions>;
  /** Sequence number used for generated root-array type names. */
  private counter: number;

  /**
   * @param options Optional settings. Defaults: `rootName` = `'Root'`,
   *   `detectUnions` = `true`, `literalThreshold` = `3`.
   */
  constructor(options: TypeInferenceOptions = {}) {
    this.visitedObjects = new Set();
    this.literalValues = new Map();
    this.typeCounts = new Map();
    this.counter = 0;
    this.options = {
      // `||` is deliberate here: an empty string is not a usable type name.
      rootName: options.rootName || 'Root',
      detectUnions: options.detectUnions ?? true,
      // `??` so that an explicit threshold of 0 is respected.
      literalThreshold: options.literalThreshold ?? 3,
    };
  }

  /**
   * Infers the type of a single value.
   *
   * - A non-empty object becomes the root definition (named `rootName`) and
   *   every nested non-empty object is emitted as an additional definition.
   * - An array is emitted as a single definition named `GeneratedArrayType<n>`.
   * - Anything else is emitted as a single definition named `rootName`.
   *
   * @param data The value to analyse.
   * @returns The generated type definitions and a (currently always empty) warning list.
   */
  infer(data: unknown): ParseResult {
    this.resetState();

    const type = this.inferType(data, this.options.rootName);
    const types: TypeDefinition[] = [];
    const warnings: string[] = [];

    if (type.kind === 'object' && type.properties.length > 0) {
      type.name = this.options.rootName;
      types.push({ name: type.name, type });
      this.collectNestedTypes(type, types);
    } else if (type.kind === 'array') {
      types.push({ name: this.generateArrayTypeName(), type });
    } else {
      types.push({ name: this.options.rootName, type });
    }

    return { types, warnings };
  }

  /**
   * Infers one root object type from several samples.
   *
   * Each field's type is the merge of the types observed for it. A field is
   * optional when at least one object sample lacks it or holds `undefined`
   * for it. Samples that are not plain (non-array) objects are skipped and
   * reported in `warnings`.
   *
   * @param samples The sample values; zero samples yields a warning, one
   *   sample is delegated to {@link TypeInferrer.infer}.
   * @returns The root definition followed by nested object definitions.
   */
  inferFromMultiple(samples: unknown[]): ParseResult {
    if (samples.length === 0) {
      return { types: [], warnings: ['No samples provided'] };
    }

    if (samples.length === 1) {
      return this.infer(samples[0]);
    }

    this.resetState();

    const fieldTypes = new Map<string, Set<TypeNode>>();
    const fieldPresence = new Map<string, number>();
    const fieldLiteralValues = new Map<string, Set<unknown>>();
    const warnings: string[] = [];
    let objectSampleCount = 0;

    samples.forEach((sample, index) => {
      if (sample === null || typeof sample !== 'object' || Array.isArray(sample)) {
        warnings.push(`Sample at index ${index} is not an object and was skipped`);
        return;
      }
      objectSampleCount += 1;
      this.collectFieldTypes(
        sample as Record<string, unknown>,
        fieldTypes,
        fieldPresence,
        fieldLiteralValues
      );
    });

    const properties: Property[] = [];

    for (const [fieldName, observed] of fieldTypes) {
      const literalValues = fieldLiteralValues.get(fieldName);
      const mergedType = this.mergeTypes(Array.from(observed), fieldName, literalValues);
      // Optional unless every object sample supplied a defined value.
      const optional = (fieldPresence.get(fieldName) ?? 0) < objectSampleCount;
      properties.push({ name: fieldName, type: mergedType, optional });
    }

    properties.sort((a, b) => a.name.localeCompare(b.name));

    const rootType: ObjectType = {
      kind: 'object',
      properties,
      name: this.options.rootName,
    };

    const types: TypeDefinition[] = [{ name: this.options.rootName, type: rootType }];
    this.collectNestedTypes(rootType, types);

    return { types, warnings };
  }

  /** Clears all per-run state so consecutive runs do not influence each other. */
  private resetState(): void {
    this.visitedObjects.clear();
    this.literalValues.clear();
    this.typeCounts.clear();
    this.counter = 0;
  }

  /**
   * Records, for one sample object, the inferred type of every field.
   *
   * @param obj The sample to scan.
   * @param fieldTypes Accumulates the inferred types per field name.
   * @param fieldPresence Counts, per field name, the samples holding a defined value.
   * @param fieldLiteralValues Accumulates literal values per field name.
   */
  private collectFieldTypes(
    obj: Record<string, unknown>,
    fieldTypes: Map<string, Set<TypeNode>>,
    fieldPresence: Map<string, number>,
    fieldLiteralValues: Map<string, Set<unknown>>
  ): void {
    for (const [key, value] of Object.entries(obj)) {
      let observed = fieldTypes.get(key);
      if (!observed) {
        observed = new Set();
        fieldTypes.set(key, observed);
      }
      let literals = fieldLiteralValues.get(key);
      if (!literals) {
        literals = new Set();
        fieldLiteralValues.set(key, literals);
      }

      if (value === undefined) {
        // Key exists but carries no value: register the field, do not count it as present.
        if (!fieldPresence.has(key)) {
          fieldPresence.set(key, 0);
        }
        continue;
      }

      fieldPresence.set(key, (fieldPresence.get(key) ?? 0) + 1);

      const inferredType = this.inferType(value, key);
      observed.add(inferredType);

      if (inferredType.kind === 'literal') {
        literals.add(inferredType.value);
      }
    }
  }

  /**
   * Infers the type of an arbitrary value.
   *
   * @param data The value to analyse.
   * @param _name Unused; kept for signature compatibility.
   * @returns `null` for null, `unknown` for undefined/functions/symbols/bigints
   *   and for objects already on the current recursion path.
   */
  private inferType(data: unknown, _name?: string): TypeNode {
    if (data === null) {
      return { kind: 'primitive', type: 'null' };
    }

    if (data === undefined) {
      return { kind: 'primitive', type: 'unknown' };
    }

    switch (typeof data) {
      case 'string': {
        if (data.length === 0) {
          return { kind: 'primitive', type: 'string' };
        }
        // Numeric-looking, non-blank strings are reported as `string | number`.
        const parsed = Number(data);
        if (!isNaN(parsed) && data.trim() !== '') {
          return {
            kind: 'union',
            types: [
              { kind: 'primitive', type: 'string' },
              { kind: 'primitive', type: 'number' },
            ],
          };
        }
        return { kind: 'primitive', type: 'string' };
      }
      case 'number':
        return { kind: 'primitive', type: 'number' };
      case 'boolean':
        return { kind: 'primitive', type: 'boolean' };
      case 'object': {
        const ref = data as object;
        if (this.visitedObjects.has(ref)) {
          // Reference cycle: this object is one of its own ancestors.
          return { kind: 'primitive', type: 'unknown' };
        }
        this.visitedObjects.add(ref);
        try {
          return Array.isArray(ref)
            ? this.inferArrayType(ref)
            : this.inferObjectType(ref as Record<string, unknown>);
        } finally {
          // Leave the path again so that the same object reached through a
          // different, non-cyclic route is still inferred in full.
          this.visitedObjects.delete(ref);
        }
      }
      default:
        return { kind: 'primitive', type: 'unknown' };
    }
  }

  /**
   * Infers an array type whose element type is the merge of all element types.
   * An empty array has element type `unknown`.
   */
  private inferArrayType(arr: unknown[]): ArrayType {
    if (arr.length === 0) {
      return {
        kind: 'array',
        elementType: { kind: 'primitive', type: 'unknown' },
      };
    }

    const elementTypes: TypeNode[] = [];
    for (const item of arr) {
      elementTypes.push(this.inferType(item));
    }

    return {
      kind: 'array',
      elementType: this.mergeTypes(elementTypes, 'Element', undefined),
    };
  }

  /**
   * Infers an object type from the object's own enumerable string-keyed
   * entries. Fields holding `undefined` become optional `unknown`; properties
   * are sorted by name.
   */
  private inferObjectType(obj: Record<string, unknown>): ObjectType {
    const properties: Property[] = [];

    for (const [key, value] of Object.entries(obj)) {
      if (value === undefined) {
        properties.push({ name: key, type: { kind: 'primitive', type: 'unknown' }, optional: true });
        continue;
      }

      properties.push({ name: key, type: this.inferType(value, key), optional: false });
    }

    properties.sort((a, b) => a.name.localeCompare(b.name));

    return {
      kind: 'object',
      properties,
    };
  }

  /**
   * Combines several observed types into one.
   *
   * Nested unions are flattened, all object types are merged into one object,
   * all array types into one array, and primitives are de-duplicated by name.
   * The alternatives are ordered: object, array, primitives (first-seen
   * order), then any remaining node kinds. A single alternative is returned
   * as-is; several are wrapped in a union.
   *
   * @param types The observed types; an empty list yields `unknown`.
   * @param _name Unused; kept for signature compatibility.
   * @param literalValues If given, receives the value of every literal node seen.
   */
  private mergeTypes(types: TypeNode[], _name?: string, literalValues?: Set<unknown>): TypeNode {
    if (types.length === 0) {
      return { kind: 'primitive', type: 'unknown' };
    }

    if (types.length === 1) {
      return types[0];
    }

    // Flatten so that members of nested unions take part in the merge.
    const flat: TypeNode[] = [];
    const flatten = (node: TypeNode): void => {
      if (node.kind === 'union') {
        for (const member of node.types) {
          flatten(member);
        }
      } else {
        flat.push(node);
      }
    };
    for (const type of types) {
      flatten(type);
    }

    const primitiveNames = new Set<PrimitiveType['type']>();
    const objectTypes: ObjectType[] = [];
    const arrayTypes: ArrayType[] = [];
    const otherTypes: TypeNode[] = [];

    for (const node of flat) {
      switch (node.kind) {
        case 'primitive':
          primitiveNames.add(node.type);
          break;
        case 'object':
          objectTypes.push(node);
          break;
        case 'array':
          arrayTypes.push(node);
          break;
        case 'literal':
          literalValues?.add(node.value);
          otherTypes.push(node);
          break;
        default:
          otherTypes.push(node);
          break;
      }
    }

    const alternatives: TypeNode[] = [];

    const mergedObject = this.mergeObjectTypes(objectTypes);
    if (mergedObject) {
      alternatives.push(mergedObject);
    }
    if (arrayTypes.length > 0) {
      alternatives.push(this.mergeArrayTypes(arrayTypes));
    }
    for (const name of primitiveNames) {
      alternatives.push({ kind: 'primitive', type: name });
    }
    alternatives.push(...this.deduplicateTypes(otherTypes));

    if (alternatives.length === 0) {
      return { kind: 'primitive', type: 'unknown' };
    }
    if (alternatives.length === 1) {
      return alternatives[0];
    }

    const union: UnionType = { kind: 'union', types: alternatives };
    return union;
  }

  /**
   * Merges object types field by field.
   *
   * A field is optional in the result when any input marks it optional or
   * when at least one input object does not declare it at all.
   *
   * @returns `null` for an empty list, the sole input for a single-element
   *   list, otherwise a new unnamed object type with name-sorted properties.
   */
  private mergeObjectTypes(objects: ObjectType[]): ObjectType | null {
    if (objects.length === 0) return null;
    if (objects.length === 1) return objects[0];

    interface FieldInfo {
      types: Set<TypeNode>;
      optional: boolean;
      declaredIn: number;
      lastObject: number;
    }
    const allFields = new Map<string, FieldInfo>();

    objects.forEach((obj, objectIndex) => {
      for (const prop of obj.properties) {
        let info = allFields.get(prop.name);
        if (!info) {
          info = { types: new Set(), optional: false, declaredIn: 0, lastObject: -1 };
          allFields.set(prop.name, info);
        }
        info.types.add(prop.type);
        info.optional = info.optional || prop.optional;
        // Count each declaring object once, even if it repeats a property name.
        if (info.lastObject !== objectIndex) {
          info.lastObject = objectIndex;
          info.declaredIn += 1;
        }
      }
    });

    const mergedProperties: Property[] = [];

    for (const [fieldName, info] of allFields) {
      mergedProperties.push({
        name: fieldName,
        type: this.mergeTypes(Array.from(info.types), fieldName, undefined),
        optional: info.optional || info.declaredIn < objects.length,
      });
    }

    mergedProperties.sort((a, b) => a.name.localeCompare(b.name));

    return {
      kind: 'object',
      properties: mergedProperties,
    };
  }

  /**
   * Merges array types by merging their element types. An empty list yields
   * `unknown[]`; a single-element list is returned unchanged.
   */
  private mergeArrayTypes(arrays: ArrayType[]): ArrayType {
    if (arrays.length === 0) {
      return { kind: 'array', elementType: { kind: 'primitive', type: 'unknown' } };
    }
    if (arrays.length === 1) return arrays[0];

    const elementTypes = new Set<TypeNode>();
    for (const arr of arrays) {
      elementTypes.add(arr.elementType);
    }

    return {
      kind: 'array',
      elementType: this.mergeTypes(Array.from(elementTypes), 'Element', undefined),
    };
  }

  /** Removes structurally identical types, keeping the first occurrence of each. */
  private deduplicateTypes(types: TypeNode[]): TypeNode[] {
    const seen = new Map<string, TypeNode>();

    for (const type of types) {
      const key = this.typeToString(type);
      if (!seen.has(key)) {
        seen.set(key, type);
      }
    }

    return Array.from(seen.values());
  }

  /**
   * Builds a structural key for a type, used only for de-duplication.
   * Union members are sorted so member order does not affect the key;
   * composite kinds are delimited so nested keys cannot run together.
   */
  private typeToString(type: TypeNode): string {
    switch (type.kind) {
      case 'primitive':
        return `prim:${type.type}`;
      case 'literal':
        // Include the runtime type so that e.g. "1" and 1 get distinct keys.
        return `lit:${typeof type.value}:${String(type.value)}`;
      case 'array':
        return `arr:[${this.typeToString(type.elementType)}]`;
      case 'object': {
        const fields = type.properties.map(
          (p) => `${p.name}:${p.optional ? '?' : ''}${this.typeToString(p.type)}`
        );
        return `obj:{${fields.join(',')}}`;
      }
      case 'union': {
        const members = type.types.map((t) => this.typeToString(t)).sort();
        return `uni:(${members.join('|')})`;
      }
      case 'optional':
        return `opt:${this.typeToString(type.type)}`;
      default:
        return 'unknown';
    }
  }

  /**
   * Appends a definition for every non-empty object type nested inside
   * `objType`'s properties. `objType` itself is not appended.
   */
  private collectNestedTypes(objType: ObjectType, types: TypeDefinition[]): void {
    const usedNames = new Set<string>(types.map((t) => t.name));

    for (const prop of objType.properties) {
      this.extractNestedType(prop.type, types, usedNames);
    }
  }

  /**
   * Walks a type tree depth-first; every non-empty object type without a name
   * receives a generated one, and every named object not yet in `usedNames`
   * is appended to `types`.
   */
  private extractNestedType(type: TypeNode, types: TypeDefinition[], usedNames: Set<string>): void {
    switch (type.kind) {
      case 'object':
        if (type.properties.length > 0 && !type.name) {
          type.name = this.generateTypeName(usedNames);
        }
        if (type.name && !usedNames.has(type.name)) {
          usedNames.add(type.name);
          types.push({ name: type.name, type });
        }
        for (const prop of type.properties) {
          this.extractNestedType(prop.type, types, usedNames);
        }
        break;
      case 'array':
        this.extractNestedType(type.elementType, types, usedNames);
        break;
      case 'union':
        for (const member of type.types) {
          this.extractNestedType(member, types, usedNames);
        }
        break;
      case 'optional':
        this.extractNestedType(type.type, types, usedNames);
        break;
    }
  }

  /** Returns the first `GeneratedType<n>` (n = 1, 2, ...) not present in `usedNames`. */
  private generateTypeName(usedNames: Set<string>): string {
    let name: string;
    let i = 0;
    do {
      name = `GeneratedType${++i}`;
    } while (usedNames.has(name));
    return name;
  }

  /** Returns the next `GeneratedArrayType<n>` name for the current run. */
  private generateArrayTypeName(): string {
    return `GeneratedArrayType${++this.counter}`;
  }
}