// app/src/analyzers/conventionExtractor.ts
// Infers a project's coding conventions (file naming, import style, testing
// framework, component style, module pattern) from file names and contents.
import * as fs from 'fs';
import * as path from 'path';
import { CodingConventions, FileNamingConvention } from '../types';
import { readFileContent, globFiles } from '../utils/fileUtils';

/** Maps a dependency name found in package.json to the test framework it implies. */
const TEST_FRAMEWORKS: Record<string, string> = {
  jest: 'jest',
  'jest-environment-jsdom': 'jest',
  vitest: 'vitest',
  mocha: 'mocha',
  chai: 'mocha',
  pytest: 'pytest',
  'pytest-cov': 'pytest',
  unittest: 'unittest',
  nose: 'nose',
  'testing-library/jest-dom': 'jest',
  'testing-library/react': 'jest',
  '@testing-library/jest-dom': 'jest',
  '@testing-library/react': 'jest'
};

/** File extensions whose contents are inspected for JS/TS import syntax. */
const JS_LIKE_EXTENSIONS = ['.ts', '.js', '.tsx', '.jsx'];

/** The only parts of package.json this module reads. */
interface PackageManifest {
  dependencies?: Record<string, unknown>;
  devDependencies?: Record<string, unknown>;
}

/**
 * Scans `directory` for source files and infers the project's conventions.
 *
 * Files with the extensions ts, js, tsx, jsx, py, go, rs and java are
 * considered; `node_modules`, `dist`, `build` and `.git` are excluded.
 *
 * @param directory - Root directory to scan; also where package.json is looked up.
 * @param projectType - Forwarded to the component-style and module-pattern
 *   detectors, which currently do not use it.
 * @returns The detected conventions. `testingFramework` is `undefined` and
 *   `testingStyle` is `null` when no testing framework could be detected.
 */
export async function extractConventions(
  directory: string,
  projectType: string
): Promise<CodingConventions> {
  const files = await globFiles(
    ['**/*.{ts,js,tsx,jsx,py,go,rs,java}'],
    directory,
    ['node_modules/**', 'dist/**', 'build/**', '.git/**']
  );

  const fileNamingConventions = detectFileNamingConvention(files);
  const importStyle = detectImportStyle(files);
  const testingFramework = detectTestingFramework(directory, files);

  return {
    fileNamingConvention: fileNamingConventions,
    importStyle,
    testingFramework: testingFramework?.framework,
    testingStyle: testingFramework?.style || null,
    componentStyle: detectComponentStyle(files, projectType),
    modulePattern: detectModulePattern(files, projectType)
  };
}

/**
 * Picks the dominant file naming convention among the first 50 files.
 *
 * Each file's base name (final extension removed) votes for the convention it
 * matches. Ties are resolved in the order camelCase, snake_case, PascalCase,
 * kebab-case.
 *
 * @param files - File paths to inspect.
 * @returns The winning convention, or `'unknown'` when no name matches any pattern.
 */
function detectFileNamingConvention(files: string[]): FileNamingConvention {
  const scores = {
    camelCase: 0,
    snake_case: 0,
    PascalCase: 0,
    'kebab-case': 0
  };
  let ambiguousNames = 0;

  for (const file of files.slice(0, 50)) {
    const filename = path.basename(file, path.extname(file));

    if (/^[a-z]+$/.test(filename)) {
      // A single lowercase word (e.g. `index`) fits camelCase, snake_case and
      // kebab-case alike, so it must not tip the vote towards any one of them.
      ambiguousNames++;
    } else if (/^[a-z]+([A-Z][a-z0-9]*)*$/.test(filename)) {
      scores.camelCase++;
    } else if (/^[a-z]+(_[a-z0-9]+)*$/.test(filename)) {
      scores.snake_case++;
    } else if (/^[A-Z][a-zA-Z0-9]*$/.test(filename)) {
      scores.PascalCase++;
    } else if (/^[a-z]+(-[a-z0-9]+)*$/.test(filename)) {
      scores['kebab-case']++;
    }
  }

  // With no unambiguous evidence at all, let the single-word names count as
  // camelCase so that such projects keep the answer they have always had.
  if (Object.values(scores).every(score => score === 0)) {
    scores.camelCase = ambiguousNames;
  }

  let bestName: string | null = null;
  let bestScore = 0;
  for (const [name, score] of Object.entries(scores)) {
    // Strict comparison keeps the first convention on ties.
    if (score > bestScore) {
      bestName = name;
      bestScore = score;
    }
  }

  return bestName === null ? 'unknown' : (bestName as FileNamingConvention);
}

/**
 * Classifies how the first 30 files import their dependencies.
 *
 * Each file is counted at most once per category. Any Python file with an
 * import makes the result `'python'`, and any Go file with an import makes it
 * `'go'`, regardless of how many JS/TS files were seen.
 * NOTE(review): that precedence looks coarse for mixed-language repos — confirm it is intended.
 *
 * @param files - File paths to inspect; contents are read via `readFileContent`
 *   (presumably synchronous, returning a falsy value when unreadable — confirm in ../utils/fileUtils).
 * @returns One of `'python'`, `'go'`, `'es6'`, `'commonjs'`, `'relative'` or
 *   `'mixed'`; `'es6'` when nothing was detected.
 */
function detectImportStyle(files: string[]): CodingConventions['importStyle'] {
  let es6Imports = 0;
  let commonjsImports = 0;
  let relativeImports = 0;
  let pythonImports = 0;
  let goImports = 0;

  for (const file of files.slice(0, 30)) {
    const content = readFileContent(file);
    if (!content) continue;

    const ext = path.extname(file);

    if (JS_LIKE_EXTENSIONS.includes(ext)) {
      if (/\bimport\s+.*\s+from\s+['"]/.test(content)) {
        es6Imports++;
      }
      if (/\brequire\s*\(\s*['"]/.test(content)) {
        commonjsImports++;
      }
      // Relative specifiers start with `./` or `../`.
      if (/\bfrom\s+['"]\.{1,2}\//.test(content)) {
        relativeImports++;
      }
    } else if (ext === '.py') {
      if (/\bimport\s+\w+/.test(content) || /\bfrom\s+\w+/.test(content)) {
        pythonImports++;
      }
    } else if (ext === '.go') {
      if (/\bimport\s*\(/m.test(content) || /\bimport\s+["']/.test(content)) {
        goImports++;
      }
    }
  }

  if (pythonImports > 0) {
    return 'python';
  } else if (goImports > 0) {
    return 'go';
  } else if (es6Imports > commonjsImports && es6Imports > relativeImports) {
    return 'es6';
  } else if (commonjsImports > relativeImports) {
    return 'commonjs';
  } else if (relativeImports > es6Imports + commonjsImports) {
    return 'relative';
  } else if (es6Imports > 0 || commonjsImports > 0) {
    return 'mixed';
  }

  return 'es6';
}

/**
 * Reads and parses package.json without throwing.
 *
 * @returns The parsed manifest, or `null` when the file is missing, unreadable,
 *   not valid JSON, or not a JSON object.
 */
function readPackageManifest(packageJsonPath: string): PackageManifest | null {
  if (!fs.existsSync(packageJsonPath)) {
    return null;
  }

  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
    return typeof parsed === 'object' && parsed !== null ? (parsed as PackageManifest) : null;
  } catch {
    // Best effort: a broken package.json should not abort the whole analysis;
    // the caller falls back to filename heuristics.
    return null;
  }
}

/**
 * Detects the testing framework from package.json dependencies, falling back
 * to test-file naming patterns.
 *
 * Dependencies are checked in the key order of `TEST_FRAMEWORKS`; the first
 * one present wins and its mapped framework name is reported.
 *
 * @param directory - Directory expected to contain package.json.
 * @param files - File paths used for the filename fallback.
 * @returns The framework and its style, or `null` when nothing matches.
 */
function detectTestingFramework(
  directory: string,
  files: string[]
): { framework: string; style: CodingConventions['testingStyle'] } | null {
  const manifest = readPackageManifest(path.join(directory, 'package.json'));

  const allDeps: Record<string, unknown> = {
    ...(manifest?.dependencies ?? {}),
    ...(manifest?.devDependencies ?? {})
  };

  const styleMap: Record<string, CodingConventions['testingStyle']> = {
    jest: 'jest',
    vitest: 'vitest',
    mocha: 'mocha',
    pytest: 'pytest',
    unittest: 'unittest',
    testing: 'testing'
  };

  for (const [pkg, framework] of Object.entries(TEST_FRAMEWORKS)) {
    if (allDeps[pkg]) {
      // Report the framework the package implies (e.g. `chai` -> `mocha`),
      // not the name of the package that happened to reveal it.
      return { framework, style: styleMap[framework] ?? null };
    }
  }

  for (const file of files) {
    const basename = path.basename(file);

    if (basename.includes('.test.') || basename.includes('.spec.')) {
      return { framework: 'test runner', style: 'jest' };
    }

    if (basename.startsWith('test_') || basename.endsWith('_test.py')) {
      return { framework: 'pytest', style: 'pytest' };
    }
  }

  return null;
}

/**
 * Classifies the component style used in the first 20 `.tsx` files.
 *
 * A file is counted once per trait it shows: a `function name(` declaration,
 * a class extending `Component` / `React.Component`, or a hook call.
 *
 * @param files - File paths to inspect; only those ending in `.tsx` are read.
 * @param _projectType - Currently unused.
 * @returns `null` when there are no `.tsx` files; otherwise `'hooks'`,
 *   `'functional'` or `'class'`, defaulting to `'functional'`.
 */
function detectComponentStyle(files: string[], _projectType: string): CodingConventions['componentStyle'] {
  const tsxFiles = files.filter(f => f.endsWith('.tsx'));

  if (tsxFiles.length === 0) {
    return null;
  }

  let functional = 0;
  let classBased = 0;
  let hooks = 0;

  for (const file of tsxFiles.slice(0, 20)) {
    const content = readFileContent(file);
    if (!content) continue;

    if (/\bfunction\s+\w+\s*\(/.test(content)) {
      functional++;
    }
    if (/\bclass\s+\w+\s+extends\s+(React\.)?Component/.test(content)) {
      classBased++;
    }
    // Hooks are named `use` + an uppercase letter; requiring the capital keeps
    // ordinary calls such as `user(` or `users(` from being counted.
    if (/\buse[A-Z]\w*\s*\(/.test(content)) {
      hooks++;
    }
  }

  if (hooks > functional + classBased) {
    return 'hooks';
  } else if (functional > classBased) {
    return 'functional';
  } else if (classBased > 0) {
    return 'class';
  }

  return 'functional';
}

/**
 * Classifies the dominant module pattern in the first 30 files.
 *
 * A file is counted once per trait it shows: a default export, a named export
 * (an `export { ... }` list or an exported declaration), or a
 * `import * as` namespace import.
 *
 * @param files - File paths to inspect.
 * @param _projectType - Currently unused.
 * @returns `'default'`, `'named'` or `'wildcard'`; `'default'` when there is no clear winner.
 */
function detectModulePattern(files: string[], _projectType: string): CodingConventions['modulePattern'] {
  let defaultExports = 0;
  let namedExports = 0;
  let wildcardImports = 0;

  for (const file of files.slice(0, 30)) {
    const content = readFileContent(file);
    if (!content) continue;

    if (/export\s+default/.test(content)) {
      defaultExports++;
    }
    // `export const x`, `export function f`, etc. are named exports just like
    // `export { x }`; `export default ...` is deliberately not matched here.
    if (
      /\bexport\s+(?:\{|(?:async\s+)?function\b|(?:abstract\s+)?class\b|(?:const|let|var|interface|type|enum)\b)/.test(
        content
      )
    ) {
      namedExports++;
    }
    if (/import\s+\*\s+as/.test(content)) {
      wildcardImports++;
    }
  }

  if (defaultExports > namedExports && defaultExports > wildcardImports) {
    return 'default';
  } else if (namedExports > defaultExports && namedExports > wildcardImports) {
    return 'named';
  } else if (wildcardImports > defaultExports + namedExports) {
    return 'wildcard';
  }

  return 'default';
}