This commit is contained in:
314
src/analyzers/conventionExtractor.ts
Normal file
314
src/analyzers/conventionExtractor.ts
Normal file
@@ -0,0 +1,314 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { ConventionInfo, NamingConvention, ImportStyle, CodeStyle } from '../types';
|
||||
|
||||
// Pairs a recognizer regex with the naming style it identifies.
// Each regex is matched against a single bare identifier or file base name
// (see scoreNamingPatterns), never against whole file contents.
interface NamingPattern {
  regex: RegExp;
  type: 'camelCase' | 'snake_case' | 'kebab-case' | 'PascalCase';
}
|
||||
|
||||
// Heuristic recognizers for the four supported naming styles.
// Note: a single-word lowercase name like "foo" matches the camelCase
// pattern only; the patterns for snake_case and kebab-case require at
// least one separator. scoreNamingPatterns tallies every pattern a name
// matches, so order here does not affect scoring.
const NAMING_PATTERNS: NamingPattern[] = [
  { regex: /^[a-z][a-zA-Z0-9]*$/, type: 'camelCase' },  // fooBar, foo
  { regex: /^[a-z]+_[a-z0-9_]+$/, type: 'snake_case' }, // foo_bar
  { regex: /^[a-z]+-[a-z0-9-]+$/, type: 'kebab-case' }, // foo-bar
  { regex: /^[A-Z][a-zA-Z0-9]*$/, type: 'PascalCase' }, // FooBar
];
|
||||
|
||||
const TEST_FRAMEWORK_PATTERNS = [
|
||||
{ name: 'Jest', indicators: ['jest', '@types/jest'] },
|
||||
{ name: 'Mocha', indicators: ['mocha'] },
|
||||
{ name: 'Vitest', indicators: ['vitest'] },
|
||||
{ name: 'Pytest', indicators: ['pytest'] },
|
||||
{ name: 'unittest', indicators: ['unittest', 'unittest.mock'] },
|
||||
{ name: 'Go testing', indicators: ['testing'] },
|
||||
{ name: 'JUnit', indicators: ['junit', '@junit'] },
|
||||
{ name: 'pytest', indicators: ['pytest'] },
|
||||
];
|
||||
|
||||
export class ConventionExtractor {
|
||||
async extract(dir: string, files: string[]): Promise<ConventionInfo> {
|
||||
const namingConvention = await this.extractNamingConvention(files);
|
||||
const importStyle = await this.extractImportStyle(dir, files);
|
||||
const testingFramework = await this.detectTestingFramework(dir);
|
||||
const codeStyle = await this.extractCodeStyle(files);
|
||||
|
||||
return {
|
||||
namingConvention,
|
||||
importStyle,
|
||||
testingFramework,
|
||||
codeStyle,
|
||||
};
|
||||
}
|
||||
|
||||
private async extractNamingConvention(
|
||||
files: string[]
|
||||
): Promise<NamingConvention> {
|
||||
const fileNames = files.map(f => path.basename(f));
|
||||
const fileNameScores = this.scoreNamingPatterns(fileNames);
|
||||
|
||||
const allNames: string[] = [];
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = await fs.promises.readFile(file, 'utf-8');
|
||||
const identifiers = this.extractIdentifiers(content);
|
||||
allNames.push(...identifiers);
|
||||
} catch {
|
||||
// Skip files that can't be read
|
||||
}
|
||||
}
|
||||
|
||||
const variableNames = allNames.filter(n =>
|
||||
/^[a-z]/.test(n) && !n.includes('_') && !n.contains('-')
|
||||
);
|
||||
const functionNames = allNames.filter(n =>
|
||||
/^[a-z]/.test(n) && !n.includes('_') && !n.contains('-')
|
||||
);
|
||||
const classNames = allNames.filter(n =>
|
||||
/^[A-Z]/.test(n)
|
||||
);
|
||||
|
||||
const variableScores = this.scoreNamingPatterns(variableNames.slice(0, 100));
|
||||
const functionScores = this.scoreNamingPatterns(functionNames.slice(0, 100));
|
||||
const classScores = this.scoreNamingPatterns(classNames.slice(0, 50));
|
||||
|
||||
return {
|
||||
files: this.getBestType(fileNameScores),
|
||||
variables: this.getBestType(variableScores),
|
||||
functions: this.getBestType(functionScores),
|
||||
classes: this.getBestType(classScores),
|
||||
};
|
||||
}
|
||||
|
||||
private scoreNamingPatterns(names: string[]): Record<string, number> {
|
||||
const scores: Record<string, number> = {
|
||||
camelCase: 0,
|
||||
snake_case: 0,
|
||||
'kebab-case': 0,
|
||||
PascalCase: 0,
|
||||
};
|
||||
|
||||
for (const name of names) {
|
||||
for (const pattern of NAMING_PATTERNS) {
|
||||
if (pattern.regex.test(name)) {
|
||||
scores[pattern.type]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return scores;
|
||||
}
|
||||
|
||||
private getBestType(
|
||||
scores: Record<string, number>
|
||||
): 'camelCase' | 'snake_case' | 'kebab-case' | 'PascalCase' {
|
||||
let maxScore = 0;
|
||||
let bestType: 'camelCase' | 'snake_case' | 'kebab-case' | 'PascalCase' =
|
||||
'camelCase';
|
||||
|
||||
for (const [type, score] of Object.entries(scores)) {
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
bestType = type as typeof bestType;
|
||||
}
|
||||
}
|
||||
|
||||
return bestType;
|
||||
}
|
||||
|
||||
private async extractImportStyle(
|
||||
dir: string,
|
||||
files: string[]
|
||||
): Promise<ImportStyle> {
|
||||
let hasESMImports = false;
|
||||
let hasCommonJSImports = false;
|
||||
let hasCommonJSRequires = false;
|
||||
let hasAliasImports = false;
|
||||
const commonPatterns: string[] = [];
|
||||
|
||||
const aliasPatterns = [
|
||||
/^@\//,
|
||||
/^~/,
|
||||
/^src\//,
|
||||
/^components\//,
|
||||
];
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = await fs.promises.readFile(file, 'utf-8');
|
||||
|
||||
if (/import\s+.*\s+from\s+['"]/.test(content)) {
|
||||
hasESMImports = true;
|
||||
}
|
||||
|
||||
if (/require\s*\(/.test(content)) {
|
||||
hasCommonJSRequires = true;
|
||||
}
|
||||
|
||||
if (/export\s+(const|function|class|interface|type)/.test(content)) {
|
||||
hasESMImports = true;
|
||||
}
|
||||
|
||||
for (const pattern of aliasPatterns) {
|
||||
if (pattern.test(content.replace(/import\s+.*\s+from\s+/, ''))) {
|
||||
hasAliasImports = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const importMatches = content.match(/import\s+.*\s+from\s+['"]([^'"]+)['"]/g);
|
||||
if (importMatches) {
|
||||
for (const match of importMatches) {
|
||||
const modMatch = match.match(/['"]([^'"]+)['"]$/);
|
||||
if (modMatch) {
|
||||
const module = modMatch[1];
|
||||
if (!commonPatterns.includes(module)) {
|
||||
commonPatterns.push(module);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Skip unreadable files
|
||||
}
|
||||
}
|
||||
|
||||
hasCommonJSImports = hasCommonJSRequires;
|
||||
|
||||
let style: 'ESM' | 'CommonJS' | 'mixed' = 'CommonJS';
|
||||
if (hasESMImports && hasCommonJSImports) {
|
||||
style = 'mixed';
|
||||
} else if (hasESMImports) {
|
||||
style = 'ESM';
|
||||
}
|
||||
|
||||
const aliasPrefix = hasAliasImports ? '@/' : null;
|
||||
|
||||
return {
|
||||
style,
|
||||
aliasPrefix,
|
||||
commonPatterns: commonPatterns.slice(0, 10),
|
||||
};
|
||||
}
|
||||
|
||||
private async detectTestingFramework(dir: string): Promise<string | null> {
|
||||
const packageJsonPath = path.join(dir, 'package.json');
|
||||
if (await this.fileExists(packageJsonPath)) {
|
||||
const content = await fs.promises.readFile(packageJsonPath, 'utf-8');
|
||||
const packageJson = JSON.parse(content);
|
||||
const allDeps = {
|
||||
...packageJson.dependencies,
|
||||
...packageJson.devDependencies,
|
||||
};
|
||||
|
||||
for (const framework of TEST_FRAMEWORK_PATTERNS) {
|
||||
for (const indicator of framework.indicators) {
|
||||
if (Object.keys(allDeps).some(dep => dep.includes(indicator))) {
|
||||
return framework.name;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const requirementsPath = path.join(dir, 'requirements.txt');
|
||||
if (await this.fileExists(requirementsPath)) {
|
||||
const content = await fs.promises.readFile(requirementsPath, 'utf-8');
|
||||
for (const framework of TEST_FRAMEWORK_PATTERNS) {
|
||||
for (const indicator of framework.indicators) {
|
||||
if (content.toLowerCase().includes(indicator.toLowerCase())) {
|
||||
return framework.name;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const file of await fs.promises.readdir(dir)) {
|
||||
if (file.endsWith('.test.ts') || file.endsWith('.spec.ts')) {
|
||||
return 'Jest';
|
||||
}
|
||||
if (file.endsWith('.test.js') || file.endsWith('.spec.js')) {
|
||||
return 'Jest';
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private async extractCodeStyle(files: string[]): Promise<CodeStyle> {
|
||||
let spaceIndentCount = 0;
|
||||
let tabIndentCount = 0;
|
||||
let singleQuoteCount = 0;
|
||||
let doubleQuoteCount = 0;
|
||||
let lfLineEndings = 0;
|
||||
let crlfLineEndings = 0;
|
||||
|
||||
let sampleSize = 0;
|
||||
const maxSamples = 50;
|
||||
|
||||
for (const file of files.slice(0, maxSamples)) {
|
||||
try {
|
||||
const content = await fs.promises.readFile(file, 'utf-8');
|
||||
const lines = content.split('\n').slice(0, 100);
|
||||
|
||||
for (const line of lines) {
|
||||
if (/^\s+ /.test(line)) spaceIndentCount += 2;
|
||||
if (/^\s+\t/.test(line)) tabIndentCount++;
|
||||
if (/'[^']*'/.test(line) && !/\\'/.test(line)) singleQuoteCount++;
|
||||
if (/"[^"]*"/.test(line) && !/\\"/.test(line)) doubleQuoteCount++;
|
||||
}
|
||||
|
||||
if (content.includes('\r\n')) crlfLineEndings++;
|
||||
if (!content.includes('\r\n') && content.includes('\n')) lfLineEndings++;
|
||||
sampleSize++;
|
||||
} catch {
|
||||
// Skip unreadable files
|
||||
}
|
||||
}
|
||||
|
||||
const indentType = spaceIndentCount > tabIndentCount ? 'spaces' : 'tabs';
|
||||
const quoteStyle = singleQuoteCount > doubleQuoteCount ? 'single' : 'double';
|
||||
const lineEndings = lfLineEndings > crlfLineEndings ? 'LF' : 'CRLF';
|
||||
|
||||
return {
|
||||
indentSize: 2,
|
||||
indentType,
|
||||
lineEndings,
|
||||
quoteStyle,
|
||||
};
|
||||
}
|
||||
|
||||
private extractIdentifiers(content: string): string[] {
|
||||
const identifiers: string[] = [];
|
||||
|
||||
const varPattern = /\b(const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g;
|
||||
let match;
|
||||
while ((match = varPattern.exec(content)) !== null) {
|
||||
identifiers.push(match[2]);
|
||||
}
|
||||
|
||||
const funcPattern = /function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g;
|
||||
while ((match = funcPattern.exec(content)) !== null) {
|
||||
identifiers.push(match[1]);
|
||||
}
|
||||
|
||||
const arrowFuncPattern = /const\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=/g;
|
||||
while ((match = arrowFuncPattern.exec(content)) !== null) {
|
||||
identifiers.push(match[1]);
|
||||
}
|
||||
|
||||
const classPattern = /class\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g;
|
||||
while ((match = classPattern.exec(content)) !== null) {
|
||||
identifiers.push(match[1]);
|
||||
}
|
||||
|
||||
return [...new Set(identifiers)];
|
||||
}
|
||||
|
||||
private async fileExists(filePath: string): Promise<boolean> {
|
||||
try {
|
||||
await fs.promises.access(filePath, fs.constants.F_OK);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user