"""Type inference engine for Python code analysis."""
import ast
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from stubgen.parser import (
    FileInfo, Function, Variable, Class, TypeHint
)
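
# The analyzers below each produce TypeCandidate values from a different
# evidence source (return statements, default values, assignments, and
# docstrings); the Inferrer class at the bottom combines them, preferring
# explicit annotations over docstring hints and heuristic inference.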


class TypeCandidate:
    """Represents a candidate type with confidence score."""

    def __init__(
        self,
        type_str: str,
        confidence: float,
        source: str = "inference"
    ):
        self.type_str = type_str
        self.confidence = confidence
        self.source = source

    def __repr__(self):
        return f"TypeCandidate({self.type_str}, confidence={self.confidence})"


class ReturnTypeAnalyzer:
    """Analyzes return statements to infer function return types."""

    def __init__(self, func: Function):
        self.func = func

    def analyze(self) -> Optional[TypeCandidate]:
        return_types: Set[str] = set()
        for stmt in self.func.body:
            result = self._analyze_statement(stmt)
            if result:
                return_types.add(result)
        if not return_types:
            if self._has_yield(self.func.body):
                return TypeCandidate("Generator", 0.9, "yield")
            return None
        if len(return_types) == 1:
            return_type = return_types.pop()
            confidence = 1.0
            if return_type == "None":
                confidence = 0.7
            return TypeCandidate(return_type, confidence, "return")
        if return_types == {"None", "bool"}:
            return TypeCandidate("Optional[bool]", 0.8, "return")
        if return_types == {"None", "int"}:
            return TypeCandidate("Optional[int]", 0.8, "return")
        if return_types == {"None", "str"}:
            return TypeCandidate("Optional[str]", 0.8, "return")
        if return_types == {"None"}:
            return TypeCandidate("None", 1.0, "return")
        return TypeCandidate("Any", 0.5, "return")

    def _has_yield(self, statements) -> bool:
        for stmt in statements:
            if isinstance(stmt, (ast.Yield, ast.YieldFrom)):
                return True
            for child in ast.walk(stmt):
                if isinstance(child, (ast.Yield, ast.YieldFrom)):
                    return True
        return False
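
    # Example: a body like "if cond: return 1" followed by a bare "return"
    # collects {"int", "None"}, which analyze() collapses to
    # TypeCandidate("Optional[int]", 0.8, "return").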

    def _analyze_statement(self, stmt: ast.stmt) -> Optional[str]:
        if isinstance(stmt, ast.Return):
            if stmt.value is None:
                return "None"
            return self._infer_type(stmt.value)
        elif isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Constant):
            return self._infer_type(stmt.value)
        elif isinstance(stmt, ast.Assign):
            if stmt.value:
                return self._infer_type(stmt.value)
        elif isinstance(stmt, (ast.If, ast.For, ast.While)):
            for child in stmt.body:
                result = self._analyze_statement(child)
                if result:
                    return result
            if hasattr(stmt, 'orelse'):
                for child in stmt.orelse:
                    result = self._analyze_statement(child)
                    if result:
                        return result
        elif isinstance(stmt, ast.Try):
            for child in stmt.body:
                result = self._analyze_statement(child)
                if result:
                    return result
        return None

    def _infer_type(self, node: ast.expr) -> str:
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool):
                return "bool"
            elif isinstance(value, int):
                return "int"
            elif isinstance(value, float):
                return "float"
            elif isinstance(value, str):
                return "str"
            elif value is None:
                return "None"
        elif isinstance(node, ast.List):
            if node.elts:
                types = {self._infer_type(elt) for elt in node.elts}
                if len(types) == 1:
                    return f"List[{types.pop()}]"
            return "List[Any]"
        elif isinstance(node, ast.Tuple):
            if node.elts:
                types = [self._infer_type(elt) for elt in node.elts]
                return f"Tuple[{', '.join(types)}]"
            return "Tuple[()]"
        elif isinstance(node, ast.Dict):
            return "Dict[Any, Any]"
        elif isinstance(node, ast.Set):
            if node.elts:
                types = {self._infer_type(elt) for elt in node.elts}
                if len(types) == 1:
                    return f"Set[{types.pop()}]"
            return "Set[Any]"
        elif isinstance(node, ast.Name):
            return self._infer_name_type(node)
        elif isinstance(node, ast.BinOp):
            left_type = self._infer_type(node.left)
            right_type = self._infer_type(node.right)
            if left_type == right_type:
                return left_type
            if left_type in ("int", "float") and right_type in ("int", "float"):
                return "float" if "float" in (left_type, right_type) else "int"
            return "Any"
        elif isinstance(node, ast.Call):
            if isinstance(node.func, ast.Name):
                func_name = node.func.id
                return self._infer_call_type(func_name, node)
            return "Any"
        elif isinstance(node, ast.Attribute):
            return "Any"
        return "Any"

    def _infer_name_type(self, node: ast.Name) -> str:
        if node.id in ("True", "False"):
            return "bool"
        return "Any"

    def _infer_call_type(self, func_name: str, node: ast.Call) -> str:
        # Return types of common builtins; anything unknown falls back to Any.
        type_hints = {
            "len": "int",
            "str": "str",
            "int": "int",
            "float": "float",
            "bool": "bool",
            "list": "list",
            "dict": "dict",
            "set": "set",
            "tuple": "tuple",
            "range": "range",
            "enumerate": "enumerate",
            "zip": "zip",
            "map": "map",
            "filter": "filter",
            "sorted": "list",
            "reversed": "reversed",
            "type": "type",
            "isinstance": "bool",
            "hasattr": "bool",
            "getattr": "Any",
            "open": "IO",
        }
        return type_hints.get(func_name, "Any")


class DefaultValueAnalyzer:
    """Analyzes default values to infer parameter types."""

    def analyze(self, default: Optional[ast.expr]) -> Optional[TypeCandidate]:
        if default is None:
            return None
        default_type = self._infer_type(default)
        confidence_map = {
            "None": 0.7,
            "str": 0.8,
            "int": 0.8,
            "float": 0.8,
            "bool": 0.8,
            "list": 0.6,
            "dict": 0.6,
            "set": 0.6,
            "tuple": 0.6,
            "Any": 0.5,
        }
        confidence = confidence_map.get(default_type, 0.6)
        return TypeCandidate(default_type, confidence, "default")

    def _infer_type(self, node: ast.expr) -> str:
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool):
                return "bool"
            elif isinstance(value, int):
                return "int"
            elif isinstance(value, float):
                return "float"
            elif isinstance(value, str):
                return "str"
            elif value is None:
                return "None"
        elif isinstance(node, ast.List):
            return "list"
        elif isinstance(node, ast.Dict):
            return "dict"
        elif isinstance(node, ast.Tuple):
            return "tuple"
        elif isinstance(node, ast.Set):
            return "set"
        elif isinstance(node, ast.Name):
            if node.id in ("None", "True", "False"):
                return "None" if node.id == "None" else "bool"
        elif isinstance(node, ast.NameConstant):
            if node.value is None:
                return "None"
            elif isinstance(node.value, bool):
                return "bool"
        return "Any"


class AssignmentAnalyzer:
    """Analyzes variable assignments to infer types."""

    def __init__(self, file_info: FileInfo):
        self.file_info = file_info
        self.assignments: Dict[str, str] = {}

    def analyze(self) -> Dict[str, TypeCandidate]:
        results = {}
        for var in self.file_info.variables:
            if var.value:
                inferred = self._infer_type(var.value)
                confidence = 0.8 if inferred != "Any" else 0.5
                results[var.name] = TypeCandidate(inferred, confidence, "assignment")
            elif var.annotation:
                results[var.name] = TypeCandidate(
                    var.annotation.to_string(),
                    1.0,
                    "annotation"
                )
        return results

    def _infer_type(self, node: ast.expr) -> str:
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool):
                return "bool"
            elif isinstance(value, int):
                return "int"
            elif isinstance(value, float):
                return "float"
            elif isinstance(value, str):
                return "str"
            elif value is None:
                return "None"
        elif isinstance(node, ast.List):
            if node.elts:
                types = {self._infer_type(elt) for elt in node.elts}
                if len(types) == 1:
                    return f"List[{types.pop()}]"
            return "list"
        elif isinstance(node, ast.Tuple):
            return "tuple"
        elif isinstance(node, ast.Dict):
            return "dict"
        elif isinstance(node, ast.Set):
            return "set"
        elif isinstance(node, ast.Call):
            if isinstance(node.func, ast.Name):
                func_name = node.func.id
                type_map = {
                    "list": "list",
                    "dict": "dict",
                    "set": "set",
                    "tuple": "tuple",
                    "int": "int",
                    "str": "str",
                    "float": "float",
                    "bool": "bool",
                }
                return type_map.get(func_name, "Any")
            return "Any"
        return "Any"


class DocstringParser:
    """Parses docstrings for type hints (Google and NumPy style)."""

    def __init__(self, docstring: str):
        self.docstring = docstring or ""

    def parse_params(self) -> Dict[str, str]:
        params = {}
        # Google style: an "Args:" section with "name (type): description" lines.
        google_match = re.search(
            r'Args:\s*\n(.*?)(?:\n\n|\n[A-Z]|\Z)',
            self.docstring,
            re.DOTALL
        )
        if google_match:
            args_section = google_match.group(1)
            for match in re.finditer(r'(\w+)\s*\(([^)]+)\)\s*:\s*(.*)', args_section):
                param_name = match.group(1)
                param_type = match.group(2).strip()
                params[param_name] = param_type
        # NumPy style: a "Parameters" section with "name : type" lines.
        numpy_match = re.search(
            r'Parameters\s*\n\s*-+\s*\n(.*?)(?:\n\n|\n[A-Z]|\Z)',
            self.docstring,
            re.DOTALL
        )
        if numpy_match:
            params_section = numpy_match.group(1)
            for match in re.finditer(r'^\s*(\w+)\s*:\s*(\S[^\n]*)', params_section, re.MULTILINE):
                param_name = match.group(1)
                param_type = match.group(2).strip()
                if param_name not in params:
                    params[param_name] = param_type
        return params

    def parse_returns(self) -> Optional[str]:
        # Google style: "Returns:\n    type: description" -- capture the type
        # before the colon, not the description after it.
        google_match = re.search(
            r'Returns?:\s*\n\s*([^:\n]+):',
            self.docstring
        )
        if google_match:
            return google_match.group(1).strip()
        # NumPy style: "Returns\n-------\nname : type" or a bare "type" line.
        numpy_match = re.search(
            r'Returns?\s*\n\s*-+\s*\n\s*(?:\w+\s*:\s*)?([^\n]+)',
            self.docstring
        )
        if numpy_match:
            return numpy_match.group(1).strip()
        return None

    def parse_attributes(self) -> Dict[str, str]:
        attrs = {}
        google_match = re.search(
            r'Attributes:\s*\n(.*?)(?:\n\n|\n[A-Z]|\Z)',
            self.docstring,
            re.DOTALL
        )
        if google_match:
            attrs_section = google_match.group(1)
            for match in re.finditer(r'(\w+)\s*\(([^)]+)\)\s*:\s*(.*)', attrs_section):
                attr_name = match.group(1)
                attr_type = match.group(2).strip()
                attrs[attr_name] = attr_type
        return attrs
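
# A quick illustration of the docstring parser on a Google-style docstring
# (illustrative only; the exact output depends on the regexes above):
#
#     doc = """Add two numbers.
#
#     Args:
#         a (int): First operand.
#         b (int): Second operand.
#
#     Returns:
#         int: The sum.
#     """
#     DocstringParser(doc).parse_params()   # {'a': 'int', 'b': 'int'}
#     DocstringParser(doc).parse_returns()  # 'int'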


class Inferrer:
    """Main type inference engine that combines all analyzers."""

    def __init__(self, infer_depth: int = 3):
        self.infer_depth = infer_depth

    def infer_function_signature(self, func: Function, docstring: Optional[str] = None) -> Dict[str, Any]:
        """Infer complete function signature including parameter and return types."""
        args_info = {}
        docstring_parser = DocstringParser(docstring or "")
        doc_params = docstring_parser.parse_params()
        for arg in func.args.args:
            arg_info = {"name": arg.arg}
            if arg.annotation:
                arg_info["type"] = arg.annotation
                arg_info["source"] = "annotation"
                arg_info["confidence"] = 1.0
            elif arg.arg in doc_params:
                arg_info["type"] = doc_params[arg.arg]
                arg_info["source"] = "docstring"
                arg_info["confidence"] = 0.9
            else:
                # Defaults align with the trailing parameters: parameter i has a
                # default only when i >= len(args) - len(defaults).
                default_idx = func.args.args.index(arg) - (len(func.args.args) - len(func.args.defaults))
                if 0 <= default_idx < len(func.args.defaults):
                    default_analyzer = DefaultValueAnalyzer()
                    candidate = default_analyzer.analyze(func.args.defaults[default_idx])
                    if candidate:
                        arg_info["type"] = candidate.type_str
                        arg_info["source"] = "default"
                        arg_info["confidence"] = candidate.confidence
                    else:
                        arg_info["type"] = "Any"
                        arg_info["source"] = "none"
                        arg_info["confidence"] = 0.5
                else:
                    arg_info["type"] = "Any"
                    arg_info["source"] = "none"
                    arg_info["confidence"] = 0.5
            args_info[arg.arg] = arg_info
        for kwonly in func.args.kwonlyargs:
            arg_info = {"name": kwonly.arg}
            if kwonly.annotation:
                arg_info["type"] = kwonly.annotation
                arg_info["source"] = "annotation"
                arg_info["confidence"] = 1.0
            else:
                arg_info["type"] = "Any"
                arg_info["source"] = "none"
                arg_info["confidence"] = 0.5
            args_info[kwonly.arg] = arg_info
        if func.args.vararg:
            args_info[func.args.vararg.arg] = {
                "name": func.args.vararg.arg,
                "type": "*Tuple[Any, ...]",
                "source": "vararg",
                "confidence": 1.0
            }
        if func.args.kwarg:
            args_info[func.args.kwarg.arg] = {
                "name": func.args.kwarg.arg,
                "type": "**Dict[str, Any]",
                "source": "kwarg",
                "confidence": 1.0
            }
        return_type_info = {"type": "None", "source": "default", "confidence": 1.0}
        if func.returns:
            return_type_info = {
                "type": func.returns.to_string(),
                "source": "annotation",
                "confidence": 1.0
            }
        else:
            return_analyzer = ReturnTypeAnalyzer(func)
            candidate = return_analyzer.analyze()
            if candidate:
                return_type_info = {
                    "type": candidate.type_str,
                    "source": candidate.source,
                    "confidence": candidate.confidence
                }
            elif docstring:
                doc_returns = docstring_parser.parse_returns()
                if doc_returns:
                    return_type_info = {
                        "type": doc_returns,
                        "source": "docstring",
                        "confidence": 0.9
                    }
        return {
            "args": args_info,
            "returns": return_type_info,
            "is_async": func.is_async,
            "decorators": func.decorators
        }
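
    # infer_function_signature returns a mapping of the shape:
    #     {"args": {param: {"name", "type", "source", "confidence"}, ...},
    #      "returns": {"type", "source", "confidence"},
    #      "is_async": bool,
    #      "decorators": [...]}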

    def infer_file(self, file_info: FileInfo) -> Dict[str, Any]:
        """Infer types for all components in a file."""
        results = {
            "functions": {},
            "classes": {},
            "variables": {}
        }
        for func in file_info.functions:
            results["functions"][func.name] = self.infer_function_signature(
                func, file_info.docstring
            )
        for cls in file_info.classes:
            class_info = {
                "name": cls.name,
                "bases": cls.bases,
                "methods": {},
                "attributes": {}
            }
            for method in cls.methods:
                class_info["methods"][method.name] = self.infer_function_signature(
                    method, None
                )
            for attr in cls.attributes:
                if attr.annotation:
                    class_info["attributes"][attr.name] = {
                        "type": attr.annotation.to_string(),
                        "source": "annotation",
                        "confidence": 1.0
                    }
                else:
                    class_info["attributes"][attr.name] = {
                        "type": "Any",
                        "source": "none",
                        "confidence": 0.5
                    }
            results["classes"][cls.name] = class_info
        assignment_analyzer = AssignmentAnalyzer(file_info)
        results["variables"] = assignment_analyzer.analyze()
        return results
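
# Example usage (illustrative sketch; `parse_source` is hypothetical -- use
# whatever stubgen.parser actually exposes to build a FileInfo):
#
#     file_info = parse_source(Path("example.py").read_text())
#     inferred = Inferrer().infer_file(file_info)
#     for name, sig in inferred["functions"].items():
#         print(name, "->", sig["returns"]["type"])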