Initial upload: ScaffoldForge CLI tool with full codebase, tests, and CI/CD

This commit is contained in:
2026-02-04 05:37:09 +00:00
parent c074c4ddb8
commit 2e17da8099

View File

@@ -0,0 +1,340 @@
"""GitHub issue parsing functionality."""
import os
import re
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from github import Github
from github.Issue import Issue
from github.Label import Label
@dataclass
class ChecklistItem:
    """A single markdown checkbox item parsed from a GitHub issue body."""
    # Item text with the leading "- [ ]" / "- [x]" marker stripped.
    text: str
    # True when the markdown checkbox was checked ("x" or "X").
    completed: bool
    # Zero-based index of the item's line within the issue body, if known.
    line_number: Optional[int] = None
    # Text of the nearest preceding markdown heading, if any.
    category: Optional[str] = None
@dataclass
class IssueData:
    """Structured data extracted from a GitHub issue."""
    number: int
    title: str
    body: str
    body_html: str
    labels: List[str]
    state: str
    url: str
    repository: str
    author: str
    created_at: str
    updated_at: str
    checklist: List[ChecklistItem] = field(default_factory=list)
    requirements: List[str] = field(default_factory=list)
    acceptance_criteria: List[str] = field(default_factory=list)
    suggested_files: List[str] = field(default_factory=list)
    suggested_directories: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def get_todo_items(self) -> List[str]:
        """Return the text of every checklist item that is still open."""
        return [entry.text for entry in self.checklist if not entry.completed]

    def get_completed_items(self) -> List[str]:
        """Return the text of every checklist item that is checked off."""
        return [entry.text for entry in self.checklist if entry.completed]

    def generate_todo_comments(self) -> str:
        """Render the open checklist items as '# TODO' comment lines.

        Returns an empty string when nothing is pending; otherwise a
        newline-joined header followed by one numbered TODO per item.
        """
        pending = self.get_todo_items()
        if not pending:
            return ""
        header = ["", "# TODO Items from GitHub Issue", ""]
        numbered = [
            f"# TODO #{n}: {text}" for n, text in enumerate(pending, start=1)
        ]
        return "\n".join(header + numbered)
class IssueParser:
"""Parser for GitHub issues."""
LABEL_LANGUAGE_MAP = {
"python": ["python", "py", "python3"],
"javascript": ["javascript", "js", "node", "nodejs"],
"go": ["go", "golang"],
"rust": ["rust", "rs"],
}
def __init__(self, token: Optional[str] = None):
"""Initialize the issue parser.
Args:
token: GitHub personal access token for API access.
"""
self.token = token or os.environ.get("GITHUB_TOKEN")
if self.token:
self.github = Github(self.token)
else:
self.github = Github()
def parse_issue(
self, owner: str, repo: str, issue_number: int, max_retries: int = 3
) -> IssueData:
"""Parse a GitHub issue and extract structured data.
Args:
owner: Repository owner.
repo: Repository name.
issue_number: Issue number.
max_retries: Maximum number of retries on rate limit.
Returns:
IssueData object with extracted information.
"""
for attempt in range(max_retries):
try:
repository = self.github.get_repo(f"{owner}/{repo}")
issue = repository.get_issue(issue_number)
return self._extract_issue_data(issue, f"{owner}/{repo}")
except Exception as e:
if "rate limit" in str(e).lower() and attempt < max_retries - 1:
time.sleep(60 * (attempt + 1))
else:
raise
def _extract_issue_data(self, issue: Issue, repository: str) -> IssueData:
"""Extract structured data from a GitHub issue.
Args:
issue: PyGithub Issue object.
repository: Repository identifier (owner/repo).
Returns:
IssueData object with extracted information.
"""
labels = [label.name for label in issue.labels]
checklist = self._parse_checklist(issue.body)
requirements = self._parse_requirements(issue.body)
acceptance_criteria = self._parse_acceptance_criteria(issue.body)
suggested_files = self._parse_file_paths(issue.body)
suggested_directories = self._parse_directory_paths(issue.body)
return IssueData(
number=issue.number,
title=issue.title,
body=issue.body or "",
body_html=issue.body_html or "",
labels=labels,
state=issue.state,
url=issue.html_url,
repository=repository,
author=issue.user.login if issue.user else "unknown",
created_at=issue.created_at.isoformat() if issue.created_at else "",
updated_at=issue.updated_at.isoformat() if issue.updated_at else "",
checklist=checklist,
requirements=requirements,
acceptance_criteria=acceptance_criteria,
suggested_files=suggested_files,
suggested_directories=suggested_directories,
)
def _parse_checklist(self, body: str) -> List[ChecklistItem]:
"""Parse markdown checklist items from issue body.
Args:
body: Issue body text.
Returns:
List of ChecklistItem objects.
"""
checklist = []
if not body:
return checklist
lines = body.split("\n")
in_checklist = False
current_category = None
for i, line in enumerate(lines):
category_match = re.match(r"^\s*(?:###|##|#)\s+(.+)", line)
if category_match:
current_category = category_match.group(1)
in_checklist = False
continue
checklist_match = re.match(r"^\s*[-*]\s+\[([ xX])\]\s+(.+)$", line)
if checklist_match:
in_checklist = True
checked = checklist_match.group(1).lower() == "x"
text = checklist_match.group(2).strip()
checklist.append(
ChecklistItem(
text=text,
completed=checked,
line_number=i,
category=current_category,
)
)
return checklist
def _parse_requirements(self, body: str) -> List[str]:
"""Parse requirements from issue body.
Args:
body: Issue body text.
Returns:
List of requirement strings.
"""
requirements = []
if not body:
return requirements
lines = body.split("\n")
in_requirements_section = False
for line in lines:
if re.match(r"^##?\s*Requirements\s*$", line, re.IGNORECASE):
in_requirements_section = True
continue
if in_requirements_section:
if line.startswith("##"):
break
req_match = re.match(r"^[-*]\s+(.+)$", line)
if req_match:
requirements.append(req_match.group(1))
return requirements
def _parse_acceptance_criteria(self, body: str) -> List[str]:
"""Parse acceptance criteria from issue body.
Args:
body: Issue body text.
Returns:
List of acceptance criteria strings.
"""
criteria = []
if not body:
return criteria
lines = body.split("\n")
in_criteria_section = False
for line in lines:
if re.match(r"^##?\s*(Acceptance Criteria|AC)\s*$", line, re.IGNORECASE):
in_criteria_section = True
continue
if in_criteria_section:
if line.startswith("##"):
break
crit_match = re.match(r"^[-*]\s+\[([ xX])\]\s*(.+)$", line)
if crit_match:
criteria.append(crit_match.group(2).strip())
return criteria
def _parse_file_paths(self, body: str) -> List[str]:
"""Parse suggested file paths from issue body.
Args:
body: Issue body text.
Returns:
List of file path strings.
"""
files = []
if not body:
return files
patterns = [
r"`([^`/]+\.(py|js|go|rs|ts|json|yaml|yml|toml))`",
r"file:\s*([^\s]+)",
r"(src/[^\s]+)",
r"(lib/[^\s]+)",
]
for pattern in patterns:
matches = re.findall(pattern, body, re.IGNORECASE)
files.extend(matches)
return list(set(files))
def _parse_directory_paths(self, body: str) -> List[str]:
"""Parse suggested directory paths from issue body.
Args:
body: Issue body text.
Returns:
List of directory path strings.
"""
directories = []
if not body:
return directories
patterns = [
r"directory:\s*([^\s]+)",
r"(?:src|lib|tests?|docs?|examples?)/[^\s]*",
]
for pattern in patterns:
matches = re.findall(pattern, body, re.IGNORECASE)
directories.extend(matches)
return list(set(directories))
def detect_language(self, issue_data: IssueData) -> Optional[str]:
"""Detect the programming language from issue labels and content.
Args:
issue_data: IssueData object.
Returns:
Detected language string or None.
"""
labels_lower = [label.lower() for label in issue_data.labels]
for lang, keywords in self.LABEL_LANGUAGE_MAP.items():
if any(kw in labels_lower for kw in keywords):
return lang
body_lower = issue_data.body.lower()
for lang, keywords in self.LABEL_LANGUAGE_MAP.items():
if any(kw in body_lower for kw in keywords):
return lang
return None
def detect_project_type(self, issue_data: IssueData) -> str:
"""Detect project type from issue content.
Args:
issue_data: IssueData object.
Returns:
Project type string.
"""
body_lower = issue_data.body.lower()
if any(kw in body_lower for kw in ["cli", "command", "tool"]):
return "cli"
if any(kw in body_lower for kw in ["api", "rest", "endpoint"]):
return "api"
if any(kw in body_lower for kw in ["web", "frontend", "ui"]):
return "web"
if any(kw in body_lower for kw in ["library", "package", "module"]):
return "library"
return "application"