"""GitHub issue parsing functionality."""

import os
import re
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from github import Github
from github.Issue import Issue
from github.Label import Label


@dataclass
class ChecklistItem:
    """Represents a checklist item from a GitHub issue."""

    text: str
    completed: bool
    line_number: Optional[int] = None  # 0-based index into the issue body's lines
    category: Optional[str] = None  # nearest preceding markdown heading, if any


@dataclass
class IssueData:
    """Structured data extracted from a GitHub issue."""

    number: int
    title: str
    body: str
    body_html: str
    labels: List[str]
    state: str
    url: str
    repository: str
    author: str
    created_at: str
    updated_at: str
    checklist: List[ChecklistItem] = field(default_factory=list)
    requirements: List[str] = field(default_factory=list)
    acceptance_criteria: List[str] = field(default_factory=list)
    suggested_files: List[str] = field(default_factory=list)
    suggested_directories: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def get_todo_items(self) -> List[str]:
        """Get all todo items (unchecked entries) from the checklist."""
        return [item.text for item in self.checklist if not item.completed]

    def get_completed_items(self) -> List[str]:
        """Get completed (checked) checklist items."""
        return [item.text for item in self.checklist if item.completed]

    def generate_todo_comments(self) -> str:
        """Generate TODO comments from unchecked checklist items.

        Returns:
            A newline-joined comment block (``# TODO #n: ...`` per item),
            or the empty string when there are no open items.
        """
        todos = self.get_todo_items()
        if not todos:
            return ""

        lines = ["", "# TODO Items from GitHub Issue", ""]
        for i, todo in enumerate(todos, 1):
            lines.append(f"# TODO #{i}: {todo}")
        return "\n".join(lines)


class IssueParser:
    """Parser for GitHub issues."""

    # Language name -> keywords that signal it (matched against labels and body).
    LABEL_LANGUAGE_MAP = {
        "python": ["python", "py", "python3"],
        "javascript": ["javascript", "js", "node", "nodejs"],
        "go": ["go", "golang"],
        "rust": ["rust", "rs"],
    }

    def __init__(self, token: Optional[str] = None):
        """Initialize the issue parser.

        Args:
            token: GitHub personal access token for API access. Falls back
                to the ``GITHUB_TOKEN`` environment variable; if neither is
                set, an unauthenticated (rate-limited) client is used.
        """
        self.token = token or os.environ.get("GITHUB_TOKEN")
        if self.token:
            self.github = Github(self.token)
        else:
            self.github = Github()

    def parse_issue(
        self, owner: str, repo: str, issue_number: int, max_retries: int = 3
    ) -> IssueData:
        """Parse a GitHub issue and extract structured data.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_number: Issue number.
            max_retries: Maximum number of attempts when rate limited.

        Returns:
            IssueData object with extracted information.

        Raises:
            ValueError: If ``max_retries`` is not a positive integer.
            Exception: The last API error, re-raised once retries are
                exhausted (or immediately for non-rate-limit errors).
        """
        for attempt in range(max_retries):
            try:
                repository = self.github.get_repo(f"{owner}/{repo}")
                issue = repository.get_issue(issue_number)
                return self._extract_issue_data(issue, f"{owner}/{repo}")
            except Exception as e:
                if "rate limit" in str(e).lower() and attempt < max_retries - 1:
                    # Linear backoff: 60s, 120s, ... before the next attempt.
                    time.sleep(60 * (attempt + 1))
                else:
                    raise
        # Previously this fell through and implicitly returned None when
        # max_retries <= 0, violating the return annotation. Fail loudly.
        raise ValueError(f"max_retries must be a positive integer, got {max_retries}")

    def _extract_issue_data(self, issue: Issue, repository: str) -> IssueData:
        """Extract structured data from a GitHub issue.

        Args:
            issue: PyGithub Issue object.
            repository: Repository identifier (owner/repo).

        Returns:
            IssueData object with extracted information.
        """
        labels = [label.name for label in issue.labels]

        checklist = self._parse_checklist(issue.body)
        requirements = self._parse_requirements(issue.body)
        acceptance_criteria = self._parse_acceptance_criteria(issue.body)
        suggested_files = self._parse_file_paths(issue.body)
        suggested_directories = self._parse_directory_paths(issue.body)

        return IssueData(
            number=issue.number,
            title=issue.title,
            body=issue.body or "",
            body_html=issue.body_html or "",
            labels=labels,
            state=issue.state,
            url=issue.html_url,
            repository=repository,
            author=issue.user.login if issue.user else "unknown",
            created_at=issue.created_at.isoformat() if issue.created_at else "",
            updated_at=issue.updated_at.isoformat() if issue.updated_at else "",
            checklist=checklist,
            requirements=requirements,
            acceptance_criteria=acceptance_criteria,
            suggested_files=suggested_files,
            suggested_directories=suggested_directories,
        )

    def _parse_checklist(self, body: str) -> List[ChecklistItem]:
        """Parse markdown checklist items (``- [ ]`` / ``- [x]``) from issue body.

        Args:
            body: Issue body text.

        Returns:
            List of ChecklistItem objects, each tagged with the nearest
            preceding markdown heading as its category.
        """
        checklist: List[ChecklistItem] = []
        if not body:
            return checklist

        lines = body.split("\n")
        in_checklist = False
        current_category = None

        for i, line in enumerate(lines):
            # A heading starts a new category and ends any open checklist run.
            category_match = re.match(r"^\s*(?:###|##|#)\s+(.+)", line)
            if category_match:
                current_category = category_match.group(1)
                in_checklist = False
                continue

            checklist_match = re.match(r"^\s*[-*]\s+\[([ xX])\]\s+(.+)$", line)
            if checklist_match:
                in_checklist = True
                checked = checklist_match.group(1).lower() == "x"
                text = checklist_match.group(2).strip()
                checklist.append(
                    ChecklistItem(
                        text=text,
                        completed=checked,
                        line_number=i,
                        category=current_category,
                    )
                )

        return checklist

    def _parse_requirements(self, body: str) -> List[str]:
        """Parse bullet items under a ``Requirements`` heading.

        Args:
            body: Issue body text.

        Returns:
            List of requirement strings.
        """
        requirements: List[str] = []
        if not body:
            return requirements

        lines = body.split("\n")
        in_requirements_section = False

        for line in lines:
            # Matches "# Requirements" or "## Requirements" (case-insensitive).
            if re.match(r"^##?\s*Requirements\s*$", line, re.IGNORECASE):
                in_requirements_section = True
                continue
            if in_requirements_section:
                if line.startswith("##"):
                    # Next section heading ends the requirements block.
                    break
                req_match = re.match(r"^[-*]\s+(.+)$", line)
                if req_match:
                    requirements.append(req_match.group(1))

        return requirements

    def _parse_acceptance_criteria(self, body: str) -> List[str]:
        """Parse checklist items under an ``Acceptance Criteria``/``AC`` heading.

        Args:
            body: Issue body text.

        Returns:
            List of acceptance criteria strings (checked or not).
        """
        criteria: List[str] = []
        if not body:
            return criteria

        lines = body.split("\n")
        in_criteria_section = False

        for line in lines:
            if re.match(r"^##?\s*(Acceptance Criteria|AC)\s*$", line, re.IGNORECASE):
                in_criteria_section = True
                continue
            if in_criteria_section:
                if line.startswith("##"):
                    break
                crit_match = re.match(r"^[-*]\s+\[([ xX])\]\s*(.+)$", line)
                if crit_match:
                    criteria.append(crit_match.group(2).strip())

        return criteria

    def _parse_file_paths(self, body: str) -> List[str]:
        """Parse suggested file paths from issue body.

        Args:
            body: Issue body text.

        Returns:
            Sorted, de-duplicated list of file path strings.
        """
        files: List[str] = []
        if not body:
            return files

        patterns = [
            # Backtick-quoted filename with a known extension. The extension
            # alternation is NON-capturing: with a second capture group,
            # re.findall would return (name, extension) tuples here while the
            # other patterns return strings, corrupting the result list.
            r"`([^`/]+\.(?:py|js|go|rs|ts|json|yaml|yml|toml))`",
            r"file:\s*([^\s]+)",
            r"(src/[^\s]+)",
            r"(lib/[^\s]+)",
        ]

        for pattern in patterns:
            files.extend(re.findall(pattern, body, re.IGNORECASE))

        # sorted() makes the output deterministic across runs (plain
        # list(set(...)) ordering depends on hash randomization).
        return sorted(set(files))

    def _parse_directory_paths(self, body: str) -> List[str]:
        """Parse suggested directory paths from issue body.

        Args:
            body: Issue body text.

        Returns:
            Sorted, de-duplicated list of directory path strings.
        """
        directories: List[str] = []
        if not body:
            return directories

        patterns = [
            r"directory:\s*([^\s]+)",
            r"(?:src|lib|tests?|docs?|examples?)/[^\s]*",
        ]

        for pattern in patterns:
            directories.extend(re.findall(pattern, body, re.IGNORECASE))

        # Deterministic ordering, as in _parse_file_paths.
        return sorted(set(directories))

    def detect_language(self, issue_data: IssueData) -> Optional[str]:
        """Detect the programming language from issue labels and content.

        Args:
            issue_data: IssueData object.

        Returns:
            Detected language string or None.
        """
        labels_lower = [label.lower() for label in issue_data.labels]

        # Labels are matched by exact equality.
        for lang, keywords in self.LABEL_LANGUAGE_MAP.items():
            if any(kw in labels_lower for kw in keywords):
                return lang

        # Body keywords are matched on word boundaries: a bare substring
        # test would make "go" match "good" and "js" match "json".
        body_lower = issue_data.body.lower()
        for lang, keywords in self.LABEL_LANGUAGE_MAP.items():
            if any(
                re.search(rf"\b{re.escape(kw)}\b", body_lower) for kw in keywords
            ):
                return lang

        return None

    def detect_project_type(self, issue_data: IssueData) -> str:
        """Detect project type from issue content.

        Args:
            issue_data: IssueData object.

        Returns:
            Project type string: "cli", "api", "web", "library", or the
            fallback "application".
        """
        body_lower = issue_data.body.lower()

        def mentions(*keywords: str) -> bool:
            # Word-boundary match so e.g. "api" does not fire on "capital".
            return any(
                re.search(rf"\b{re.escape(kw)}\b", body_lower) for kw in keywords
            )

        if mentions("cli", "command", "tool"):
            return "cli"
        if mentions("api", "rest", "endpoint"):
            return "api"
        if mentions("web", "frontend", "ui"):
            return "web"
        if mentions("library", "package", "module"):
            return "library"

        return "application"