"""GitHub issue parsing functionality."""

import os
import re
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from github import Github
from github.Issue import Issue
from github.Label import Label


@dataclass
class ChecklistItem:
    """Represents a checklist item from a GitHub issue."""

    text: str
    completed: bool
    line_number: Optional[int] = None  # 0-based index into the issue body's lines
    category: Optional[str] = None  # nearest preceding markdown heading, if any


@dataclass
class IssueData:
    """Structured data extracted from a GitHub issue."""

    number: int
    title: str
    body: str
    body_html: str
    labels: List[str]
    state: str
    url: str
    repository: str
    author: str
    created_at: str
    updated_at: str
    checklist: List[ChecklistItem] = field(default_factory=list)
    requirements: List[str] = field(default_factory=list)
    acceptance_criteria: List[str] = field(default_factory=list)
    suggested_files: List[str] = field(default_factory=list)
    suggested_directories: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def get_todo_items(self) -> List[str]:
        """Get all todo items (unchecked entries) from the checklist."""
        return [item.text for item in self.checklist if not item.completed]

    def get_completed_items(self) -> List[str]:
        """Get completed (checked) checklist items."""
        return [item.text for item in self.checklist if item.completed]

    def generate_todo_comments(self) -> str:
        """Generate TODO comments from unchecked checklist items.

        Returns:
            A newline-joined comment block (``# TODO #n: ...`` per item),
            or the empty string when there are no open items.
        """
        todos = self.get_todo_items()
        if not todos:
            return ""

        lines = ["", "# TODO Items from GitHub Issue", ""]
        for i, todo in enumerate(todos, 1):
            lines.append(f"# TODO #{i}: {todo}")
        return "\n".join(lines)


class IssueParser:
    """Parser for GitHub issues."""

    # Language name -> keywords that signal it (matched against labels and body).
    LABEL_LANGUAGE_MAP = {
        "python": ["python", "py", "python3"],
        "javascript": ["javascript", "js", "node", "nodejs"],
        "go": ["go", "golang"],
        "rust": ["rust", "rs"],
    }

    def __init__(self, token: Optional[str] = None):
        """Initialize the issue parser.

        Args:
            token: GitHub personal access token for API access. Falls back
                to the ``GITHUB_TOKEN`` environment variable; if neither is
                set, an unauthenticated (rate-limited) client is used.
        """
        self.token = token or os.environ.get("GITHUB_TOKEN")
        if self.token:
            self.github = Github(self.token)
        else:
            self.github = Github()

    def parse_issue(
        self, owner: str, repo: str, issue_number: int, max_retries: int = 3
    ) -> IssueData:
        """Parse a GitHub issue and extract structured data.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_number: Issue number.
            max_retries: Maximum number of attempts when rate limited.

        Returns:
            IssueData object with extracted information.

        Raises:
            ValueError: If ``max_retries`` is not a positive integer.
            Exception: The last API error, re-raised once retries are
                exhausted (or immediately for non-rate-limit errors).
        """
        for attempt in range(max_retries):
            try:
                repository = self.github.get_repo(f"{owner}/{repo}")
                issue = repository.get_issue(issue_number)
                return self._extract_issue_data(issue, f"{owner}/{repo}")
            except Exception as e:
                if "rate limit" in str(e).lower() and attempt < max_retries - 1:
                    # Linear backoff: 60s, 120s, ... before the next attempt.
                    time.sleep(60 * (attempt + 1))
                else:
                    raise
        # Previously this fell through and implicitly returned None when
        # max_retries <= 0, violating the return annotation. Fail loudly.
        raise ValueError(f"max_retries must be a positive integer, got {max_retries}")

    def _extract_issue_data(self, issue: Issue, repository: str) -> IssueData:
        """Extract structured data from a GitHub issue.

        Args:
            issue: PyGithub Issue object.
            repository: Repository identifier (owner/repo).

        Returns:
            IssueData object with extracted information.
        """
        labels = [label.name for label in issue.labels]

        checklist = self._parse_checklist(issue.body)
        requirements = self._parse_requirements(issue.body)
        acceptance_criteria = self._parse_acceptance_criteria(issue.body)
        suggested_files = self._parse_file_paths(issue.body)
        suggested_directories = self._parse_directory_paths(issue.body)

        return IssueData(
            number=issue.number,
            title=issue.title,
            body=issue.body or "",
            body_html=issue.body_html or "",
            labels=labels,
            state=issue.state,
            url=issue.html_url,
            repository=repository,
            author=issue.user.login if issue.user else "unknown",
            created_at=issue.created_at.isoformat() if issue.created_at else "",
            updated_at=issue.updated_at.isoformat() if issue.updated_at else "",
            checklist=checklist,
            requirements=requirements,
            acceptance_criteria=acceptance_criteria,
            suggested_files=suggested_files,
            suggested_directories=suggested_directories,
        )

    def _parse_checklist(self, body: str) -> List[ChecklistItem]:
        """Parse markdown checklist items (``- [ ]`` / ``- [x]``) from issue body.

        Args:
            body: Issue body text.

        Returns:
            List of ChecklistItem objects, each tagged with the nearest
            preceding markdown heading as its category.
        """
        checklist: List[ChecklistItem] = []
        if not body:
            return checklist

        lines = body.split("\n")
        in_checklist = False
        current_category = None

        for i, line in enumerate(lines):
            # A heading starts a new category and ends any open checklist run.
            category_match = re.match(r"^\s*(?:###|##|#)\s+(.+)", line)
            if category_match:
                current_category = category_match.group(1)
                in_checklist = False
                continue

            checklist_match = re.match(r"^\s*[-*]\s+\[([ xX])\]\s+(.+)$", line)
            if checklist_match:
                in_checklist = True
                checked = checklist_match.group(1).lower() == "x"
                text = checklist_match.group(2).strip()
                checklist.append(
                    ChecklistItem(
                        text=text,
                        completed=checked,
                        line_number=i,
                        category=current_category,
                    )
                )

        return checklist

    def _parse_requirements(self, body: str) -> List[str]:
        """Parse bullet items under a ``Requirements`` heading.

        Args:
            body: Issue body text.

        Returns:
            List of requirement strings.
        """
        requirements: List[str] = []
        if not body:
            return requirements

        lines = body.split("\n")
        in_requirements_section = False

        for line in lines:
            # Matches "# Requirements" or "## Requirements" (case-insensitive).
            if re.match(r"^##?\s*Requirements\s*$", line, re.IGNORECASE):
                in_requirements_section = True
                continue
            if in_requirements_section:
                if line.startswith("##"):
                    # Next section heading ends the requirements block.
                    break
                req_match = re.match(r"^[-*]\s+(.+)$", line)
                if req_match:
                    requirements.append(req_match.group(1))

        return requirements

    def _parse_acceptance_criteria(self, body: str) -> List[str]:
        """Parse checklist items under an ``Acceptance Criteria``/``AC`` heading.

        Args:
            body: Issue body text.

        Returns:
            List of acceptance criteria strings (checked or not).
        """
        criteria: List[str] = []
        if not body:
            return criteria

        lines = body.split("\n")
        in_criteria_section = False

        for line in lines:
            if re.match(r"^##?\s*(Acceptance Criteria|AC)\s*$", line, re.IGNORECASE):
                in_criteria_section = True
                continue
            if in_criteria_section:
                if line.startswith("##"):
                    break
                crit_match = re.match(r"^[-*]\s+\[([ xX])\]\s*(.+)$", line)
                if crit_match:
                    criteria.append(crit_match.group(2).strip())

        return criteria

    def _parse_file_paths(self, body: str) -> List[str]:
        """Parse suggested file paths from issue body.

        Args:
            body: Issue body text.

        Returns:
            Sorted, de-duplicated list of file path strings.
        """
        files: List[str] = []
        if not body:
            return files

        patterns = [
            # Backtick-quoted filename with a known extension. The extension
            # alternation is NON-capturing: with a second capture group,
            # re.findall would return (name, extension) tuples here while the
            # other patterns return strings, corrupting the result list.
            r"`([^`/]+\.(?:py|js|go|rs|ts|json|yaml|yml|toml))`",
            r"file:\s*([^\s]+)",
            r"(src/[^\s]+)",
            r"(lib/[^\s]+)",
        ]

        for pattern in patterns:
            files.extend(re.findall(pattern, body, re.IGNORECASE))

        # sorted() makes the output deterministic across runs (plain
        # list(set(...)) ordering depends on hash randomization).
        return sorted(set(files))

    def _parse_directory_paths(self, body: str) -> List[str]:
        """Parse suggested directory paths from issue body.

        Args:
            body: Issue body text.

        Returns:
            Sorted, de-duplicated list of directory path strings.
        """
        directories: List[str] = []
        if not body:
            return directories

        patterns = [
            r"directory:\s*([^\s]+)",
            r"(?:src|lib|tests?|docs?|examples?)/[^\s]*",
        ]

        for pattern in patterns:
            directories.extend(re.findall(pattern, body, re.IGNORECASE))

        # Deterministic ordering, as in _parse_file_paths.
        return sorted(set(directories))

    def detect_language(self, issue_data: IssueData) -> Optional[str]:
        """Detect the programming language from issue labels and content.

        Args:
            issue_data: IssueData object.

        Returns:
            Detected language string or None.
        """
        labels_lower = [label.lower() for label in issue_data.labels]

        # Labels are matched by exact equality.
        for lang, keywords in self.LABEL_LANGUAGE_MAP.items():
            if any(kw in labels_lower for kw in keywords):
                return lang

        # Body keywords are matched on word boundaries: a bare substring
        # test would make "go" match "good" and "js" match "json".
        body_lower = issue_data.body.lower()
        for lang, keywords in self.LABEL_LANGUAGE_MAP.items():
            if any(
                re.search(rf"\b{re.escape(kw)}\b", body_lower) for kw in keywords
            ):
                return lang

        return None

    def detect_project_type(self, issue_data: IssueData) -> str:
        """Detect project type from issue content.

        Args:
            issue_data: IssueData object.

        Returns:
            Project type string: "cli", "api", "web", "library", or the
            fallback "application".
        """
        body_lower = issue_data.body.lower()

        def mentions(*keywords: str) -> bool:
            # Word-boundary match so e.g. "api" does not fire on "capital".
            return any(
                re.search(rf"\b{re.escape(kw)}\b", body_lower) for kw in keywords
            )

        if mentions("cli", "command", "tool"):
            return "cli"
        if mentions("api", "rest", "endpoint"):
            return "api"
        if mentions("web", "frontend", "ui"):
            return "web"
        if mentions("library", "package", "module"):
            return "library"

        return "application"