"""Parser module for curl commands.""" import re from dataclasses import dataclass, field from typing import Optional @dataclass class ParsedCurl: """Represents a parsed curl command.""" url: str method: str = "GET" headers: dict = field(default_factory=dict) data: Optional[str] = None auth: Optional[tuple] = None cookies: Optional[str] = None user_agent: Optional[str] = None def unquote(s: str) -> str: """Remove outer quotes from a string.""" if not s: return s if (s.startswith('"') and s.endswith('"')) or (s.startswith("'") and s.endswith("'")): return s[1:-1] return s def parse_curl(curl_command: str) -> ParsedCurl: """Parse a curl command string into structured data. Args: curl_command: The curl command string to parse. Returns: ParsedCurl object with extracted components. Raises: ValueError: If the curl command is invalid. """ if not curl_command.strip(): raise ValueError("Empty curl command") curl_command = curl_command.strip() if curl_command.startswith("curl "): curl_command = curl_command[5:] elif curl_command.startswith("curl"): curl_command = curl_command[4:] tokens = tokenize_command(curl_command) url = "" method = "GET" headers = {} data = None auth = None cookies = None user_agent = None i = 0 while i < len(tokens): token = tokens[i] if token == "-X" or token == "--request": if i + 1 < len(tokens): method = tokens[i + 1].upper() i += 2 continue elif token == "-H" or token == "--header": if i + 1 < len(tokens): header = tokens[i + 1] if ":" in header: key, value = header.split(":", 1) headers[key.strip()] = value.strip() i += 2 continue elif token == "-d" or token == "--data" or token == "--data-raw": if i + 1 < len(tokens): data = tokens[i + 1] if method == "GET": method = "POST" i += 2 continue elif token == "-u" or token == "--user": if i + 1 < len(tokens): auth_str = tokens[i + 1] if ":" in auth_str: auth = tuple(auth_str.split(":", 1)) else: auth = auth_str i += 2 continue elif token == "-b" or token == "--cookie": if i + 1 < len(tokens): cookies = tokens[i + 1] i += 2 continue elif token == "-A" or token == "--user-agent": if i + 1 < len(tokens): user_agent = tokens[i + 1] i += 2 continue elif token == "-L" or token == "--location" or token == "-s" or token == "--silent" or token == "-S" or token == "--show-error": i += 1 continue elif token.startswith("-"): i += 1 continue else: if not url: url = token i += 1 if not url: raise ValueError("No URL found in curl command") if not url.startswith(("http://", "https://")): url = "https://" + url if "Authorization" in headers: auth_header = headers["Authorization"] if auth_header.startswith("Basic "): import base64 try: encoded = auth_header[6:] decoded = base64.b64decode(encoded).decode("utf-8") if ":" in decoded: auth = tuple(decoded.split(":", 1)) except Exception: pass elif auth_header.startswith("Bearer "): headers["Authorization"] = auth_header return ParsedCurl( url=url, method=method, headers=headers, data=data, auth=auth, cookies=cookies, user_agent=user_agent ) def tokenize_command(cmd: str) -> list: """Tokenize a curl command into components, handling quotes and escapes.""" tokens = [] current = "" in_single_quote = False in_double_quote = False escape_next = False i = 0 while i < len(cmd): char = cmd[i] if escape_next: current += char escape_next = False i += 1 continue if char == "\\" and not in_single_quote: escape_next = True i += 1 continue if char == "'" and not in_double_quote: in_single_quote = not in_single_quote i += 1 continue if char == '"' and not in_single_quote: in_double_quote = not in_double_quote i += 1 continue if char == " " and not in_single_quote and not in_double_quote: if current: tokens.append(current) current = "" i += 1 continue current += char i += 1 if current: tokens.append(current) return tokens