Files

200 lines
5.3 KiB
Python

"""Parser module for curl commands."""
import re
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class ParsedCurl:
    """Structured view of the pieces extracted from a curl command line.

    Attributes:
        url: Target URL of the request.
        method: HTTP method name; defaults to ``"GET"``.
        headers: Mapping of header name to header value. Every instance
            gets its own fresh dict.
        data: Request body, or ``None`` when no body was supplied.
        auth: Credentials tuple, or ``None`` when none were supplied.
        cookies: Raw cookie string, or ``None``.
        user_agent: User-Agent string, or ``None``.
    """

    # Required field; everything below has a default.
    url: str
    method: str = "GET"
    # default_factory avoids sharing one dict between instances.
    headers: dict = field(default_factory=dict)
    data: Optional[str] = None
    auth: Optional[tuple] = None
    cookies: Optional[str] = None
    user_agent: Optional[str] = None
def unquote(s: str) -> str:
    """Strip one pair of matching outer quotes from a string.

    Args:
        s: The string to unquote. Falsy values are returned unchanged.

    Returns:
        ``s`` without its first and last character when both are the same
        quote character (``"`` or ``'``); otherwise ``s`` unchanged.
    """
    # A single quote character cannot be both the opening and the closing
    # quote, so anything shorter than two characters is returned as-is.
    if not s or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
# Options whose argument becomes (part of) the request body.
_CURL_DATA_OPTIONS = frozenset(
    {"-d", "--data", "--data-raw", "--data-binary", "--data-ascii"}
)

# Options whose argument is extracted into the ParsedCurl result.
_CURL_PARSED_VALUE_OPTIONS = _CURL_DATA_OPTIONS | frozenset(
    {
        "-X", "--request",
        "-H", "--header",
        "-u", "--user",
        "-b", "--cookie",
        "-A", "--user-agent",
    }
)

# Options that take an argument this parser does not interpret. They are
# listed so that their argument is consumed instead of being mistaken for
# the URL (e.g. ``curl -o out.txt http://host``).
_CURL_IGNORED_VALUE_OPTIONS = frozenset(
    {
        "-o", "--output",
        "-e", "--referer",
        "-x", "--proxy",
        "-U", "--proxy-user",
        "-m", "--max-time",
        "--connect-timeout",
        "--retry",
        "-F", "--form",
        "--data-urlencode",
        "-T", "--upload-file",
        "-c", "--cookie-jar",
        "-D", "--dump-header",
        "-w", "--write-out",
        "-r", "--range",
        "-E", "--cert",
        "--key",
        "--cacert",
        "--resolve",
        "--limit-rate",
        "-K", "--config",
    }
)

# Short options whose value may be glued to the flag, as in ``-XPOST``.
_CURL_SHORT_VALUE_OPTIONS = frozenset(
    opt for opt in _CURL_PARSED_VALUE_OPTIONS if len(opt) == 2
)


def parse_curl(curl_command: str) -> ParsedCurl:
    """Parse a curl command string into structured data.

    Args:
        curl_command: The curl command string to parse. A leading ``curl``
            word is optional.

    Returns:
        ParsedCurl object with extracted components:

        * ``method`` is the explicit ``-X``/``--request`` value (upper-cased)
          if one was given, otherwise ``"POST"`` when a body was supplied and
          ``"GET"`` when not.
        * ``data`` is the body option values taken verbatim; several body
          options are joined with ``&``.
        * ``auth`` is always a ``(user, password)`` tuple when present; the
          password is ``""`` when ``-u`` was given without a colon. A
          decodable ``Authorization: Basic ...`` header takes precedence
          over ``-u``.
        * ``url`` is the first positional argument, prefixed with
          ``https://`` when it carries no scheme.

    Raises:
        ValueError: If the command is empty or contains no URL.
    """
    if not curl_command.strip():
        raise ValueError("Empty curl command")

    # Drop the program name only when it is a standalone word, so that a
    # schemeless URL such as "curlie.io/x" is not truncated.
    curl_command = curl_command.strip()
    words = curl_command.split(None, 1)
    if words[0] == "curl":
        curl_command = words[1] if len(words) == 2 else ""

    tokens = tokenize_command(curl_command)

    url = ""
    explicit_method = None
    headers = {}
    data_parts = []
    auth = None
    cookies = None
    user_agent = None

    remaining = iter(tokens)
    for token in remaining:
        if not token.startswith("-"):
            # Positional argument: only the first one is used as the URL.
            if not url:
                url = token
            continue

        option, value = token, None
        if len(token) > 2 and token[:2] in _CURL_SHORT_VALUE_OPTIONS:
            # Short option with its value attached, e.g. "-XPOST".
            option, value = token[:2], token[2:]

        if (
            option not in _CURL_PARSED_VALUE_OPTIONS
            and option not in _CURL_IGNORED_VALUE_OPTIONS
        ):
            # Boolean flag (-L, -s, --compressed, ...) or unknown option.
            continue

        if value is None:
            value = next(remaining, None)
            if value is None:
                # Value-taking option at the very end with nothing after it.
                break

        if option in ("-X", "--request"):
            explicit_method = value.upper()
        elif option in ("-H", "--header"):
            name, colon, content = value.partition(":")
            if colon:
                headers[name.strip()] = content.strip()
        elif option in _CURL_DATA_OPTIONS:
            data_parts.append(value)
        elif option in ("-u", "--user"):
            user, _, password = value.partition(":")
            auth = (user, password)
        elif option in ("-b", "--cookie"):
            cookies = value
        elif option in ("-A", "--user-agent"):
            user_agent = value
        # Remaining options are in _CURL_IGNORED_VALUE_OPTIONS: their value
        # has been consumed and is intentionally discarded.

    if not url:
        raise ValueError("No URL found in curl command")
    # Only add a default scheme when the URL has none at all; URLs such as
    # "HTTP://host" or "ftp://host" are left untouched.
    if not re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", url):
        url = "https://" + url

    # curl merges repeated body options with "&".
    data = "&".join(data_parts) if data_parts else None
    # An explicit -X always wins, independent of option order.
    method = explicit_method or ("POST" if data_parts else "GET")

    # Header names are case-insensitive (browsers emit "authorization").
    auth_header = next(
        (val for key, val in headers.items() if key.lower() == "authorization"),
        None,
    )
    if auth_header is not None and auth_header.startswith("Basic "):
        import base64

        try:
            decoded = base64.b64decode(auth_header[6:]).decode("utf-8")
        except ValueError:
            # Covers binascii.Error and UnicodeDecodeError: the header is not
            # valid Basic credentials, so keep whatever -u provided.
            decoded = ""
        if ":" in decoded:
            auth = tuple(decoded.split(":", 1))

    return ParsedCurl(
        url=url,
        method=method,
        headers=headers,
        data=data,
        auth=auth,
        cookies=cookies,
        user_agent=user_agent,
    )
def tokenize_command(cmd: str) -> list:
    """Tokenize a curl command into components, handling quotes and escapes.

    The rules follow POSIX shell word splitting:

    * Unquoted spaces, tabs, carriage returns and newlines separate tokens.
    * Single quotes preserve everything literally.
    * Inside double quotes a backslash only escapes ``"``, ``\\``, ``$`` and
      a backtick; before any other character the backslash is kept.
    * Outside quotes a backslash escapes the next character.
    * Backslash-newline is a line continuation and produces nothing.
    * Empty quoted strings (``''`` or ``""``) yield an empty token.

    Unterminated quotes and a trailing lone backslash are tolerated: the
    text collected so far is emitted and the dangling backslash is dropped.

    Args:
        cmd: The command text to split.

    Returns:
        The list of tokens in order of appearance.
    """
    tokens = []
    chars = []  # characters of the token currently being built
    in_token = False  # True once a token has started, even if still empty
    quote = None  # the active quote character, or None outside quotes
    i = 0
    n = len(cmd)

    while i < n:
        char = cmd[i]
        i += 1

        if quote == "'":
            # Everything up to the closing single quote is literal.
            if char == "'":
                quote = None
            else:
                chars.append(char)
            continue

        if char == "\\":
            if i >= n:
                break  # dangling backslash at end of input: drop it
            if cmd.startswith("\r\n", i):
                i += 2  # line continuation with Windows line ending
                continue
            following = cmd[i]
            i += 1
            if following == "\n":
                continue  # line continuation
            if quote == '"' and following not in '"\\$`':
                # Not an escape sequence inside double quotes: keep both.
                chars.append("\\")
            chars.append(following)
            in_token = True
            continue

        if quote == '"':
            if char == '"':
                quote = None
            else:
                chars.append(char)
            continue

        if char in ("'", '"'):
            quote = char
            in_token = True  # so that '' / "" still produce a token
            continue

        if char in " \t\r\n":
            if in_token:
                tokens.append("".join(chars))
                chars = []
                in_token = False
            continue

        chars.append(char)
        in_token = True

    if in_token:
        tokens.append("".join(chars))
    return tokens