200 lines
5.3 KiB
Python
200 lines
5.3 KiB
Python
"""Parser module for curl commands."""
|
|
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass
class ParsedCurl:
    """Structured view of a single curl invocation.

    Only ``url`` is required; every other field falls back to the default
    shown below when the command line did not supply it.
    """

    # Target URL of the request.
    url: str
    # HTTP verb; "GET" unless something else is supplied.
    method: str = "GET"
    # Header name -> header value. A fresh dict is created per instance.
    headers: dict = field(default_factory=dict)
    # Request body, if any.
    data: Optional[str] = None
    # Credentials, if any.
    auth: Optional[tuple] = None
    # Raw cookie string, if any.
    cookies: Optional[str] = None
    # User-Agent string, if any.
    user_agent: Optional[str] = None
|
|
|
|
|
|
def unquote(s: str) -> str:
    """Remove one pair of matching outer quotes from a string.

    Args:
        s: The text to unquote.

    Returns:
        ``s`` without its first and last character when both are the same
        quote character (``"`` or ``'``); otherwise ``s`` unchanged. Falsy
        input (``""`` or ``None``) is returned as-is.
    """
    # A quoted string needs an opening AND a closing quote, so anything
    # shorter than two characters (including a lone quote mark) is left alone.
    if not s or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
|
|
|
|
|
|
def parse_curl(curl_command: str) -> ParsedCurl:
    """Parse a curl command string into structured data.

    The command is split with ``tokenize_command`` and scanned left to
    right. Recognised options:

    * ``-X`` / ``--request``: HTTP method (upper-cased).
    * ``-H`` / ``--header``: ``Name: value`` header; entries without a
      colon are ignored.
    * ``-d`` / ``--data`` / ``--data-raw`` / ``--data-binary`` /
      ``--data-ascii``: request body. Repeated occurrences are joined with
      ``&``, as curl does. The method defaults to ``POST`` when a body is
      present and no method was given explicitly.
    * ``-u`` / ``--user``: ``user[:password]``; always stored as a
      ``(user, password)`` tuple, with an empty password when none is given.
    * ``-b`` / ``--cookie``, ``-A`` / ``--user-agent``: stored verbatim.
    * ``--url``: used as the URL if none has been seen yet.

    A few other options that take an argument (``-o``, ``--max-time``, ...)
    are recognised only so that their argument is not mistaken for the URL.
    Every other token starting with ``-`` is skipped. The first remaining
    bare word is the URL; ``https://`` is prepended when it has no
    ``http://`` or ``https://`` prefix.

    If an ``Authorization: Basic ...`` header is present (name matched
    case-insensitively) and decodes to ``user:password``, the decoded pair
    replaces ``auth``. An undecodable value is ignored.

    Args:
        curl_command: The curl command string to parse. A leading ``curl``
            word is optional.

    Returns:
        ParsedCurl object with extracted components.

    Raises:
        ValueError: If the command is empty or contains no URL.
    """
    command = curl_command.strip()
    if not command:
        raise ValueError("Empty curl command")

    # Drop the program name only when it is a whole word, so a bare host such
    # as "curl.se" is not truncated.
    parts = command.split(None, 1)
    if parts[0] == "curl":
        command = parts[1] if len(parts) > 1 else ""

    tokens = tokenize_command(command)

    data_options = ("-d", "--data", "--data-raw", "--data-binary", "--data-ascii")
    # Options whose following token is their argument rather than a URL.
    value_options = frozenset(
        (
            "-X", "--request",
            "-H", "--header",
            "-u", "--user",
            "-b", "--cookie",
            "-A", "--user-agent",
            "--url",
            # Not modelled; listed so their argument is consumed with them.
            "-o", "--output",
            "-m", "--max-time",
            "--connect-timeout",
            "-e", "--referer",
            "-x", "--proxy",
            "-F", "--form",
            "--data-urlencode",
            "-w", "--write-out",
            "-c", "--cookie-jar",
            "-T", "--upload-file",
            "--retry",
            "--cacert",
            "-E", "--cert",
            "--key",
        )
        + data_options
    )

    url = ""
    explicit_method = None  # set only by -X/--request so it always wins
    headers = {}
    data = None
    auth = None
    cookies = None
    user_agent = None

    remaining = iter(tokens)
    for token in remaining:
        if not token.startswith("-"):
            # The first bare word is the URL; later ones are ignored.
            if not url:
                url = token
            continue

        if token not in value_options:
            # Boolean switch (-L, -s, ...) or unsupported option.
            continue

        value = next(remaining, None)
        if value is None:
            # Option at the very end of the command with no argument.
            break

        if token in ("-X", "--request"):
            explicit_method = value.upper()
        elif token in ("-H", "--header"):
            name, colon, content = value.partition(":")
            if colon:
                headers[name.strip()] = content.strip()
        elif token in data_options:
            data = value if data is None else data + "&" + value
        elif token in ("-u", "--user"):
            user, _, password = value.partition(":")
            auth = (user, password)
        elif token in ("-b", "--cookie"):
            cookies = value
        elif token in ("-A", "--user-agent"):
            user_agent = value
        elif token == "--url":
            if not url:
                url = value
        # Any other value-taking option: its argument was consumed above.

    if not url:
        raise ValueError("No URL found in curl command")

    if not url.startswith(("http://", "https://")):
        url = "https://" + url

    method = explicit_method or ("POST" if data is not None else "GET")

    # Header names are case-insensitive ("copy as cURL" often emits lowercase).
    auth_header = next(
        (value for name, value in headers.items() if name.lower() == "authorization"),
        None,
    )
    if auth_header is not None and auth_header.startswith("Basic "):
        import base64

        try:
            decoded = base64.b64decode(auth_header[6:]).decode("utf-8")
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses; a malformed credential is simply not extracted.
            decoded = ""
        if ":" in decoded:
            auth = tuple(decoded.split(":", 1))

    return ParsedCurl(
        url=url,
        method=method,
        headers=headers,
        data=data,
        auth=auth,
        cookies=cookies,
        user_agent=user_agent,
    )
|
|
|
|
|
|
def tokenize_command(cmd: str) -> list:
    """Tokenize a curl command into components, handling quotes and escapes.

    Splitting follows POSIX shell quoting rules:

    * Unquoted whitespace (spaces, tabs, newlines) separates tokens.
    * Inside single quotes every character is literal.
    * Inside double quotes a backslash escapes only ``"``, ``\\``, ``$`` and
      a backtick; before any other character the backslash is kept.
    * Outside quotes a backslash makes the next character literal.
    * A backslash followed by a newline is a line continuation and is
      removed, except inside single quotes.
    * An empty quoted string (``''`` or ``""``) yields an empty token.

    An unterminated quote is closed implicitly at the end of input, and a
    backslash that is the very last character is ignored.

    Args:
        cmd: The command text to split.

    Returns:
        The list of tokens with quoting removed.
    """
    tokens = []
    chars = []        # characters of the token currently being built
    started = False   # True once a token exists, even if it is still empty
    quote = None      # the active quote character, or None outside quotes

    pos = 0
    end = len(cmd)
    while pos < end:
        ch = cmd[pos]
        pos += 1

        if quote == "'":
            # Nothing is special inside single quotes except the closer.
            if ch == "'":
                quote = None
            else:
                chars.append(ch)
        elif ch == "\\":
            if pos >= end:
                break  # dangling backslash at end of input
            nxt = cmd[pos]
            pos += 1
            if nxt == "\n":
                continue  # line continuation
            if nxt == "\r" and pos < end and cmd[pos] == "\n":
                pos += 1
                continue  # line continuation with CRLF line endings
            if quote == '"' and nxt not in '"\\$`':
                chars.append("\\")
            chars.append(nxt)
            started = True
        elif quote == '"':
            if ch == '"':
                quote = None
            else:
                chars.append(ch)
        elif ch == "'" or ch == '"':
            quote = ch
            started = True  # so that '' and "" produce an empty token
        elif ch.isspace():
            if started:
                tokens.append("".join(chars))
                chars = []
                started = False
        else:
            chars.append(ch)
            started = True

    if started:
        tokens.append("".join(chars))

    return tokens
|