Initial commit: Requirements to Gherkin CLI Converter

This commit is contained in:
Bot
2026-02-02 12:15:36 +00:00
commit ec3ea3da33
29 changed files with 2803 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
"""NL2Gherkin - Natural Language to Gherkin Converter.
A CLI tool that converts natural language project requirements into structured
acceptance criteria in Gherkin format (Given-When-Then).
"""
__version__ = "0.1.0"

View File

@@ -0,0 +1,5 @@
"""CLI module for NL2Gherkin."""
from nl2gherkin.cli.commands import cli, convert, interactive, validate
__all__ = ["cli", "convert", "interactive", "validate"]

View File

@@ -0,0 +1,170 @@
"""CLI commands for the NL2Gherkin tool."""
import sys
from pathlib import Path
from typing import Optional
import click
from nl2gherkin.exporters.base import BaseExporter
from nl2gherkin.exporters.behave import BehaveExporter
from nl2gherkin.exporters.cucumber import CucumberExporter
from nl2gherkin.exporters.pytest_bdd import PytestBDDExporter
from nl2gherkin.gherkin.generator import GherkinGenerator
from nl2gherkin.gherkin.parser import GherkinParser
from nl2gherkin.nlp.analyzer import NLPAnalyzer
@click.group()
def cli() -> None:
    """CLI tool for converting natural language requirements to Gherkin format."""
    # The docstring doubles as the group's --help text; no body is needed.
@cli.command()
@click.argument(
    "input_file",
    type=click.Path(exists=True, readable=True, path_type=Path),
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file for the generated Gherkin.",
)
@click.option(
    "--framework",
    "-f",
    type=click.Choice(["cucumber", "behave", "pytest-bdd"]),
    default="cucumber",
    help="BDD framework to export for.",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate Gherkin syntax after generation.",
)
@click.option(
    "--ambiguity-check/--no-ambiguity-check",
    default=True,
    help="Check for ambiguous language in requirements.",
)
def convert(
    input_file: Path,
    output: Optional[Path],
    framework: str,
    validate: bool,
    ambiguity_check: bool,
) -> None:
    """Convert a requirements file to Gherkin format.

    INPUT_FILE should be a text file containing natural language requirements.
    Requirements are split on blank lines (one paragraph per requirement),
    each is analyzed and turned into a Gherkin feature, the combined output is
    optionally validated, then written to --output or echoed to stdout.
    Exits with status 1 on validation failure or any processing error.
    """
    try:
        content = input_file.read_text(encoding="utf-8")
        analyzer = NLPAnalyzer()
        parser = GherkinParser()
        # The same parser instance is reused for post-generation validation below.
        generator = GherkinGenerator(parser)
        exporter: BaseExporter
        if framework == "cucumber":
            exporter = CucumberExporter()
        elif framework == "behave":
            exporter = BehaveExporter()
        else:
            # Only remaining click.Choice value is "pytest-bdd".
            exporter = PytestBDDExporter()
        # One requirement per blank-line-separated paragraph.
        requirements = content.strip().split("\n\n")
        gherkin_features = []
        # NOTE(review): accumulated but never reported in aggregate — confirm intent.
        all_ambiguities = []
        for req in requirements:
            if not req.strip():
                continue
            if ambiguity_check:
                ambiguities = analyzer.analyze_ambiguity(req)
                if ambiguities:
                    all_ambiguities.extend(ambiguities)
                    click.echo("\n[WARNING] Ambiguities found in requirement:")
                    for amb in ambiguities:
                        click.echo(f" - {amb.message}")
                        if amb.suggestion:
                            click.echo(f" Suggestion: {amb.suggestion}")
            analysis = analyzer.analyze(req)
            gherkin = generator.generate(analysis)
            gherkin_features.append(gherkin)
        output_content = exporter.export(gherkin_features)
        if validate:
            valid, errors = parser.validate(output_content)
            if not valid:
                click.echo("\n[ERROR] Validation failed:")
                for error in errors:
                    click.echo(f" {error}")
                sys.exit(1)
            else:
                click.echo("[OK] Gherkin syntax is valid.")
        if output:
            output.write_text(output_content, encoding="utf-8")
            click.echo(f"\nOutput written to: {output}")
        else:
            click.echo(f"\n{output_content}")
    except Exception as e:
        # Broad catch is deliberate at the CLI boundary: report and exit non-zero.
        # (sys.exit raises SystemExit, a BaseException, so it is not re-caught here.)
        click.echo(f"[ERROR] {e}")
        sys.exit(1)
@cli.command()
@click.option(
    "--framework",
    "-f",
    type=click.Choice(["cucumber", "behave", "pytest-bdd"]),
    default="cucumber",
    help="BDD framework to export for.",
)
def interactive(framework: str) -> None:
    """Enter interactive mode for editing requirements."""
    # Imported lazily so the interactive machinery is only loaded when used.
    from nl2gherkin.cli.interactive import run_interactive_session

    exporter: BaseExporter
    # Dispatch table replaces the if/elif chain; anything other than the two
    # named frameworks falls through to pytest-bdd, exactly as before.
    factories = {
        "cucumber": CucumberExporter,
        "behave": BehaveExporter,
    }
    exporter = factories.get(framework, PytestBDDExporter)()
    run_interactive_session(exporter)
@cli.command()
@click.argument(
    "gherkin_file",
    type=click.Path(exists=True, readable=True, path_type=Path),
)
def validate(gherkin_file: Path) -> None:
    """Validate a Gherkin file for syntax correctness."""
    try:
        parser = GherkinParser()
        text = gherkin_file.read_text(encoding="utf-8")
        valid, errors = parser.validate(text)
        # Guard-clause form: report failures and bail out first.
        if not valid:
            click.echo("\n[ERROR] Validation failed:")
            for error in errors:
                click.echo(f" {error}")
            sys.exit(1)
        click.echo("[OK] Gherkin syntax is valid.")
    except Exception as e:
        # SystemExit is not an Exception, so the exit above is not swallowed.
        click.echo(f"[ERROR] {e}")
        sys.exit(1)

View File

@@ -0,0 +1,108 @@
"""Interactive mode for the NL2Gherkin CLI."""
from typing import List
import click
from nl2gherkin.exporters.base import BaseExporter
from nl2gherkin.gherkin.generator import GherkinGenerator
from nl2gherkin.gherkin.parser import GherkinParser
from nl2gherkin.nlp.analyzer import NLPAnalyzer
def _colorize(text: str, color: str) -> str:
    """Helper to apply color to text."""
    styled_text = click.style(text, fg=color)
    return styled_text
def run_interactive_session(exporter: BaseExporter) -> None:
    """Run the interactive session for editing requirements.

    A read-eval loop: each entered line is treated as a requirement and turned
    into a Gherkin scenario, except for the commands "edit", "export" and
    "clear" (matched case-insensitively). The loop exits on Ctrl+C.

    Args:
        exporter: Exporter used by the "export" command to combine scenarios.
    """
    analyzer = NLPAnalyzer()
    parser = GherkinParser()
    generator = GherkinGenerator(parser)
    # NOTE(review): history is appended to but never read back — confirm intent.
    history: List[dict] = []
    generated_scenarios: List[str] = []
    click.echo("\n[NL2Gherkin Interactive Mode]")
    click.echo("Enter your requirements (press Ctrl+C to exit)")
    click.echo("Use 'edit' to modify the last generated scenario")
    click.echo("Use 'export' to export all scenarios")
    click.echo("Use 'clear' to clear all scenarios\n")
    while True:
        try:
            requirement = click.prompt(
                "Enter requirement",
                type=str,
                default="",
                show_default=False,
            )
            if not requirement.strip():
                continue
            if requirement.lower() == "edit":
                if not generated_scenarios:
                    click.echo(_colorize("No scenarios to edit.", "yellow"))
                    continue
                # Defaults to the most recently generated scenario (1-based index).
                idx = click.prompt(
                    "Enter scenario number to edit",
                    type=int,
                    default=len(generated_scenarios),
                    show_default=False,
                )
                if 1 <= idx <= len(generated_scenarios):
                    # NOTE(review): the prompt default is the generated Gherkin,
                    # not the original requirement text — confirm intent.
                    edited_req = click.prompt(
                        "Enter modified requirement",
                        type=str,
                        default=generated_scenarios[idx - 1],
                        show_default=False,
                    )
                    analysis = analyzer.analyze(edited_req)
                    gherkin = generator.generate(analysis)
                    generated_scenarios[idx - 1] = gherkin
                    click.echo(f"\nUpdated scenario {idx}:")
                    click.echo(gherkin)
                else:
                    click.echo(_colorize("Invalid scenario number.", "yellow"))
                continue
            if requirement.lower() == "export":
                if not generated_scenarios:
                    click.echo(_colorize("No scenarios to export.", "yellow"))
                    continue
                output = exporter.export(generated_scenarios)
                click.echo("\n--- Exported Gherkin ---")
                click.echo(output)
                continue
            if requirement.lower() == "clear":
                generated_scenarios = []
                click.echo(_colorize("Cleared all scenarios.", "green"))
                continue
            # Default path: treat the input as a new requirement.
            analysis = analyzer.analyze(requirement)
            gherkin = generator.generate(analysis)
            generated_scenarios.append(gherkin)
            history.append({"requirement": requirement, "gherkin": gherkin})
            click.echo("\n--- Generated Scenario ---")
            click.echo(gherkin)
            # Ambiguity warnings are shown after the scenario so the user sees
            # the result first, then the caveats.
            ambiguities = analyzer.analyze_ambiguity(requirement)
            if ambiguities:
                click.echo(_colorize("\n[WARNING] Potential ambiguities:", "yellow"))
                for amb in ambiguities:
                    click.echo(f" - {amb.message}")
                    if amb.suggestion:
                        click.echo(f" Suggestion: {amb.suggestion}")
            click.echo("")
        except KeyboardInterrupt:
            click.echo("\n\nExiting interactive mode.")
            break

View File

@@ -0,0 +1,13 @@
"""Exporters module for BDD framework output."""
from nl2gherkin.exporters.base import BaseExporter
from nl2gherkin.exporters.behave import BehaveExporter
from nl2gherkin.exporters.cucumber import CucumberExporter
from nl2gherkin.exporters.pytest_bdd import PytestBDDExporter
__all__ = [
"BaseExporter",
"CucumberExporter",
"BehaveExporter",
"PytestBDDExporter",
]

View File

@@ -0,0 +1,59 @@
"""Base exporter class for BDD frameworks."""
from abc import ABC, abstractmethod
from typing import Dict, List
class BaseExporter(ABC):
    """Base class for BDD framework exporters."""

    @abstractmethod
    def export(self, features: List[str]) -> str:
        """Export features to the target framework format.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Exported content string.
        """

    @abstractmethod
    def get_step_definitions_template(self) -> str:
        """Get step definitions template for this framework.

        Returns:
            Step definitions template string.
        """

    @abstractmethod
    def get_configuration_template(self) -> Dict[str, str]:
        """Get configuration files for this framework.

        Returns:
            Dictionary mapping filenames to content templates.
        """

    def _extract_scenarios(self, feature: str) -> List[str]:
        """Extract individual scenarios from a feature string.

        Lines before the first Scenario/Scenario Outline header are discarded;
        each returned chunk starts at a header line, with original indentation
        preserved.
        """
        chunks: List[List[str]] = []
        collecting = False
        for line in feature.split("\n"):
            header = line.strip()
            if header.startswith(("Scenario:", "Scenario Outline:")):
                chunks.append([line])
                collecting = True
            elif collecting:
                chunks[-1].append(line)
        return ["\n".join(chunk) for chunk in chunks]

View File

@@ -0,0 +1,118 @@
"""Behave exporter for Python BDD projects."""
from typing import Dict, List
from nl2gherkin.exporters.base import BaseExporter
class BehaveExporter(BaseExporter):
    """Exporter for Behave (Python)."""

    def __init__(self) -> None:
        """Initialize the Behave exporter."""
        pass

    def export(self, features: List[str]) -> str:
        """Export features to Behave format.

        Behave consumes standard Gherkin, so the features are simply joined
        with a blank line.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Behave-compatible feature file content.
        """
        return "\n\n".join(features)

    def get_step_definitions_template(self) -> str:
        """Get Behave step definitions template.

        Returns:
            Step definitions template string (a skeleton steps module; also
            imports ``step``, used by generated "And"/"But" stubs).
        """
        return '''"""Behave step definitions."""
from behave import given, when, then, step


@given("a setup condition")
def step_given_setup(context):
    """Given step implementation."""
    pass


@when("an action occurs")
def step_when_action(context):
    """When step implementation."""
    pass


@then("an expected result")
def step_then_result(context):
    """Then step implementation."""
    pass
''',

    def get_configuration_template(self) -> Dict[str, str]:
        """Get Behave configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "behave.ini": '''[behave]
format = progress
outfiles = behave-report.txt
''',
            "features/environment.py": '''"""Behave environment configuration."""


def before_scenario(context, scenario):
    """Run before each scenario."""
    pass


def after_scenario(context, scenario):
    """Run after each scenario."""
    pass
''',
        }

    def generate_step_definitions(self, scenarios: List[str]) -> str:
        """Generate Behave step definition stubs for the given scenarios.

        Fixes over the previous version: the decorator now matches the step
        keyword instead of always being ``@given``; "And"/"But" steps use
        Behave's generic ``@step`` decorator (they continue the previous step
        type); ``<var>`` outline placeholders become ``{var}`` parse-format
        parameters and are passed to the stub as named arguments after
        ``context`` (previously quoted strings were emitted in the parameter
        list, which is a syntax error).

        Args:
            scenarios: List of scenario texts.

        Returns:
            Step definitions Python code.
        """
        import re

        step_defs: List[str] = []
        for scenario in scenarios:
            for line in scenario.split("\n"):
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue
                keyword = stripped.split()[0]
                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                # Scenario Outline placeholders <x> are substituted before
                # matching, so expose them as {x} parse parameters.
                pattern = re.sub(r"<([^>]+)>", r"{\1}", step_text)
                decorator = "@step" if keyword in ("And", "But") else f"@{keyword.lower()}"
                args = ", ".join(["context"] + params)
                step_defs.append(f'{decorator}("{pattern}")')
                step_defs.append(
                    f"def step_impl({args}):\n"
                    f'    """{keyword} step implementation."""\n'
                    "    pass\n"
                )
        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<placeholder>`` parameter names from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)

View File

@@ -0,0 +1,89 @@
"""Cucumber exporter for JavaScript/TypeScript projects."""
from typing import Dict, List
from nl2gherkin.exporters.base import BaseExporter
class CucumberExporter(BaseExporter):
    """Exporter for Cucumber (JavaScript/TypeScript)."""

    def __init__(self) -> None:
        """Initialize the Cucumber exporter."""
        # Braces are doubled so they survive a later str.format() pass.
        self.step_definitions_template = """const {{ Given, When, Then }} = require('@cucumber/cucumber');
{{step_definitions}}
"""

    def export(self, features: List[str]) -> str:
        """Export features to Cucumber format.

        Cucumber consumes standard Gherkin, so the features are simply joined
        with a blank line.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Cucumber-compatible feature file content.
        """
        return "\n\n".join(features)

    def get_step_definitions_template(self) -> str:
        """Get Cucumber step definitions template.

        Returns:
            Step definitions template string.
        """
        return self.step_definitions_template

    def get_configuration_template(self) -> Dict[str, str]:
        """Get Cucumber configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "cucumber.js": '''module.exports = {
  default: '--publish-quiet'
}
''',
            ".cucumberrc": '''default:
  publish-quiet: true
  format: ['progress-bar', 'html:cucumber-report.html']
''',
        }

    def generate_step_definitions(self, scenarios: List[str]) -> str:
        """Generate Cucumber.js step definition stubs for the given scenarios.

        Fixes over the previous version: the step text is now emitted as the
        pattern argument (previously it was dropped, yielding invalid JS like
        ``Given(, async function ()``); "And"/"But" map to the preceding
        Given/When/Then since @cucumber/cucumber exposes no And/But
        registration functions; ``<var>`` outline placeholders become
        ``{string}`` Cucumber-expression parameters.

        Args:
            scenarios: List of scenario texts.

        Returns:
            Step definitions JavaScript code.
        """
        step_defs: List[str] = []
        for scenario in scenarios:
            # "And"/"But" continue the most recent explicit keyword.
            last_keyword = "Given"
            for line in scenario.split("\n"):
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue
                keyword = stripped.split()[0]
                if keyword in ("And", "But"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword
                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                pattern = step_text
                for param in params:
                    pattern = pattern.replace(f"<{param}>", "{string}")
                params_list = ", ".join(params)
                step_defs.append(
                    f"{keyword}('{pattern}', async function ({params_list}) {{\n"
                    "  // TODO: implement step\n"
                    "});\n"
                )
        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<placeholder>`` parameter names from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)

View File

@@ -0,0 +1,141 @@
"""pytest-bdd exporter for pytest projects."""
from typing import Dict, List
from nl2gherkin.exporters.base import BaseExporter
class PytestBDDExporter(BaseExporter):
    """Exporter for pytest-bdd (Python)."""

    def __init__(self) -> None:
        """Initialize the pytest-bdd exporter."""
        pass

    def export(self, features: List[str]) -> str:
        """Export features to pytest-bdd format.

        pytest-bdd consumes standard Gherkin, so the features are simply
        joined with a blank line.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            pytest-bdd-compatible feature file content.
        """
        return "\n\n".join(features)

    def get_step_definitions_template(self) -> str:
        """Get pytest-bdd step definitions template.

        Returns:
            Step definitions template string (a skeleton test module).
        """
        return '''"""pytest-bdd step definitions."""
import pytest
from pytest_bdd import given, when, then, scenarios

scenarios('features')


@given("a setup condition")
def setup_condition():
    """Given step implementation."""
    return {}


@when("an action occurs")
def action_occurs():
    """When step implementation."""
    pass


@then("an expected result")
def expected_result():
    """Then step implementation."""
    pass
'''

    def get_configuration_template(self) -> Dict[str, str]:
        """Get pytest-bdd configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "conftest.py": '''"""pytest configuration and fixtures."""
import pytest
from pytest_bdd import scenarios

scenarios('features')


@pytest.fixture
def context():
    """Shared test context."""
    return {}


def pytest_configure(config):
    """Configure pytest."""
    pass
''',
            "pytest.ini": '''[pytest]
bdd_features_base_dir = features/
''',
        }

    def generate_step_definitions(self, scenarios: List[str], feature_name: str = "features") -> str:
        """Generate pytest-bdd step definition stubs for the given scenarios.

        Fixes over the previous version: parametrized steps used
        ``@pytest.given`` (which does not exist — the decorators live in
        ``pytest_bdd``) and "And"/"But" steps produced ``@and(...)``, a Python
        syntax error. "And"/"But" now reuse the preceding step's decorator,
        and ``<var>`` placeholders are wrapped in ``parsers.parse`` patterns
        (consumers must import ``parsers`` from ``pytest_bdd``).

        Args:
            scenarios: List of scenario texts.
            feature_name: Name of the feature file (kept for API compatibility).

        Returns:
            Step definitions Python code.
        """
        step_defs: List[str] = []
        for scenario in scenarios:
            lines = scenario.split("\n")
            scenario_name = ""
            for line in lines:
                stripped = line.strip()
                if stripped.startswith("Scenario:"):
                    scenario_name = stripped[9:].strip().replace(" ", "_")
                    break
            # "and"/"but" continue the most recent explicit keyword.
            last_keyword = "given"
            for line in lines:
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue
                keyword = stripped.split()[0].lower()
                if keyword in ("and", "but"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword
                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                if params:
                    pattern = step_text
                    for param in params:
                        pattern = pattern.replace(f"<{param}>", "{" + param + "}")
                    decorator = f'@{keyword}(parsers.parse("{pattern}"))'
                    args = ", ".join(params)
                else:
                    decorator = f'@{keyword}("{step_text}")'
                    args = ""
                step_defs.append(
                    f"{decorator}\n"
                    f"def {keyword}_{scenario_name}({args}):\n"
                    f'    """{stripped.split()[0]} step implementation."""\n'
                    "    pass\n"
                )
        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<placeholder>`` parameter names from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)

View File

@@ -0,0 +1,11 @@
"""Gherkin module for generating and parsing Gherkin syntax."""
from nl2gherkin.gherkin.generator import GherkinGenerator
from nl2gherkin.gherkin.parser import GherkinParser
from nl2gherkin.gherkin.templates import GherkinTemplates
__all__ = [
"GherkinGenerator",
"GherkinParser",
"GherkinTemplates",
]

View File

@@ -0,0 +1,219 @@
"""Gherkin generator for creating Gherkin syntax from analyzed requirements."""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, List, Optional
from nl2gherkin.nlp.analyzer import RequirementAnalysis
class ScenarioType(str, Enum):
    """Types of Gherkin scenarios."""

    # Values are the literal keywords rendered into the .feature output
    # (used via .value when rendering scenario headers).
    SCENARIO = "Scenario"
    SCENARIO_OUTLINE = "Scenario Outline"
@dataclass
class GherkinStep:
    """A single step in a Gherkin scenario."""

    keyword: str  # Step keyword, e.g. "Given", "When", "Then".
    text: str  # Step text that follows the keyword.
@dataclass
class GherkinScenario:
    """A Gherkin scenario."""

    name: str  # Scenario title shown after the "Scenario:" keyword.
    scenario_type: ScenarioType = ScenarioType.SCENARIO  # Scenario vs Scenario Outline.
    steps: List[GherkinStep] = field(default_factory=list)  # Ordered steps.
    examples: List[str] = field(default_factory=list)  # Pre-rendered Examples rows (header first).
    tags: List[str] = field(default_factory=list)  # Tag names rendered as "@tag" above the scenario.
@dataclass
class GherkinFeature:
    """A Gherkin feature."""

    name: str  # Feature title shown after the "Feature:" keyword.
    description: Optional[str] = None  # Free-text description below the header.
    scenarios: List[GherkinScenario] = field(default_factory=list)  # Contained scenarios.
    tags: List[str] = field(default_factory=list)  # Tag names rendered as "@tag" above the feature.
    background: Optional[List[GherkinStep]] = None  # Steps rendered under "Background:" when set.
class GherkinGenerator:
    """Generator for creating Gherkin syntax from requirements analysis."""

    def __init__(self, parser: Optional[Any] = None) -> None:
        """Initialize the generator.

        Args:
            parser: Optional parser for validation.
        """
        self.parser = parser

    def generate(self, analysis: RequirementAnalysis) -> str:
        """Generate Gherkin from a requirement analysis.

        Args:
            analysis: The analyzed requirement.

        Returns:
            Gherkin formatted string.
        """
        feature = self._create_feature(analysis)
        return self._render_feature(feature)

    def generate_scenario(self, analysis: RequirementAnalysis) -> GherkinScenario:
        """Generate a scenario from analysis.

        Args:
            analysis: The analyzed requirement.

        Returns:
            GherkinScenario object.
        """
        return self._create_scenario(analysis)

    def _create_feature(self, analysis: RequirementAnalysis) -> GherkinFeature:
        """Create a one-scenario Gherkin feature from analysis."""
        scenario = self._create_scenario(analysis)
        feature_name = f"{analysis.actor or 'User'} {analysis.action or 'does something'}"
        return GherkinFeature(
            name=feature_name,
            description=self._create_description(analysis),
            scenarios=[scenario],
        )

    def _create_description(self, analysis: RequirementAnalysis) -> Optional[str]:
        """Create a description from the raw text and the stated benefit."""
        parts = []
        if analysis.raw_text:
            parts.append(analysis.raw_text)
        if analysis.benefit:
            parts.append(f"So that: {analysis.benefit}")
        return " ".join(parts) if parts else None

    def _create_scenario(self, analysis: RequirementAnalysis) -> GherkinScenario:
        """Create a Gherkin scenario from analysis.

        Steps are derived heuristically: an explicit condition and/or the
        actor form the Given steps, the action forms the When step, and the
        target forms the Then assertion.
        """
        steps: List[GherkinStep] = []
        if analysis.condition:
            steps.append(GherkinStep("Given", analysis.condition))
        if analysis.actor:
            given_text = f"a {analysis.actor}"
            if analysis.target:
                given_text += f" wants to interact with {analysis.target}"
            elif analysis.action:
                given_text += f" wants to {analysis.action}"
            steps.append(GherkinStep("Given", given_text))
        if analysis.action:
            when_text = analysis.action
            # Single condition equivalent to the former duplicated if/elif
            # branches (both produced this exact template): use the full
            # actor/action/target phrasing when a target or condition exists.
            if analysis.target or analysis.condition:
                when_text = f"the {analysis.actor} {analysis.action} the {analysis.target}"
            steps.append(GherkinStep("When", when_text))
        if analysis.target:
            # Naive past-tense heuristic; read-style actions get a
            # display-oriented assertion instead.
            # NOTE(review): "{action}ed" yields e.g. "createed" — confirm acceptable.
            then_text = f"the {analysis.target} should be {analysis.action}ed"
            if analysis.action_type.value in ["read", "search", "filter"]:
                then_text = f"the {analysis.target} should be displayed"
            steps.append(GherkinStep("Then", then_text))
        scenario_type = ScenarioType.SCENARIO
        examples: List[str] = []
        if analysis.variables:
            # Variables imply a parameterized scenario -> Scenario Outline.
            scenario_type = ScenarioType.SCENARIO_OUTLINE
            examples = self._create_examples(analysis)
        return GherkinScenario(
            name=self._create_scenario_name(analysis),
            scenario_type=scenario_type,
            steps=steps,
            examples=examples,
        )

    def _create_scenario_name(self, analysis: RequirementAnalysis) -> str:
        """Build a title-cased scenario name from actor/action/target."""
        parts = []
        if analysis.actor:
            parts.append(analysis.actor.capitalize())
        if analysis.action:
            parts.append(analysis.action.capitalize())
        if analysis.target:
            # Only the last word of a multi-word target is used.
            target_name = analysis.target.split()[-1] if analysis.target else ""
            parts.append(target_name.capitalize())
        return " ".join(parts) if parts else "Sample Scenario"

    def _create_examples(self, analysis: RequirementAnalysis) -> List[str]:
        """Create Examples table rows (header row first) from variables."""
        if not analysis.variables:
            return []
        headers = list(analysis.variables.keys())
        header_row = "| " + " | ".join(headers) + " |"
        example_rows: List[str] = []
        if analysis.examples:
            for example in analysis.examples:
                if isinstance(example, dict):
                    row_values = [str(example.get(h, "")) for h in headers]
                else:
                    row_values = [str(example)]
                example_rows.append("| " + " | ".join(row_values) + " |")
        else:
            # No concrete examples: emit a single placeholder row.
            example_rows.append("| " + " | ".join(["value"] * len(headers)) + " |")
        return [header_row] + example_rows

    def _render_feature(self, feature: GherkinFeature) -> str:
        """Render a GherkinFeature to Gherkin text (one-space indentation)."""
        lines: List[str] = []
        for tag in feature.tags:
            lines.append(f"@{tag}")
        lines.append(f"Feature: {feature.name}")
        if feature.description:
            lines.append(f" {feature.description}")
        if feature.background:
            lines.append(" Background:")
            for step in feature.background:
                lines.append(f" {step.keyword} {step.text}")
        for scenario in feature.scenarios:
            lines.append("")
            for tag in scenario.tags:
                lines.append(f" @{tag}")
            lines.append(f" {scenario.scenario_type.value}: {scenario.name}")
            for step in scenario.steps:
                lines.append(f" {step.keyword} {step.text}")
            if scenario.examples:
                lines.append(" Examples:")
                for example in scenario.examples:
                    lines.append(f" {example}")
        return "\n".join(lines)

View File

@@ -0,0 +1,167 @@
"""Gherkin parser for validation."""
import re
from typing import List, Optional, Tuple
class GherkinParser:
    """Parser and validator for Gherkin syntax."""

    def __init__(self) -> None:
        """Initialize the Gherkin parser."""
        pass

    def parse(self, content: str) -> dict:
        """Parse Gherkin content into a lightweight AST.

        Args:
            content: The Gherkin content to parse.

        Returns:
            Dictionary with "feature" (dict or None) and "scenarios" (list of
            dicts with name/type/steps). "line" values are 0-based indices
            into the stripped input.
        """
        lines = content.strip().split("\n")
        ast: dict = {
            "feature": None,
            "scenarios": [],
        }
        scenario: Optional[dict] = None
        for i, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("Feature:"):
                ast["feature"] = {
                    "name": stripped[8:].strip(),
                    "description": "",
                    "line": i,
                }
            elif stripped.startswith("Scenario:"):
                if scenario:
                    ast["scenarios"].append(scenario)
                scenario = {
                    "name": stripped[9:].strip(),
                    "type": "Scenario",
                    "steps": [],
                    "line": i,
                }
            elif stripped.startswith("Scenario Outline:"):
                if scenario:
                    ast["scenarios"].append(scenario)
                scenario = {
                    "name": stripped[17:].strip(),
                    "type": "Scenario Outline",
                    "steps": [],
                    "line": i,
                }
            elif stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                # Steps outside any scenario are silently ignored.
                if scenario:
                    scenario["steps"].append({
                        "keyword": stripped.split()[0],
                        "text": " ".join(stripped.split()[1:]),
                        "line": i,
                    })
            elif stripped.startswith("Examples:"):
                if scenario:
                    scenario["has_examples"] = True
        # Flush the last open scenario.
        if scenario:
            ast["scenarios"].append(scenario)
        return ast

    def validate(self, content: str) -> Tuple[bool, List[str]]:
        """Validate Gherkin syntax.

        Checks, in order: non-empty content, a leading "Feature:" line, at
        least one Scenario/Scenario Outline, parseability, and that any
        Examples table is preceded by a Scenario Outline.

        Args:
            content: The Gherkin content to validate.

        Returns:
            Tuple of (is_valid, list_of_errors).
        """
        errors: List[str] = []
        if not content.strip():
            return False, ["Empty content"]
        lines = content.strip().split("\n")
        if not lines[0].strip().startswith("Feature:"):
            return False, ["Gherkin must start with 'Feature:'"]
        has_scenario = any(
            line.strip().startswith(("Scenario:", "Scenario Outline:"))
            for line in lines
        )
        if not has_scenario:
            return False, ["Feature must have at least one Scenario"]
        try:
            self.parse(content)
        except Exception as e:
            error_msg = str(e)
            line_no = _extract_line_number(error_msg)
            if line_no:
                errors.append(f"Line {line_no}: {error_msg}")
            else:
                errors.append(f"Validation error: {error_msg}")
            return False, errors
        # Examples is only legal under a Scenario Outline appearing earlier.
        for i, line in enumerate(lines):
            if line.strip().startswith("Examples:") and not any(
                "Scenario Outline" in earlier for earlier in lines[:i]
            ):
                errors.append(f"Line {i + 1}: Examples table can only be used with Scenario Outline")
        if errors:
            return False, errors
        return True, []

    def validate_feature(self, feature_content: str) -> Tuple[bool, List[str]]:
        """Validate a single feature.

        Args:
            feature_content: The feature content to validate.

        Returns:
            Tuple of (is_valid, list_of_errors).
        """
        if not feature_content.strip().startswith("Feature:"):
            return False, ["Content must start with 'Feature:'"]
        return self.validate(feature_content)
def _extract_line_number(error_msg: str) -> Optional[int]:
"""Extract line number from error message."""
patterns = [
r"line\s+(\d+)",
r"(\d+):",
r"row\s+(\d+)",
r"line\s+(\d+)",
]
for pattern in patterns:
match = re.search(pattern, error_msg, re.IGNORECASE)
if match:
return int(match.group(1))
return None

View File

@@ -0,0 +1,105 @@
"""Gherkin templates for formatting output."""
from typing import Any, Optional
class GherkinTemplates:
    """Templates for Gherkin syntax generation."""

    # str.format templates; whitespace inside these literals is emitted
    # verbatim into the generated Gherkin text.
    # NOTE(review): continuation lines of the multi-line templates appear
    # unindented relative to the STEP_* constants — confirm intended layout.
    FEATURE = """Feature: {name}
{description}
"""
    SCENARIO = """ Scenario: {name}
Given {given}
When {when}
Then {then}
"""
    SCENARIO_OUTLINE = """ Scenario Outline: {name}
Given {given}
When {when}
Then {then}
Examples:
| {variables} |
"""
    BACKGROUND = """ Background:
"""
    STEP_GIVEN = " Given {text}"
    STEP_WHEN = " When {text}"
    STEP_THEN = " Then {text}"
    STEP_AND = " And {text}"
    STEP_BUT = " But {text}"
    EXAMPLES_TABLE = """ Examples:
| {header} |
| {row} |
"""

    @staticmethod
    def format_feature(name: str, description: str = "") -> str:
        """Format a feature header, omitting the description line when empty."""
        if description:
            return GherkinTemplates.FEATURE.format(name=name, description=description)
        return f"Feature: {name}\n"

    @staticmethod
    def format_scenario(
        name: str,
        given: str,
        when: str,
        then: str,
        additional_steps: Optional[list] = None,
    ) -> str:
        """Format a scenario.

        Args:
            name: Scenario title.
            given: Given step text.
            when: When step text.
            then: Then step text.
            additional_steps: Extra step texts, each rendered as an "And" step.
        """
        lines = [f" Scenario: {name}"]
        lines.append(f" Given {given}")
        lines.append(f" When {when}")
        lines.append(f" Then {then}")
        if additional_steps:
            for step in additional_steps:
                lines.append(f" And {step}")
        return "\n".join(lines)

    @staticmethod
    def format_scenario_outline(
        name: str,
        given: str,
        when: str,
        then: str,
        variables: list,
        examples: list,
    ) -> str:
        """Format a scenario outline with an Examples table.

        Args:
            name: Scenario title.
            given: Given step text.
            when: When step text.
            then: Then step text.
            variables: Column names for the Examples header row.
            examples: Iterable of rows; each row is an iterable of cell values.
        """
        lines = [f" Scenario Outline: {name}"]
        lines.append(f" Given {given}")
        lines.append(f" When {when}")
        lines.append(f" Then {then}")
        lines.append(" Examples:")
        header = " | ".join(variables)
        lines.append(f" | {header} |")
        for row in examples:
            row_str = " | ".join(str(v) for v in row)
            lines.append(f" | {row_str} |")
        return "\n".join(lines)

    @staticmethod
    def format_step(keyword: str, text: str) -> str:
        """Format a single step; unknown keywords fall back to the Given template."""
        templates = {
            "Given": GherkinTemplates.STEP_GIVEN,
            "When": GherkinTemplates.STEP_WHEN,
            "Then": GherkinTemplates.STEP_THEN,
            "And": GherkinTemplates.STEP_AND,
            "But": GherkinTemplates.STEP_BUT,
        }
        template = templates.get(keyword, GherkinTemplates.STEP_GIVEN)
        return template.format(text=text)

14
src/nl2gherkin/main.py Normal file
View File

@@ -0,0 +1,14 @@
"""Main entry point for the NL2Gherkin CLI."""
from nl2gherkin.cli.commands import cli
def main() -> None:
    """Entry point for the CLI; delegates to the click command group."""
    cli()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,12 @@
"""NLP module for natural language processing."""
from nl2gherkin.nlp.ambiguity import AmbiguityDetector, AmbiguityWarning
from nl2gherkin.nlp.analyzer import NLPAnalyzer
from nl2gherkin.nlp.patterns import RequirementPattern
__all__ = [
"NLPAnalyzer",
"AmbiguityDetector",
"AmbiguityWarning",
"RequirementPattern",
]

View File

@@ -0,0 +1,232 @@
"""Ambiguity detection for requirements."""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional
class AmbiguityType(str, Enum):
    """Types of ambiguity in requirements."""

    # Values double as the serialized form used by AmbiguityWarning.to_dict().
    PRONOUN = "pronoun"
    VAGUE_QUANTIFIER = "vague_quantifier"
    TEMPORAL = "temporal"
    MISSING_CONDITION = "missing_condition"
    UNCLEAR_REFERENCE = "unclear_reference"
    PASSIVE_VOICE = "passive_voice"
    UNDEFINED_TERM = "undefined_term"
@dataclass
class AmbiguityWarning:
    """A warning about ambiguous language in a requirement."""

    type: AmbiguityType  # Category of the detected ambiguity.
    message: str  # Human-readable description of the problem.
    position: int = 0  # Character offset of the flagged text in the source string.
    length: int = 0  # Length of the flagged text.
    suggestion: Optional[str] = None  # Optional rewording advice.
    severity: str = "medium"  # Severity label (e.g. "low", "medium").

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary (enum serialized to its string value)."""
        return {
            "type": self.type.value,
            "message": self.message,
            "position": self.position,
            "length": self.length,
            "suggestion": self.suggestion,
            "severity": self.severity,
        }
class AmbiguityDetector:
    """Detector for ambiguous language in requirements.

    Each ``_detect_*`` method is an independent heuristic scan; ``detect``
    runs all of them and concatenates their ``AmbiguityWarning`` results.
    """

    # Pronouns whose antecedent is often unclear in requirement prose.
    PRONOUNS = {
        "it", "they", "them", "he", "she", "this", "that", "these", "those",
        "its", "their", "his", "her", "which", "what", "who", "whom",
    }

    # Quantifiers and hedge words that lack a measurable value.
    VAGUE_QUANTIFIERS = {
        "some", "many", "few", "several", "various", "multiple", "somewhat",
        "roughly", "approximately", "generally", "usually", "often", "sometimes",
        "occasionally", "maybe", "possibly", "probably", "likely",
    }

    # Time references with no concrete deadline or trigger.  The triplicated
    # "eventually" entries were removed; the set value is unchanged.
    TEMPORAL_AMBIGUITIES = {
        "soon", "later", "eventually", "currently", "presently",
        "before long", "in the future", "at some point",
    }

    # Markers that count as an explicit condition on an action.
    CONDITIONAL_KEYWORDS = {
        "if", "when", "unless", "provided", "given", "assuming", "while",
    }

    def detect(self, text: str) -> List[AmbiguityWarning]:
        """Detect ambiguities in the given text.

        Args:
            text: The requirement text to analyze.

        Returns:
            List of ambiguity warnings, grouped by detection category.
        """
        warnings: List[AmbiguityWarning] = []
        warnings.extend(self._detect_pronouns(text))
        warnings.extend(self._detect_vague_quantifiers(text))
        warnings.extend(self._detect_temporal_ambiguities(text))
        warnings.extend(self._detect_missing_conditions(text))
        warnings.extend(self._detect_passive_voice(text))
        return warnings

    @staticmethod
    def _iter_tokens(text: str):
        """Yield ``(raw_word, clean_word, offset)`` for each whitespace token.

        ``offset`` is the token's true character position.  The previous
        implementation used ``text.find(word)``, which always returns the
        FIRST occurrence and therefore mis-reported positions whenever the
        same word appeared more than once.
        """
        import re

        for match in re.finditer(r"\S+", text):
            raw = match.group()
            yield raw, raw.strip(".,!?;:\"'()[]{}").lower(), match.start()

    def _detect_pronouns(self, text: str) -> List[AmbiguityWarning]:
        """Detect pronoun usage that may be ambiguous."""
        warnings: List[AmbiguityWarning] = []
        for word, clean_word, pos in self._iter_tokens(text):
            # NOTE(review): the length filter skips the 2-letter pronouns
            # "it"/"he" even though they appear in PRONOUNS — confirm this
            # noise filter is intended.
            if clean_word in self.PRONOUNS and len(clean_word) > 2:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.PRONOUN,
                        message=f"Pronoun '{word}' may have unclear antecedent",
                        position=pos,
                        length=len(word),
                        suggestion=f"Replace '{word}' with the specific noun it refers to",
                        severity="low",
                    )
                )
        return warnings

    def _detect_vague_quantifiers(self, text: str) -> List[AmbiguityWarning]:
        """Detect vague quantifiers that lack precision."""
        warnings: List[AmbiguityWarning] = []
        for word, clean_word, pos in self._iter_tokens(text):
            if clean_word not in self.VAGUE_QUANTIFIERS:
                continue
            # Tailor the suggestion to the kind of vagueness.
            if clean_word in {"some", "few", "several"}:
                suggestion = f"Specify an exact number or range for '{word}'"
            elif clean_word in {"many", "multiple", "various"}:
                suggestion = f"Specify a count or percentage for '{word}'"
            elif clean_word in {"approximately", "roughly"}:
                suggestion = "Replace with a specific value or range"
            else:
                suggestion = "Provide more specific criteria"
            warnings.append(
                AmbiguityWarning(
                    type=AmbiguityType.VAGUE_QUANTIFIER,
                    message=f"Vague quantifier '{word}' lacks precision",
                    position=pos,
                    length=len(word),
                    suggestion=suggestion,
                    severity="medium",
                )
            )
        return warnings

    def _detect_temporal_ambiguities(self, text: str) -> List[AmbiguityWarning]:
        """Detect temporal ambiguities in the text.

        Uses a whole-word regex per term so that the multi-word entries in
        TEMPORAL_AMBIGUITIES ("before long", "in the future", "at some
        point") are found too — a per-token scan could never match them.
        """
        import re

        warnings: List[AmbiguityWarning] = []
        for term in sorted(self.TEMPORAL_AMBIGUITIES):
            for match in re.finditer(r"\b" + re.escape(term) + r"\b", text, re.IGNORECASE):
                found = match.group()
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.TEMPORAL,
                        message=f"Temporal term '{found}' is ambiguous",
                        position=match.start(),
                        length=len(found),
                        suggestion=f"Specify an exact time, deadline, or condition for '{found}'",
                        severity="low",
                    )
                )
        # Report findings in text order, not term order.
        warnings.sort(key=lambda w: w.position)
        return warnings

    def _detect_missing_conditions(self, text: str) -> List[AmbiguityWarning]:
        """Flag an action keyword that has no explicit condition anywhere."""
        import re

        warnings: List[AmbiguityWarning] = []
        # Does the text contain any conditional marker at all?
        has_conditional = any(
            re.search(r"\b" + kw + r"\b", text, re.IGNORECASE)
            for kw in self.CONDITIONAL_KEYWORDS
        )
        # Modal/action verbs that usually need a triggering condition.
        action_patterns = [
            r"\bmust\b", r"\bshall\b", r"\bshould\b", r"\bwill\b",
            r"\bcan\b", r"\benable\b", r"\ballow\b",
        ]
        has_action = any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in action_patterns
        )
        if has_action and not has_conditional:
            # Anchor the warning at the first "modal + word" phrase.
            action_match = re.search(
                r"(must|shall|should|will|can|enable|allow)\s+\w+",
                text,
                re.IGNORECASE,
            )
            if action_match:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.MISSING_CONDITION,
                        message="No explicit condition found for this action",
                        position=action_match.start(),
                        length=len(action_match.group()),
                        suggestion="Add a condition using 'when', 'if', 'after', or 'before'",
                        severity="medium",
                    )
                )
        return warnings

    def _detect_passive_voice(self, text: str) -> List[AmbiguityWarning]:
        """Detect passive voice usage."""
        import re

        warnings: List[AmbiguityWarning] = []
        # Simple surface patterns; misses irregular participles ("was sent").
        # NOTE(review): the "by the <noun>" pattern is only a weak passive
        # signal and will also flag active sentences — confirm it is wanted.
        passive_patterns = [
            r"\bwas\s+\w+ed\b",
            r"\bwere\s+\w+ed\b",
            r"\bhas\s+been\s+\w+ed\b",
            r"\bhave\s+been\s+\w+ed\b",
            r"\bwill\s+be\s+\w+ed\b",
            r"\bby\s+the\s+\w+\b",
        ]
        for pattern in passive_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.PASSIVE_VOICE,
                        message=f"Passive voice detected: '{match.group()}'",
                        position=match.start(),
                        length=match.end() - match.start(),
                        suggestion="Consider rewriting in active voice for clarity",
                        severity="low",
                    )
                )
        return warnings

View File

@@ -0,0 +1,237 @@
"""NLP analyzer for extracting structured information from requirements."""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, TYPE_CHECKING
import spacy
from spacy.tokens import Doc
if TYPE_CHECKING:
from nl2gherkin.nlp.ambiguity import AmbiguityWarning
class ActorType(str, Enum):
    """Types of actors in requirements.

    Subclasses ``str`` so members compare equal to their plain string
    values and serialize directly.
    """

    USER = "user"        # end user, customer, or visitor
    SYSTEM = "system"    # the system/app/application itself
    ADMIN = "admin"      # administrator actor
    API = "api"          # external API, service, or endpoint
    UNKNOWN = "unknown"  # actor could not be classified
class ActionType(str, Enum):
    """Types of actions in requirements.

    Values mirror the keyword table in ``NLPAnalyzer._extract_action``,
    which maps verb synonyms (e.g. "add"/"make" -> CREATE) onto members.
    """

    CREATE = "create"    # create/add/make/generate/produce
    READ = "read"        # view/see/display/show/list/retrieve/get
    UPDATE = "update"    # update/edit/modify/change/alter
    DELETE = "delete"    # delete/remove/destroy/cancel
    VALIDATE = "validate"  # validate/verify/check/confirm/ensure
    SEND = "send"        # send/submit/post/push/dispatch
    RECEIVE = "receive"  # receive/get/fetch/pull
    LOGIN = "login"      # login/authenticate
    LOGOUT = "logout"    # logout
    SEARCH = "search"    # search/find/query
    FILTER = "filter"    # filter/narrow/refine
    EXPORT = "export"    # export/download/save
    IMPORT = "import"    # import/upload/load
    UNKNOWN = "unknown"  # no action keyword matched
@dataclass
class RequirementAnalysis:
    """Structured analysis of a requirement.

    Populated field-by-field by ``NLPAnalyzer.analyze``; any component that
    could not be extracted keeps its default.
    """

    # The original requirement text, verbatim.
    raw_text: str
    # Who performs the action (user-story role or sentence subject).
    actor: Optional[str] = None
    actor_type: ActorType = ActorType.UNKNOWN
    # The requested action/feature phrase or verb.
    action: Optional[str] = None
    action_type: ActionType = ActionType.UNKNOWN
    # Object of the action (dependency-parse object subtree).
    target: Optional[str] = None
    # Clause following an "if"/"when"/"after"/... marker, if any.
    condition: Optional[str] = None
    # The "so that ..." clause of a user story, if present.
    benefit: Optional[str] = None
    # Placeholder names found as <name> in the text.
    examples: List[str] = field(default_factory=list)
    # Placeholder name -> type hint (currently always "string").
    variables: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Enum fields are flattened to their string values so the result is
        directly JSON-serializable.
        """
        return {
            "raw_text": self.raw_text,
            "actor": self.actor,
            "actor_type": self.actor_type.value,
            "action": self.action,
            "action_type": self.action_type.value,
            "target": self.target,
            "condition": self.condition,
            "benefit": self.benefit,
            "examples": self.examples,
            "variables": self.variables,
        }
class NLPAnalyzer:
    """Analyzer for natural language requirements.

    Wraps a spaCy pipeline and extracts actor, action, target, condition,
    benefit and variables from free-form text into a RequirementAnalysis.
    """

    def __init__(self, model: str = "en_core_web_sm"):
        """Initialize the analyzer with a spaCy model.

        Args:
            model: spaCy model name. Defaults to en_core_web_sm.

        Raises:
            subprocess.CalledProcessError: If the model is missing and the
                automatic download fails.
        """
        try:
            self.nlp = spacy.load(model)
        except OSError:
            # Model not installed: download it with the *current* interpreter.
            # sys.executable is used instead of a bare "python" so the model
            # is installed into the environment this process runs in ("python"
            # may resolve to a different interpreter, or not exist on PATH).
            import subprocess
            import sys

            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                check=True,
            )
            self.nlp = spacy.load(model)

    def analyze(self, text: str) -> RequirementAnalysis:
        """Analyze a requirement text and extract structured information.

        Args:
            text: The natural language requirement text.

        Returns:
            RequirementAnalysis with extracted components.
        """
        doc = self.nlp(text)
        analysis = RequirementAnalysis(raw_text=text)
        # Order matters: the user-story pass may pre-fill actor/action, and
        # the later passes skip fields that are already set.
        self._extract_user_story(doc, analysis)
        self._extract_subject(doc, analysis)
        self._extract_action(doc, analysis)
        self._extract_object(doc, analysis)
        self._extract_condition(doc, analysis)
        self._extract_variables(doc, analysis)
        return analysis

    def _extract_user_story(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract user story format: As a [role], I want [feature] so that [benefit]."""
        import re

        # (pattern, has_benefit): has_benefit marks patterns whose optional
        # third group captures the "so that ..." clause.
        user_story_patterns = [
            (r"as\s+a\s+(.*?),\s*i\s+want\s+(.*?)(?:\s+so\s+that\s+(.*))?$", True),
            (r"as\s+an\s+(.*?),\s*i\s+want\s+(.*?)(?:\s+so\s+that\s+(.*))?$", True),
            (r"as\s+a\s+(.*?)\s+i\s+can\s+(.*?)$", False),
        ]
        for pattern, has_benefit in user_story_patterns:
            match = re.search(pattern, doc.text, re.IGNORECASE)
            if not match:
                continue
            groups = match.groups()
            analysis.actor = groups[0].strip()
            if has_benefit and len(groups) >= 3 and groups[2]:
                analysis.benefit = groups[2].strip()
                analysis.action = groups[1].strip()
            else:
                feature_text = groups[1].strip()
                # Heuristic: if the feature phrase reads "... to X", keep X.
                parts = feature_text.split(" to ")
                if len(parts) > 1:
                    analysis.action = parts[1].strip()
                else:
                    analysis.action = feature_text
            break

    def _extract_subject(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the subject/actor from the requirement."""
        if analysis.actor:
            # Already found by the user-story pass.
            return
        for token in doc:
            # The first nominal subject (active or passive) is the actor.
            if token.dep_ == "nsubj" or token.dep_ == "nsubjpass":
                if not analysis.actor:
                    analysis.actor = token.text
                actor_lower = token.text.lower()
                # Substring checks so "administrator" hits "admin", etc.
                if any(x in actor_lower for x in ["user", "customer", "visitor"]):
                    analysis.actor_type = ActorType.USER
                elif any(x in actor_lower for x in ["admin", "administrator"]):
                    analysis.actor_type = ActorType.ADMIN
                elif any(x in actor_lower for x in ["system", "app", "application"]):
                    analysis.actor_type = ActorType.SYSTEM
                elif any(x in actor_lower for x in ["api", "service", "endpoint"]):
                    analysis.actor_type = ActorType.API
                break

    def _extract_action(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the action verb and classify it into an ActionType."""
        if analysis.action:
            # Action phrase already captured by the user-story pass.
            return
        # ActionType value -> verb synonyms; some entries are multi-word.
        action_keywords = {
            "create": ["create", "add", "make", "generate", "produce"],
            "read": ["view", "see", "display", "show", "list", "retrieve", "get"],
            "update": ["update", "edit", "modify", "change", "alter"],
            "delete": ["delete", "remove", "destroy", "cancel"],
            "validate": ["validate", "verify", "check", "confirm", "ensure"],
            "send": ["send", "submit", "post", "push", "dispatch"],
            "receive": ["receive", "get", "fetch", "pull"],
            "login": ["login", "sign in", "log in", "authenticate"],
            "logout": ["logout", "sign out", "log out"],
            "search": ["search", "find", "look for", "query"],
            "filter": ["filter", "narrow", "refine"],
            "export": ["export", "download", "save"],
            "import": ["import", "upload", "load"],
        }
        for token in doc:
            token_lower = token.text.lower()
            for action_type, keywords in action_keywords.items():
                if token_lower in keywords:
                    analysis.action_type = ActionType(action_type)
                    if not analysis.action:
                        analysis.action = token.text
                    # Stop at the first matching verb so ``action`` and
                    # ``action_type`` always describe the same word (the old
                    # code let later verbs overwrite ``action_type`` while
                    # ``action`` kept the first match).
                    return
        # Multi-word keywords ("sign in", "look for", ...) can never equal a
        # single token, so fall back to a phrase search when nothing matched.
        text_lower = doc.text.lower()
        for action_type, keywords in action_keywords.items():
            for keyword in keywords:
                if " " in keyword and keyword in text_lower:
                    analysis.action_type = ActionType(action_type)
                    if not analysis.action:
                        analysis.action = keyword
                    return

    def _extract_object(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the object/target from the requirement."""
        if analysis.target:
            return
        for token in doc:
            # Direct object, prepositional object, or attribute complement.
            if token.dep_ in ["dobj", "pobj", "attr"]:
                if not analysis.target:
                    # Take the token's whole subtree so modifiers are kept
                    # ("the monthly report" rather than just "report").
                    subtree_tokens = list(token.subtree)
                    analysis.target = " ".join(t.text for t in subtree_tokens)
                break

    def _extract_condition(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the clause following the first condition marker."""
        condition_markers = ["if", "when", "after", "before", "during", "while"]
        for i, token in enumerate(doc):
            if token.text.lower() in condition_markers:
                # Keep at most 15 tokens after the marker as the condition.
                remaining_tokens = list(doc[i + 1 :])
                if remaining_tokens:
                    analysis.condition = " ".join(t.text for t in remaining_tokens[:15])
                break

    def _extract_variables(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract <placeholder> variables from the requirement."""
        import re

        # Placeholders are written as <name>; their type defaults to "string".
        for var in re.findall(r"<(\w+)>", doc.text):
            analysis.variables[var] = "string"
            if var not in analysis.examples:
                analysis.examples.append(var)

    def analyze_ambiguity(self, text: str) -> "list[AmbiguityWarning]":
        """Analyze text for ambiguous language.

        Args:
            text: The text to analyze.

        Returns:
            List of ambiguity warnings.
        """
        # Imported lazily to avoid a module-level import cycle (see the
        # TYPE_CHECKING guard at the top of this module).
        from nl2gherkin.nlp.ambiguity import AmbiguityDetector

        detector = AmbiguityDetector()
        return detector.detect(text)  # type: ignore[return-value]

View File

@@ -0,0 +1,112 @@
"""Pattern definitions for requirement parsing."""
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional
class PatternType(str, Enum):
    """Types of requirement patterns.

    Subclasses ``str`` so members compare equal to their string values.
    """

    USER_STORY = "user_story"                    # "As a <role>, I want/can ..."
    SCENARIO = "scenario"                        # if/when ... then ... / Given-When-Then
    ACCEPTANCE_CRITERIA = "acceptance_criteria"  # bullet or numbered lists
    BUSINESS_RULE = "business_rule"              # no dedicated patterns registered below
    GENERAL = "general"                          # fallback used by detect_pattern_type
@dataclass
class RequirementPattern:
    """A named, regex-backed pattern used to classify requirement text."""

    name: str                          # unique identifier for the pattern
    pattern: str                       # regular expression to search for
    pattern_type: PatternType          # category this pattern belongs to
    priority: int = 0                  # higher wins when several patterns match
    description: Optional[str] = None  # human-readable summary

    def matches(self, text: str) -> bool:
        """Return True when *text* contains this pattern (case-insensitive)."""
        import re

        found = re.search(self.pattern, text, re.IGNORECASE)
        return found is not None
# Patterns for the "As a <role>, I want/can ..." user-story style.
USER_STORY_PATTERNS = [
    RequirementPattern(
        name="classic_user_story",
        pattern=r"as\s+a?\s*(?:user|role|customer|visitor|admin|sys(?:tem)?)\b.*,\s*i\s+(?:want|can|would like to)\b",
        pattern_type=PatternType.USER_STORY,
        priority=10,  # preferred over the short form when both match
        description="Classic user story format",
    ),
    RequirementPattern(
        name="short_user_story",
        pattern=r"as\s+(?:an?)\s+\w+\s*,\s*\w+.*",
        pattern_type=PatternType.USER_STORY,
        priority=5,
        description="Short user story format",
    ),
]

# Patterns for scenario-style (conditional) requirements.
SCENARIO_PATTERNS = [
    RequirementPattern(
        name="if_when_then",
        pattern=r"(?:if|when)\s+.*\s+then\s+",
        pattern_type=PatternType.SCENARIO,
        priority=10,  # the explicit then-clause is the stronger signal
        description="If-When-Then scenario format",
    ),
    RequirementPattern(
        name="given_when_then",
        pattern=r"(?:given|when|then)\s+.*",
        pattern_type=PatternType.SCENARIO,
        priority=5,
        description="Given-When-Then format",
    ),
]

# Patterns for list-style acceptance criteria (bullets or numbering).
ACCEPTANCE_CRITERIA_PATTERNS = [
    RequirementPattern(
        name="bullet_points",
        pattern=r"(?:^|\n)\s*[-*•]\s+",
        pattern_type=PatternType.ACCEPTANCE_CRITERIA,
        priority=10,
        description="Bullet point format",
    ),
    RequirementPattern(
        name="numbered_list",
        pattern=r"(?:^|\n)\s*\d+[.)]\s+",
        pattern_type=PatternType.ACCEPTANCE_CRITERIA,
        priority=10,
        description="Numbered list format",
    ),
]
def get_patterns_by_type(pattern_type: PatternType) -> List[RequirementPattern]:
    """Return every registered pattern whose type equals *pattern_type*."""
    candidates = [*USER_STORY_PATTERNS, *SCENARIO_PATTERNS, *ACCEPTANCE_CRITERIA_PATTERNS]
    return [candidate for candidate in candidates if candidate.pattern_type == pattern_type]
def detect_pattern_type(text: str) -> PatternType:
    """Detect the type of requirement based on patterns.

    Args:
        text: The requirement text to classify.

    Returns:
        The first PatternType (in enum declaration order) that has a
        matching registered pattern, or ``PatternType.GENERAL`` when
        nothing matches.  The return annotation was tightened from
        ``Optional[PatternType]``: this function never returns None.
    """
    for pattern_type in PatternType:
        for pattern in get_patterns_by_type(pattern_type):
            if pattern.matches(text):
                return pattern_type
    return PatternType.GENERAL
def find_best_matching_pattern(text: str) -> Optional[RequirementPattern]:
    """Return the highest-priority pattern matching *text*, or None.

    Ties are broken in favor of the earliest-registered pattern, matching
    the original scan order (``max`` keeps the first maximal element).
    """
    candidates = USER_STORY_PATTERNS + SCENARIO_PATTERNS + ACCEPTANCE_CRITERIA_PATTERNS
    matching = [candidate for candidate in candidates if candidate.matches(text)]
    if not matching:
        return None
    return max(matching, key=lambda candidate: candidate.priority)