Initial commit: Requirements to Gherkin CLI Converter
This commit is contained in:
7
src/nl2gherkin/__init__.py
Normal file
7
src/nl2gherkin/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""NL2Gherkin - Natural Language to Gherkin Converter.
|
||||
|
||||
A CLI tool that converts natural language project requirements into structured
|
||||
acceptance criteria in Gherkin format (Given-When-Then).
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
5
src/nl2gherkin/cli/__init__.py
Normal file
5
src/nl2gherkin/cli/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""CLI module for NL2Gherkin."""
|
||||
|
||||
from nl2gherkin.cli.commands import cli, convert, interactive, validate
|
||||
|
||||
__all__ = ["cli", "convert", "interactive", "validate"]
|
||||
170
src/nl2gherkin/cli/commands.py
Normal file
170
src/nl2gherkin/cli/commands.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""CLI commands for the NL2Gherkin tool."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import click
|
||||
|
||||
from nl2gherkin.exporters.base import BaseExporter
|
||||
from nl2gherkin.exporters.behave import BehaveExporter
|
||||
from nl2gherkin.exporters.cucumber import CucumberExporter
|
||||
from nl2gherkin.exporters.pytest_bdd import PytestBDDExporter
|
||||
from nl2gherkin.gherkin.generator import GherkinGenerator
|
||||
from nl2gherkin.gherkin.parser import GherkinParser
|
||||
from nl2gherkin.nlp.analyzer import NLPAnalyzer
|
||||
|
||||
|
||||
@click.group()
def cli() -> None:
    """Top-level command group for the NL2Gherkin tool.

    Subcommands (``convert``, ``interactive``, ``validate``) attach
    themselves via the ``@cli.command()`` decorator below.
    """
|
||||
|
||||
|
||||
@cli.command()
@click.argument(
    "input_file",
    type=click.Path(exists=True, readable=True, path_type=Path),
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file for the generated Gherkin.",
)
@click.option(
    "--framework",
    "-f",
    type=click.Choice(["cucumber", "behave", "pytest-bdd"]),
    default="cucumber",
    help="BDD framework to export for.",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate Gherkin syntax after generation.",
)
@click.option(
    "--ambiguity-check/--no-ambiguity-check",
    default=True,
    help="Check for ambiguous language in requirements.",
)
def convert(
    input_file: Path,
    output: Optional[Path],
    framework: str,
    validate: bool,
    ambiguity_check: bool,
) -> None:
    """Convert a requirements file to Gherkin format.

    INPUT_FILE should be a text file containing natural language requirements.
    Blank-line-separated paragraphs are treated as independent requirements,
    each producing one feature.

    Exits with status 1 if the file cannot be processed or, when
    ``--validate`` is on, the generated Gherkin fails validation.
    Warnings and errors are written to stderr so piped stdout stays clean.
    """
    try:
        content = input_file.read_text(encoding="utf-8")

        analyzer = NLPAnalyzer()
        parser = GherkinParser()
        generator = GherkinGenerator(parser)

        # Select the exporter matching the requested target framework.
        exporter: BaseExporter
        if framework == "cucumber":
            exporter = CucumberExporter()
        elif framework == "behave":
            exporter = BehaveExporter()
        else:
            exporter = PytestBDDExporter()

        requirements = content.strip().split("\n\n")

        gherkin_features = []

        for req in requirements:
            if not req.strip():
                continue

            if ambiguity_check:
                ambiguities = analyzer.analyze_ambiguity(req)
                if ambiguities:
                    # Diagnostics go to stderr so they don't pollute the
                    # Gherkin written to stdout when no --output is given.
                    click.echo("\n[WARNING] Ambiguities found in requirement:", err=True)
                    for amb in ambiguities:
                        click.echo(f"  - {amb.message}", err=True)
                        if amb.suggestion:
                            click.echo(f"    Suggestion: {amb.suggestion}", err=True)

            analysis = analyzer.analyze(req)
            gherkin = generator.generate(analysis)
            gherkin_features.append(gherkin)

        output_content = exporter.export(gherkin_features)

        if validate:
            valid, errors = parser.validate(output_content)
            if not valid:
                click.echo("\n[ERROR] Validation failed:", err=True)
                for error in errors:
                    click.echo(f"  {error}", err=True)
                sys.exit(1)
            else:
                click.echo("[OK] Gherkin syntax is valid.")

        if output:
            output.write_text(output_content, encoding="utf-8")
            click.echo(f"\nOutput written to: {output}")
        else:
            click.echo(f"\n{output_content}")

    except Exception as e:
        # SystemExit from the validation path above is a BaseException and
        # passes through; only genuine processing failures land here.
        click.echo(f"[ERROR] {e}", err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
@cli.command()
@click.option(
    "--framework",
    "-f",
    type=click.Choice(["cucumber", "behave", "pytest-bdd"]),
    default="cucumber",
    help="BDD framework to export for.",
)
def interactive(framework: str) -> None:
    """Enter interactive mode for editing requirements."""
    # Imported lazily so the interactive module loads only on demand.
    from nl2gherkin.cli.interactive import run_interactive_session

    # Map framework names to exporter classes; anything else (i.e. the
    # remaining choice, "pytest-bdd") falls back to the pytest-bdd exporter.
    exporter_classes = {
        "cucumber": CucumberExporter,
        "behave": BehaveExporter,
    }
    exporter: BaseExporter = exporter_classes.get(framework, PytestBDDExporter)()

    run_interactive_session(exporter)
|
||||
|
||||
|
||||
@cli.command()
@click.argument(
    "gherkin_file",
    type=click.Path(exists=True, readable=True, path_type=Path),
)
def validate(gherkin_file: Path) -> None:
    """Validate a Gherkin file for syntax correctness.

    Prints a confirmation on success; on failure, lists the validation
    errors on stderr and exits with status 1.
    """
    try:
        content = gherkin_file.read_text(encoding="utf-8")
        parser = GherkinParser()

        valid, errors = parser.validate(content)

        if valid:
            click.echo("[OK] Gherkin syntax is valid.")
        else:
            # Errors go to stderr so scripted callers can separate them
            # from normal output.
            click.echo("\n[ERROR] Validation failed:", err=True)
            for error in errors:
                click.echo(f"  {error}", err=True)
            sys.exit(1)

    except Exception as e:
        click.echo(f"[ERROR] {e}", err=True)
        sys.exit(1)
|
||||
108
src/nl2gherkin/cli/interactive.py
Normal file
108
src/nl2gherkin/cli/interactive.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""Interactive mode for the NL2Gherkin CLI."""
|
||||
|
||||
from typing import List
|
||||
|
||||
import click
|
||||
|
||||
from nl2gherkin.exporters.base import BaseExporter
|
||||
from nl2gherkin.gherkin.generator import GherkinGenerator
|
||||
from nl2gherkin.gherkin.parser import GherkinParser
|
||||
from nl2gherkin.nlp.analyzer import NLPAnalyzer
|
||||
|
||||
|
||||
def _colorize(text: str, color: str) -> str:
    """Return *text* styled with the given foreground *color*."""
    styled = click.style(text, fg=color)
    return styled
|
||||
|
||||
|
||||
def run_interactive_session(exporter: BaseExporter) -> None:
    """Run the interactive session for editing requirements.

    Supports the special commands ``edit``, ``export`` and ``clear``; any
    other non-empty input is treated as a requirement to convert. Exits on
    Ctrl+C.

    Args:
        exporter: Exporter used by the ``export`` command.
    """
    analyzer = NLPAnalyzer()
    parser = GherkinParser()
    generator = GherkinGenerator(parser)

    # history[i] holds the requirement text and generated Gherkin for
    # generated_scenarios[i]; the two lists stay index-aligned.
    history: List[dict] = []
    generated_scenarios: List[str] = []

    click.echo("\n[NL2Gherkin Interactive Mode]")
    click.echo("Enter your requirements (press Ctrl+C to exit)")
    click.echo("Use 'edit' to modify the last generated scenario")
    click.echo("Use 'export' to export all scenarios")
    click.echo("Use 'clear' to clear all scenarios\n")

    while True:
        try:
            requirement = click.prompt(
                "Enter requirement",
                type=str,
                default="",
                show_default=False,
            )

            if not requirement.strip():
                continue

            if requirement.lower() == "edit":
                if not generated_scenarios:
                    click.echo(_colorize("No scenarios to edit.", "yellow"))
                    continue

                idx = click.prompt(
                    "Enter scenario number to edit",
                    type=int,
                    default=len(generated_scenarios),
                    show_default=False,
                )
                if 1 <= idx <= len(generated_scenarios):
                    # Offer the original requirement text as the editing
                    # baseline (previously the generated Gherkin was offered,
                    # which is not a valid requirement to re-analyze).
                    edited_req = click.prompt(
                        "Enter modified requirement",
                        type=str,
                        default=history[idx - 1]["requirement"],
                        show_default=False,
                    )
                    analysis = analyzer.analyze(edited_req)
                    gherkin = generator.generate(analysis)
                    generated_scenarios[idx - 1] = gherkin
                    # Keep history in sync with the regenerated scenario.
                    history[idx - 1] = {"requirement": edited_req, "gherkin": gherkin}
                    click.echo(f"\nUpdated scenario {idx}:")
                    click.echo(gherkin)
                else:
                    click.echo(_colorize("Invalid scenario number.", "yellow"))
                continue

            if requirement.lower() == "export":
                if not generated_scenarios:
                    click.echo(_colorize("No scenarios to export.", "yellow"))
                    continue

                output = exporter.export(generated_scenarios)
                click.echo("\n--- Exported Gherkin ---")
                click.echo(output)
                continue

            if requirement.lower() == "clear":
                generated_scenarios = []
                # Also drop history so indices stay aligned for later edits.
                history = []
                click.echo(_colorize("Cleared all scenarios.", "green"))
                continue

            analysis = analyzer.analyze(requirement)
            gherkin = generator.generate(analysis)

            generated_scenarios.append(gherkin)
            history.append({"requirement": requirement, "gherkin": gherkin})

            click.echo("\n--- Generated Scenario ---")
            click.echo(gherkin)

            ambiguities = analyzer.analyze_ambiguity(requirement)
            if ambiguities:
                click.echo(_colorize("\n[WARNING] Potential ambiguities:", "yellow"))
                for amb in ambiguities:
                    click.echo(f"  - {amb.message}")
                    if amb.suggestion:
                        click.echo(f"    Suggestion: {amb.suggestion}")

            click.echo("")

        except KeyboardInterrupt:
            click.echo("\n\nExiting interactive mode.")
            break
|
||||
13
src/nl2gherkin/exporters/__init__.py
Normal file
13
src/nl2gherkin/exporters/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Exporters module for BDD framework output."""
|
||||
|
||||
from nl2gherkin.exporters.base import BaseExporter
|
||||
from nl2gherkin.exporters.behave import BehaveExporter
|
||||
from nl2gherkin.exporters.cucumber import CucumberExporter
|
||||
from nl2gherkin.exporters.pytest_bdd import PytestBDDExporter
|
||||
|
||||
__all__ = [
|
||||
"BaseExporter",
|
||||
"CucumberExporter",
|
||||
"BehaveExporter",
|
||||
"PytestBDDExporter",
|
||||
]
|
||||
59
src/nl2gherkin/exporters/base.py
Normal file
59
src/nl2gherkin/exporters/base.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Base exporter class for BDD frameworks."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
class BaseExporter(ABC):
    """Abstract base class shared by all BDD framework exporters."""

    @abstractmethod
    def export(self, features: List[str]) -> str:
        """Export features to the target framework format.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Exported content string.
        """

    @abstractmethod
    def get_step_definitions_template(self) -> str:
        """Get step definitions template for this framework.

        Returns:
            Step definitions template string.
        """

    @abstractmethod
    def get_configuration_template(self) -> Dict[str, str]:
        """Get configuration files for this framework.

        Returns:
            Dictionary mapping filenames to content templates.
        """

    def _extract_scenarios(self, feature: str) -> List[str]:
        """Split a feature string into its individual scenario blocks.

        Lines before the first ``Scenario:`` / ``Scenario Outline:`` header
        (the feature header and description) are discarded.
        """
        blocks: List[str] = []
        buffer: List[str] = []
        seen_header = False

        for raw_line in feature.split("\n"):
            text = raw_line.strip()
            if text.startswith(("Scenario:", "Scenario Outline:")):
                # A new scenario header closes the previous block.
                if buffer:
                    blocks.append("\n".join(buffer))
                buffer = [raw_line]
                seen_header = True
            elif seen_header:
                buffer.append(raw_line)

        if buffer:
            blocks.append("\n".join(buffer))

        return blocks
|
||||
118
src/nl2gherkin/exporters/behave.py
Normal file
118
src/nl2gherkin/exporters/behave.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""Behave exporter for Python BDD projects."""
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from nl2gherkin.exporters.base import BaseExporter
|
||||
|
||||
|
||||
class BehaveExporter(BaseExporter):
    """Exporter for Behave (Python)."""

    def __init__(self) -> None:
        """Initialize the Behave exporter."""
        pass

    def export(self, features: List[str]) -> str:
        """Export features to Behave format.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Behave-compatible feature file content (features joined by a
            blank line).
        """
        combined = "\n\n".join(features)
        return combined

    def get_step_definitions_template(self) -> str:
        """Get Behave step definitions template.

        Returns:
            Step definitions template string.
        """
        return '''"""Behave step definitions."""

from behave import given, when, then


@given("a setup condition")
def step_given_setup(context):
    """Given step implementation."""
    pass


@when("an action occurs")
def step_when_action(context):
    """When step implementation."""
    pass


@then("an expected result")
def step_then_result(context):
    """Then step implementation."""
    pass
'''

    def get_configuration_template(self) -> Dict[str, str]:
        """Get Behave configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "behave.ini": '''[behave]
format = progress
outfiles = behave-report.txt
''',
            "features/environment.py": '''"""Behave environment configuration."""


def before_scenario(context, scenario):
    """Run before each scenario."""
    pass


def after_scenario(context, scenario):
    """Run after each scenario."""
    pass
''',
        }

    def generate_step_definitions(self, scenarios: List[str]) -> str:
        """Generate step definitions for given scenarios.

        Fixes over the previous version: the decorator now matches the
        step's keyword (it was hard-coded to ``@given``), ``And``/``But``
        steps reuse the preceding primary keyword (Behave has no ``@and``
        decorator), and extracted parameters are emitted as plain argument
        names (quoted strings in a ``def`` signature are a syntax error).

        Args:
            scenarios: List of scenario texts.

        Returns:
            Step definitions Python code.
        """
        step_defs: List[str] = []

        for scenario in scenarios:
            # Fallback keyword in case a scenario (unusually) starts
            # with "And"/"But".
            last_keyword = "given"
            for line in scenario.split("\n"):
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue

                keyword = stripped.split()[0].lower()
                if keyword in ("and", "but"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword

                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                extra_params = "".join(f", {p}" for p in params)

                step_defs.append(f'@{keyword}("{step_text}")')
                step_defs.append(
                    f"def step_impl(context{extra_params}):\n"
                    f'    """{stripped.split()[0]} step implementation."""\n'
                    f"    pass\n"
                )

        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<name>`` placeholders from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)
|
||||
89
src/nl2gherkin/exporters/cucumber.py
Normal file
89
src/nl2gherkin/exporters/cucumber.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""Cucumber exporter for JavaScript/TypeScript projects."""
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from nl2gherkin.exporters.base import BaseExporter
|
||||
|
||||
|
||||
class CucumberExporter(BaseExporter):
    """Exporter for Cucumber (JavaScript/TypeScript)."""

    def __init__(self) -> None:
        """Initialize the Cucumber exporter."""
        self.step_definitions_template = """const {{ Given, When, Then }} = require('@cucumber/cucumber');

{{step_definitions}}
"""

    def export(self, features: List[str]) -> str:
        """Export features to Cucumber format.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Cucumber-compatible feature file content (features joined by a
            blank line).
        """
        combined = "\n\n".join(features)
        return combined

    def get_step_definitions_template(self) -> str:
        """Get Cucumber step definitions template.

        Returns:
            Step definitions template string.
        """
        return self.step_definitions_template

    def get_configuration_template(self) -> Dict[str, str]:
        """Get Cucumber configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "cucumber.js": '''module.exports = {
  default: '--publish-quiet'
}
''',
            ".cucumberrc": '''default:
  publish-quiet: true
  format: ['progress-bar', 'html:cucumber-report.html']
''',
        }

    def generate_step_definitions(self, scenarios: List[str]) -> str:
        """Generate step definitions for given scenarios.

        Fixes over the previous version: the generated call now includes
        the step pattern as its first argument (it previously passed the
        quoted parameter names instead, producing ``Given(, async function
        () {`` — invalid JavaScript — for parameterless steps), and
        ``And``/``But`` steps reuse the preceding Given/When/Then function
        (Cucumber.js exposes no ``And``/``But`` registration functions for
        this style of definition).

        Args:
            scenarios: List of scenario texts.

        Returns:
            Step definitions JavaScript code.
        """
        step_defs: List[str] = []

        for scenario in scenarios:
            # Fallback in case a scenario (unusually) starts with And/But.
            last_keyword = "Given"
            for line in scenario.split("\n"):
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue

                keyword = stripped.split()[0]
                if keyword in ("And", "But"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword

                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                params_list = ", ".join(params)

                step_def_code = f'{keyword}("{step_text}", async function ({params_list}) {{\n'
                step_def_code += "  // TODO: implement step\n"
                step_def_code += "});\n"
                step_defs.append(step_def_code)

        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<name>`` placeholders from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)
|
||||
141
src/nl2gherkin/exporters/pytest_bdd.py
Normal file
141
src/nl2gherkin/exporters/pytest_bdd.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""pytest-bdd exporter for pytest projects."""
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from nl2gherkin.exporters.base import BaseExporter
|
||||
|
||||
|
||||
class PytestBDDExporter(BaseExporter):
    """Exporter for pytest-bdd (Python)."""

    def __init__(self) -> None:
        """Initialize the pytest-bdd exporter."""
        pass

    def export(self, features: List[str]) -> str:
        """Export features to pytest-bdd format.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            pytest-bdd-compatible feature file content (features joined by
            a blank line).
        """
        combined = "\n\n".join(features)
        return combined

    def get_step_definitions_template(self) -> str:
        """Get pytest-bdd step definitions template.

        Returns:
            Step definitions template string.
        """
        return '''"""pytest-bdd step definitions."""

import pytest
from pytest_bdd import given, when, then, scenarios


scenarios('features')


@given("a setup condition")
def setup_condition():
    """Given step implementation."""
    return {}


@when("an action occurs")
def action_occurs():
    """When step implementation."""
    pass


@then("an expected result")
def expected_result():
    """Then step implementation."""
    pass
'''

    def get_configuration_template(self) -> Dict[str, str]:
        """Get pytest-bdd configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "conftest.py": '''"""pytest configuration and fixtures."""

import pytest
from pytest_bdd import scenarios


scenarios('features')


@pytest.fixture
def context():
    """Shared test context."""
    return {}


def pytest_configure(config):
    """Configure pytest."""
    pass
''',
            "pytest.ini": '''[pytest]
bdd_features_base_dir = features/
''',
        }

    def generate_step_definitions(self, scenarios: List[str], feature_name: str = "features") -> str:
        """Generate step definitions for given scenarios.

        Fixes over the previous version: parameterized steps used the
        nonexistent ``@pytest.given``/``@pytest.when``/``@pytest.then``
        decorators (they come from ``pytest_bdd``), ``And`` steps produced
        the syntactically invalid ``@and``, and every step in a scenario
        shared one function name so later defs shadowed earlier ones; a
        per-scenario index now keeps names unique.

        Args:
            scenarios: List of scenario texts.
            feature_name: Name of the feature file (kept for interface
                compatibility; not used in the generated code).

        Returns:
            Step definitions Python code.
        """
        step_defs: List[str] = []

        for scenario in scenarios:
            lines = scenario.split("\n")

            # Derive an identifier-ish suffix from the scenario name.
            scenario_name = ""
            for line in lines:
                stripped = line.strip()
                if stripped.startswith("Scenario:"):
                    scenario_name = stripped[9:].strip().replace(" ", "_")
                    break

            # And/But reuse the previous primary keyword's decorator.
            last_keyword = "given"
            step_index = 0
            for line in lines:
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue

                keyword = stripped.split()[0].lower()
                if keyword in ("and", "but"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword

                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                args = ", ".join(params)
                step_index += 1

                step_defs.append(
                    f'@{keyword}("{step_text}")\n'
                    f"def {keyword}_{scenario_name}_{step_index}({args}):\n"
                    f'    """{stripped.split()[0]} step implementation."""\n'
                    f"    pass\n"
                )

        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<name>`` placeholders from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)
|
||||
11
src/nl2gherkin/gherkin/__init__.py
Normal file
11
src/nl2gherkin/gherkin/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Gherkin module for generating and parsing Gherkin syntax."""
|
||||
|
||||
from nl2gherkin.gherkin.generator import GherkinGenerator
|
||||
from nl2gherkin.gherkin.parser import GherkinParser
|
||||
from nl2gherkin.gherkin.templates import GherkinTemplates
|
||||
|
||||
__all__ = [
|
||||
"GherkinGenerator",
|
||||
"GherkinParser",
|
||||
"GherkinTemplates",
|
||||
]
|
||||
219
src/nl2gherkin/gherkin/generator.py
Normal file
219
src/nl2gherkin/gherkin/generator.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""Gherkin generator for creating Gherkin syntax from analyzed requirements."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from nl2gherkin.nlp.analyzer import RequirementAnalysis
|
||||
|
||||
|
||||
class ScenarioType(str, Enum):
    """Types of Gherkin scenarios."""

    SCENARIO = "Scenario"
    SCENARIO_OUTLINE = "Scenario Outline"


@dataclass
class GherkinStep:
    """A single step in a Gherkin scenario."""

    keyword: str  # "Given", "When", "Then", "And" or "But"
    text: str  # step text following the keyword


@dataclass
class GherkinScenario:
    """A Gherkin scenario."""

    name: str
    scenario_type: ScenarioType = ScenarioType.SCENARIO
    steps: List[GherkinStep] = field(default_factory=list)
    examples: List[str] = field(default_factory=list)  # pre-rendered table rows
    tags: List[str] = field(default_factory=list)


@dataclass
class GherkinFeature:
    """A Gherkin feature."""

    name: str
    description: Optional[str] = None
    scenarios: List[GherkinScenario] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    background: Optional[List[GherkinStep]] = None


class GherkinGenerator:
    """Generator for creating Gherkin syntax from requirements analysis."""

    def __init__(self, parser: Optional[Any] = None) -> None:
        """Initialize the generator.

        Args:
            parser: Optional parser for validation.
        """
        self.parser = parser

    def generate(self, analysis: "RequirementAnalysis") -> str:
        """Generate Gherkin from a requirement analysis.

        Args:
            analysis: The analyzed requirement.

        Returns:
            Gherkin formatted string.
        """
        feature = self._create_feature(analysis)
        return self._render_feature(feature)

    def generate_scenario(self, analysis: "RequirementAnalysis") -> GherkinScenario:
        """Generate a scenario from analysis.

        Args:
            analysis: The analyzed requirement.

        Returns:
            GherkinScenario object.
        """
        return self._create_scenario(analysis)

    def _create_feature(self, analysis: "RequirementAnalysis") -> GherkinFeature:
        """Create a Gherkin feature wrapping a single scenario."""
        scenario = self._create_scenario(analysis)

        feature_name = f"{analysis.actor or 'User'} {analysis.action or 'does something'}"

        feature = GherkinFeature(
            name=feature_name,
            description=self._create_description(analysis),
            scenarios=[scenario],
        )

        return feature

    def _create_description(self, analysis: "RequirementAnalysis") -> Optional[str]:
        """Create a description from the raw text and benefit, if any."""
        parts = []

        if analysis.raw_text:
            parts.append(analysis.raw_text)

        if analysis.benefit:
            parts.append(f"So that: {analysis.benefit}")

        return " ".join(parts) if parts else None

    @staticmethod
    def _past_tense(verb: str) -> str:
        """Best-effort past tense: 'create' -> 'created', 'add' -> 'added'."""
        if verb.endswith("e"):
            return verb + "d"
        return verb + "ed"

    def _create_scenario(self, analysis: "RequirementAnalysis") -> GherkinScenario:
        """Create a Gherkin scenario from analysis."""
        steps: List[GherkinStep] = []

        if analysis.condition:
            steps.append(GherkinStep("Given", analysis.condition))

        if analysis.actor:
            given_text = f"a {analysis.actor}"
            if analysis.target:
                given_text += f" wants to interact with {analysis.target}"
            elif analysis.action:
                given_text += f" wants to {analysis.action}"
            steps.append(GherkinStep("Given", given_text))

        if analysis.action:
            # Only build the full actor/action/target sentence when both
            # actor and target are present; the previous logic interpolated
            # the literal string "None" into the step whenever a condition
            # existed but the actor or target was missing.
            if analysis.actor and analysis.target:
                when_text = f"the {analysis.actor} {analysis.action} the {analysis.target}"
            else:
                when_text = analysis.action
            steps.append(GherkinStep("When", when_text))

        if analysis.target:
            # Read-like actions assert visibility; everything else asserts
            # the action's effect. Falls back to "displayed" when no action
            # was extracted (previously this produced "should be Noneed").
            if not analysis.action or analysis.action_type.value in ["read", "search", "filter"]:
                then_text = f"the {analysis.target} should be displayed"
            else:
                then_text = f"the {analysis.target} should be {self._past_tense(analysis.action)}"
            steps.append(GherkinStep("Then", then_text))

        scenario_type = ScenarioType.SCENARIO
        examples: List[str] = []

        if analysis.variables:
            scenario_type = ScenarioType.SCENARIO_OUTLINE
            examples = self._create_examples(analysis)

        scenario_name = self._create_scenario_name(analysis)

        return GherkinScenario(
            name=scenario_name,
            scenario_type=scenario_type,
            steps=steps,
            examples=examples,
        )

    def _create_scenario_name(self, analysis: "RequirementAnalysis") -> str:
        """Create a scenario name from actor, action and target."""
        parts = []

        if analysis.actor:
            parts.append(analysis.actor.capitalize())

        if analysis.action:
            parts.append(analysis.action.capitalize())

        if analysis.target:
            # Use the last word of a multi-word target as the short name.
            target_name = analysis.target.split()[-1] if analysis.target else ""
            parts.append(target_name.capitalize())

        return " ".join(parts) if parts else "Sample Scenario"

    def _create_examples(self, analysis: "RequirementAnalysis") -> List[str]:
        """Create Examples table rows from variables."""
        if not analysis.variables:
            return []

        headers = list(analysis.variables.keys())
        header_row = "| " + " | ".join(headers) + " |"

        example_rows: List[str] = []
        if analysis.examples:
            for example in analysis.examples:
                if isinstance(example, dict):
                    row_values = [str(example.get(h, "")) for h in headers]
                else:
                    row_values = [str(example)]
                row = "| " + " | ".join(row_values) + " |"
                example_rows.append(row)
        else:
            # No concrete examples: emit one placeholder row.
            default_row = "| " + " | ".join(["value"] * len(headers)) + " |"
            example_rows.append(default_row)

        return [header_row] + example_rows

    def _render_feature(self, feature: GherkinFeature) -> str:
        """Render a GherkinFeature to its textual Gherkin form."""
        lines: List[str] = []

        for tag in feature.tags:
            lines.append(f"@{tag}")

        lines.append(f"Feature: {feature.name}")

        if feature.description:
            lines.append(f"  {feature.description}")

        if feature.background:
            lines.append("  Background:")
            for step in feature.background:
                lines.append(f"    {step.keyword} {step.text}")

        for scenario in feature.scenarios:
            lines.append("")
            for tag in scenario.tags:
                lines.append(f"  @{tag}")

            lines.append(f"  {scenario.scenario_type.value}: {scenario.name}")

            for step in scenario.steps:
                lines.append(f"    {step.keyword} {step.text}")

            if scenario.examples:
                lines.append("    Examples:")
                for example in scenario.examples:
                    lines.append(f"      {example}")

        return "\n".join(lines)
|
||||
167
src/nl2gherkin/gherkin/parser.py
Normal file
167
src/nl2gherkin/gherkin/parser.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Gherkin parser for validation."""
|
||||
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
|
||||
class GherkinParser:
|
||||
"""Parser and validator for Gherkin syntax."""
|
||||
|
||||
    def __init__(self) -> None:
        """Initialize the Gherkin parser.

        The parser keeps no state; a single instance can be reused for
        repeated parse/validate calls.
        """
        pass
|
||||
|
||||
def parse(self, content: str) -> dict:
|
||||
"""Parse Gherkin content into an AST.
|
||||
|
||||
Args:
|
||||
content: The Gherkin content to parse.
|
||||
|
||||
Returns:
|
||||
Dictionary representing the Gherkin AST.
|
||||
"""
|
||||
lines = content.strip().split("\n")
|
||||
ast: dict = {
|
||||
"feature": None,
|
||||
"scenarios": [],
|
||||
}
|
||||
|
||||
current_section = None
|
||||
scenario: Optional[dict] = None
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
stripped = line.strip()
|
||||
|
||||
if stripped.startswith("Feature:"):
|
||||
ast["feature"] = {
|
||||
"name": stripped[8:].strip(),
|
||||
"description": "",
|
||||
"line": i,
|
||||
}
|
||||
elif stripped.startswith("Scenario:"):
|
||||
if scenario:
|
||||
ast["scenarios"].append(scenario)
|
||||
scenario = {
|
||||
"name": stripped[9:].strip(),
|
||||
"type": "Scenario",
|
||||
"steps": [],
|
||||
"line": i,
|
||||
}
|
||||
elif stripped.startswith("Scenario Outline:"):
|
||||
if scenario:
|
||||
ast["scenarios"].append(scenario)
|
||||
scenario = {
|
||||
"name": stripped[17:].strip(),
|
||||
"type": "Scenario Outline",
|
||||
"steps": [],
|
||||
"line": i,
|
||||
}
|
||||
elif stripped.startswith("Given ") or stripped.startswith("When ") or \
|
||||
stripped.startswith("Then ") or stripped.startswith("And ") or \
|
||||
stripped.startswith("But "):
|
||||
if scenario:
|
||||
scenario["steps"].append({
|
||||
"keyword": stripped.split()[0],
|
||||
"text": " ".join(stripped.split()[1:]),
|
||||
"line": i,
|
||||
})
|
||||
elif stripped.startswith("Examples:"):
|
||||
if scenario:
|
||||
scenario["has_examples"] = True
|
||||
|
||||
if scenario:
|
||||
ast["scenarios"].append(scenario)
|
||||
|
||||
return ast
|
||||
|
||||
def validate(self, content: str) -> Tuple[bool, List[str]]:
|
||||
"""Validate Gherkin syntax.
|
||||
|
||||
Args:
|
||||
content: The Gherkin content to validate.
|
||||
|
||||
Returns:
|
||||
Tuple of (is_valid, list_of_errors).
|
||||
"""
|
||||
errors: List[str] = []
|
||||
|
||||
if not content.strip():
|
||||
return False, ["Empty content"]
|
||||
|
||||
lines = content.strip().split("\n")
|
||||
|
||||
if not lines[0].strip().startswith("Feature:"):
|
||||
return False, ["Gherkin must start with 'Feature:'"]
|
||||
|
||||
has_scenario = any(
|
||||
line.strip().startswith("Scenario:") or
|
||||
line.strip().startswith("Scenario Outline:")
|
||||
for line in lines
|
||||
)
|
||||
|
||||
if not has_scenario:
|
||||
return False, ["Feature must have at least one Scenario"]
|
||||
|
||||
try:
|
||||
self.parse(content)
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
line_match = _extract_line_number(error_msg)
|
||||
if line_match:
|
||||
errors.append(f"Line {line_match}: {error_msg}")
|
||||
else:
|
||||
errors.append(f"Validation error: {error_msg}")
|
||||
return False, errors
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
stripped = line.strip()
|
||||
|
||||
if stripped.startswith("Examples:") and not any(
|
||||
"Scenario Outline" in l for l in lines[:i]
|
||||
):
|
||||
errors.append(f"Line {i + 1}: Examples table can only be used with Scenario Outline")
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith(("Feature:", "Scenario", "Given ", "When ",
|
||||
"Then ", "And ", "But ", "Background:", "Examples:", "|", "@", " ")):
|
||||
if not stripped.startswith("#"):
|
||||
if i > 0 and lines[i-1].strip().endswith(":"):
|
||||
continue
|
||||
pass
|
||||
|
||||
if errors:
|
||||
return False, errors
|
||||
|
||||
return True, []
|
||||
|
||||
def validate_feature(self, feature_content: str) -> Tuple[bool, List[str]]:
|
||||
"""Validate a single feature.
|
||||
|
||||
Args:
|
||||
feature_content: The feature content to validate.
|
||||
|
||||
Returns:
|
||||
Tuple of (is_valid, list_of_errors).
|
||||
"""
|
||||
if not feature_content.strip().startswith("Feature:"):
|
||||
return False, ["Content must start with 'Feature:'"]
|
||||
|
||||
return self.validate(feature_content)
|
||||
|
||||
|
||||
def _extract_line_number(error_msg: str) -> Optional[int]:
|
||||
"""Extract line number from error message."""
|
||||
patterns = [
|
||||
r"line\s+(\d+)",
|
||||
r"(\d+):",
|
||||
r"row\s+(\d+)",
|
||||
r"line\s+(\d+)",
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, error_msg, re.IGNORECASE)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
|
||||
return None
|
||||
105
src/nl2gherkin/gherkin/templates.py
Normal file
105
src/nl2gherkin/gherkin/templates.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Gherkin templates for formatting output."""
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class GherkinTemplates:
    """Templates for Gherkin syntax generation.

    Exposes raw ``str.format`` templates as class attributes plus static
    helpers that assemble the same constructs programmatically.

    NOTE(review): the leading whitespace inside these literals is
    significant output formatting; it is assumed to follow the 2-space
    scenario / 4-space step convention used by the ``format_*`` helpers —
    confirm against rendered output.
    """

    # Feature header followed by a free-text description line.
    FEATURE = """Feature: {name}
  {description}
"""

    # A plain scenario with exactly one Given/When/Then triple.
    SCENARIO = """  Scenario: {name}
    Given {given}
    When {when}
    Then {then}
"""

    # A scenario outline; {variables} is the Examples header row.
    SCENARIO_OUTLINE = """  Scenario Outline: {name}
    Given {given}
    When {when}
    Then {then}
    Examples:
      | {variables} |
"""

    # Background section header (steps are appended by callers).
    BACKGROUND = """  Background:
"""

    # Single-step templates, indented to sit under a Scenario header.
    STEP_GIVEN = "    Given {text}"
    STEP_WHEN = "    When {text}"
    STEP_THEN = "    Then {text}"
    STEP_AND = "    And {text}"
    STEP_BUT = "    But {text}"

    # Examples table with a header row and a single data row.
    EXAMPLES_TABLE = """    Examples:
      | {header} |
      | {row} |
"""

    @staticmethod
    def format_feature(name: str, description: str = "") -> str:
        """Format a feature header; the description line is omitted when empty."""
        if description:
            return GherkinTemplates.FEATURE.format(name=name, description=description)
        return f"Feature: {name}\n"

    @staticmethod
    def format_scenario(
        name: str,
        given: str,
        when: str,
        then: str,
        additional_steps: Optional[list] = None,
    ) -> str:
        """Format a scenario.

        Args:
            name: Scenario title.
            given: Text of the Given step (without the keyword).
            when: Text of the When step.
            then: Text of the Then step.
            additional_steps: Extra step texts, each appended as an "And" step.

        Returns:
            The scenario as a newline-joined string (no trailing newline).
        """
        lines = [f"  Scenario: {name}"]
        lines.append(f"    Given {given}")
        lines.append(f"    When {when}")
        lines.append(f"    Then {then}")

        if additional_steps:
            for step in additional_steps:
                lines.append(f"    And {step}")

        return "\n".join(lines)

    @staticmethod
    def format_scenario_outline(
        name: str,
        given: str,
        when: str,
        then: str,
        variables: list,
        examples: list,
    ) -> str:
        """Format a scenario outline with examples.

        Args:
            name: Outline title.
            given: Text of the Given step (typically containing <var> placeholders).
            when: Text of the When step.
            then: Text of the Then step.
            variables: Column names for the Examples header row.
            examples: Rows of example values; each value is str()-converted.

        Returns:
            The outline as a newline-joined string (no trailing newline).
        """
        lines = [f"  Scenario Outline: {name}"]
        lines.append(f"    Given {given}")
        lines.append(f"    When {when}")
        lines.append(f"    Then {then}")
        lines.append("    Examples:")

        header = " | ".join(variables)
        lines.append(f"      | {header} |")

        for row in examples:
            row_str = " | ".join(str(v) for v in row)
            lines.append(f"      | {row_str} |")

        return "\n".join(lines)

    @staticmethod
    def format_step(keyword: str, text: str) -> str:
        """Format a single step line for the given keyword.

        Unrecognised keywords deliberately fall back to the Given template
        rather than raising.
        """
        templates = {
            "Given": GherkinTemplates.STEP_GIVEN,
            "When": GherkinTemplates.STEP_WHEN,
            "Then": GherkinTemplates.STEP_THEN,
            "And": GherkinTemplates.STEP_AND,
            "But": GherkinTemplates.STEP_BUT,
        }

        template = templates.get(keyword, GherkinTemplates.STEP_GIVEN)
        return template.format(text=text)
|
||||
14
src/nl2gherkin/main.py
Normal file
14
src/nl2gherkin/main.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Main entry point for the NL2Gherkin CLI."""
|
||||
|
||||
|
||||
|
||||
from nl2gherkin.cli.commands import cli
|
||||
|
||||
|
||||
def main() -> None:
    """Entry point for the CLI.

    Thin wrapper around the click command group so the project can expose
    a console-script entry point.
    """
    cli()


if __name__ == "__main__":
    # Allow running the module directly: python -m nl2gherkin.main
    main()
|
||||
12
src/nl2gherkin/nlp/__init__.py
Normal file
12
src/nl2gherkin/nlp/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""NLP module for natural language processing."""
|
||||
|
||||
from nl2gherkin.nlp.ambiguity import AmbiguityDetector, AmbiguityWarning
|
||||
from nl2gherkin.nlp.analyzer import NLPAnalyzer
|
||||
from nl2gherkin.nlp.patterns import RequirementPattern
|
||||
|
||||
__all__ = [
|
||||
"NLPAnalyzer",
|
||||
"AmbiguityDetector",
|
||||
"AmbiguityWarning",
|
||||
"RequirementPattern",
|
||||
]
|
||||
232
src/nl2gherkin/nlp/ambiguity.py
Normal file
232
src/nl2gherkin/nlp/ambiguity.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""Ambiguity detection for requirements."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class AmbiguityType(str, Enum):
    """Types of ambiguity in requirements.

    Inherits from ``str`` so members compare and serialize as plain strings.
    """

    PRONOUN = "pronoun"                      # pronoun with a possibly unclear antecedent
    VAGUE_QUANTIFIER = "vague_quantifier"    # "some", "many", "approximately", ...
    TEMPORAL = "temporal"                    # "soon", "later", "eventually", ...
    MISSING_CONDITION = "missing_condition"  # action stated without an explicit trigger
    UNCLEAR_REFERENCE = "unclear_reference"  # reserved; not emitted by AmbiguityDetector in this file
    PASSIVE_VOICE = "passive_voice"          # "was validated", "will be sent", ...
    UNDEFINED_TERM = "undefined_term"        # reserved; not emitted by AmbiguityDetector in this file
|
||||
|
||||
|
||||
@dataclass
class AmbiguityWarning:
    """A warning about ambiguous language in a requirement."""

    type: AmbiguityType                # category of the detected ambiguity
    message: str                       # human-readable explanation
    position: int = 0                  # character offset of the flagged span in the text
    length: int = 0                    # length of the flagged span in characters
    suggestion: Optional[str] = None   # optional rewording advice
    severity: str = "medium"           # free-form; the detector in this file emits "low"/"medium"

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dictionary (the enum is flattened to its value)."""
        return {
            "type": self.type.value,
            "message": self.message,
            "position": self.position,
            "length": self.length,
            "suggestion": self.suggestion,
            "severity": self.severity,
        }
|
||||
|
||||
|
||||
class AmbiguityDetector:
    """Detector for ambiguous language in requirements.

    Each ``_detect_*`` helper scans the text independently and returns a
    list of :class:`AmbiguityWarning`; :meth:`detect` concatenates them.
    """

    # Pronouns whose antecedent is often unclear in an isolated requirement.
    PRONOUNS = {
        "it", "they", "them", "he", "she", "this", "that", "these", "those",
        "its", "their", "his", "her", "which", "what", "who", "whom",
    }

    # Quantifiers that convey an amount without a verifiable number.
    VAGUE_QUANTIFIERS = {
        "some", "many", "few", "several", "various", "multiple", "somewhat",
        "roughly", "approximately", "generally", "usually", "often", "sometimes",
        "occasionally", "maybe", "possibly", "probably", "likely",
    }

    # Time references with no concrete deadline.  (The original literal
    # listed "eventually" three times; a set deduplicates, so the contents
    # are unchanged.)  NOTE: the multi-word entries can never match a
    # single whitespace token, so they are currently inert — kept for
    # set parity with the original.
    TEMPORAL_AMBIGUITIES = {
        "soon", "later", "eventually", "currently", "presently",
        "before long", "in the future", "at some point",
    }

    # Keywords indicating an explicit condition is already present.
    CONDITIONAL_KEYWORDS = {
        "if", "when", "unless", "provided", "given", "assuming", "while",
    }

    # Punctuation stripped from a raw token before dictionary lookup.
    _STRIP_CHARS = ".,!?;:\"'()[]{}"

    def detect(self, text: str) -> List[AmbiguityWarning]:
        """Detect ambiguities in the given text.

        Args:
            text: The requirement text to analyze.

        Returns:
            List of ambiguity warnings.
        """
        warnings: List[AmbiguityWarning] = []

        warnings.extend(self._detect_pronouns(text))
        warnings.extend(self._detect_vague_quantifiers(text))
        warnings.extend(self._detect_temporal_ambiguities(text))
        warnings.extend(self._detect_missing_conditions(text))
        warnings.extend(self._detect_passive_voice(text))

        return warnings

    def _tokenize(self, text: str):
        """Yield ``(raw_token, cleaned_token, start_offset)`` per whitespace token.

        Bug fix: ``re.finditer`` preserves the true character offset of
        every token; the previous ``text.find(word)`` lookup always
        reported the FIRST occurrence of a repeated word, so later hits
        carried wrong positions.
        """
        import re

        for match in re.finditer(r"\S+", text):
            raw = match.group()
            yield raw, raw.strip(self._STRIP_CHARS).lower(), match.start()

    def _detect_pronouns(self, text: str) -> List[AmbiguityWarning]:
        """Detect pronoun usage that may be ambiguous."""
        warnings: List[AmbiguityWarning] = []

        for word, clean_word, pos in self._tokenize(text):
            # len > 2 deliberately skips the shortest pronouns ("it", "he",
            # "she" passes) to keep the noise level down.
            if clean_word in self.PRONOUNS and len(clean_word) > 2:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.PRONOUN,
                        message=f"Pronoun '{word}' may have unclear antecedent",
                        position=pos,
                        length=len(word),
                        suggestion=f"Replace '{word}' with the specific noun it refers to",
                        severity="low",
                    )
                )

        return warnings

    def _detect_vague_quantifiers(self, text: str) -> List[AmbiguityWarning]:
        """Detect vague quantifiers that lack precision."""
        warnings: List[AmbiguityWarning] = []

        for word, clean_word, pos in self._tokenize(text):
            if clean_word in self.VAGUE_QUANTIFIERS:
                # Tailor the suggestion to the kind of vagueness.
                if clean_word in {"some", "few", "several"}:
                    suggestion = f"Specify an exact number or range for '{word}'"
                elif clean_word in {"many", "multiple", "various"}:
                    suggestion = f"Specify a count or percentage for '{word}'"
                elif clean_word in {"approximately", "roughly"}:
                    suggestion = "Replace with a specific value or range"
                else:
                    suggestion = "Provide more specific criteria"

                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.VAGUE_QUANTIFIER,
                        message=f"Vague quantifier '{word}' lacks precision",
                        position=pos,
                        length=len(word),
                        suggestion=suggestion,
                        severity="medium",
                    )
                )

        return warnings

    def _detect_temporal_ambiguities(self, text: str) -> List[AmbiguityWarning]:
        """Detect temporal ambiguities in the text."""
        warnings: List[AmbiguityWarning] = []

        for word, clean_word, pos in self._tokenize(text):
            if clean_word in self.TEMPORAL_AMBIGUITIES:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.TEMPORAL,
                        message=f"Temporal term '{word}' is ambiguous",
                        position=pos,
                        length=len(word),
                        suggestion=f"Specify an exact time, deadline, or condition for '{word}'",
                        severity="low",
                    )
                )

        return warnings

    def _detect_missing_conditions(self, text: str) -> List[AmbiguityWarning]:
        """Detect potential missing conditions in requirements.

        Flags at most one warning: when a modal/action verb is present but
        no conditional keyword appears anywhere in the text.
        """
        warnings: List[AmbiguityWarning] = []

        import re

        has_conditional = any(
            re.search(r"\b" + kw + r"\b", text, re.IGNORECASE)
            for kw in self.CONDITIONAL_KEYWORDS
        )

        action_patterns = [
            r"\bmust\b", r"\bshall\b", r"\bshould\b", r"\bwill\b",
            r"\bcan\b", r"\benable\b", r"\ballow\b",
        ]

        has_action = any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in action_patterns
        )

        if has_action and not has_conditional:
            # Anchor the warning on the first modal + following word.
            action_match = re.search(
                r"(must|shall|should|will|can|enable|allow)\s+\w+",
                text,
                re.IGNORECASE
            )
            if action_match:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.MISSING_CONDITION,
                        message="No explicit condition found for this action",
                        position=action_match.start(),
                        length=len(action_match.group()),
                        suggestion="Add a condition using 'when', 'if', 'after', or 'before'",
                        severity="medium",
                    )
                )

        return warnings

    def _detect_passive_voice(self, text: str) -> List[AmbiguityWarning]:
        """Detect passive voice usage.

        NOTE(review): these are broad heuristics — r"\bby\s+the\s+\w+\b" in
        particular also fires on non-passive "by the X" phrases, and the
        "-ed" patterns miss irregular participles. Confirm acceptable
        false-positive rate before tightening.
        """
        warnings: List[AmbiguityWarning] = []

        import re

        passive_patterns = [
            r"\bwas\s+\w+ed\b",
            r"\bwere\s+\w+ed\b",
            r"\bhas\s+been\s+\w+ed\b",
            r"\bhave\s+been\s+\w+ed\b",
            r"\bwill\s+be\s+\w+ed\b",
            r"\bby\s+the\s+\w+\b",
        ]

        for pattern in passive_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.PASSIVE_VOICE,
                        message=f"Passive voice detected: '{match.group()}'",
                        position=match.start(),
                        length=match.end() - match.start(),
                        suggestion="Consider rewriting in active voice for clarity",
                        severity="low",
                    )
                )

        return warnings
|
||||
237
src/nl2gherkin/nlp/analyzer.py
Normal file
237
src/nl2gherkin/nlp/analyzer.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""NLP analyzer for extracting structured information from requirements."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
import spacy
|
||||
from spacy.tokens import Doc
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nl2gherkin.nlp.ambiguity import AmbiguityWarning
|
||||
|
||||
|
||||
class ActorType(str, Enum):
    """Types of actors in requirements.

    Inherits from ``str`` so members compare and serialize as plain strings.
    """

    USER = "user"        # end user / customer / visitor
    SYSTEM = "system"    # the application itself ("system", "app", "application")
    ADMIN = "admin"      # administrative user
    API = "api"          # api / service / endpoint actor
    UNKNOWN = "unknown"  # no actor could be classified
|
||||
|
||||
|
||||
class ActionType(str, Enum):
    """Types of actions in requirements.

    Values mirror the keyword groups used by NLPAnalyzer's action
    extraction; inherits from ``str`` for plain-string comparison.
    """

    CREATE = "create"      # create / add / make / generate
    READ = "read"          # view / display / list / retrieve
    UPDATE = "update"      # update / edit / modify
    DELETE = "delete"      # delete / remove / cancel
    VALIDATE = "validate"  # validate / verify / check
    SEND = "send"          # send / submit / post
    RECEIVE = "receive"    # receive / fetch / pull
    LOGIN = "login"        # login / authenticate
    LOGOUT = "logout"      # logout / sign out
    SEARCH = "search"      # search / find / query
    FILTER = "filter"      # filter / narrow / refine
    EXPORT = "export"      # export / download / save
    IMPORT = "import"      # import / upload / load
    UNKNOWN = "unknown"    # no action could be classified
|
||||
|
||||
|
||||
@dataclass
class RequirementAnalysis:
    """Structured analysis of a requirement."""

    raw_text: str                                        # original requirement text, unmodified
    actor: Optional[str] = None                          # who performs the action (e.g. "user")
    actor_type: ActorType = ActorType.UNKNOWN            # classified actor category
    action: Optional[str] = None                         # the main verb/feature phrase
    action_type: ActionType = ActionType.UNKNOWN         # classified action category
    target: Optional[str] = None                         # object the action applies to
    condition: Optional[str] = None                      # trailing "if/when/..." clause, if any
    benefit: Optional[str] = None                        # "so that ..." part of a user story
    examples: List[str] = field(default_factory=list)    # names of <placeholder> variables found
    variables: Dict[str, str] = field(default_factory=dict)  # placeholder name -> assumed type

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dictionary (enums flattened to values)."""
        return {
            "raw_text": self.raw_text,
            "actor": self.actor,
            "actor_type": self.actor_type.value,
            "action": self.action,
            "action_type": self.action_type.value,
            "target": self.target,
            "condition": self.condition,
            "benefit": self.benefit,
            "examples": self.examples,
            "variables": self.variables,
        }
|
||||
|
||||
|
||||
class NLPAnalyzer:
    """Analyzer for natural language requirements.

    Wraps a spaCy pipeline and extracts actor, action, target, condition,
    and placeholder variables into a :class:`RequirementAnalysis`.
    """

    def __init__(self, model: str = "en_core_web_sm"):
        """Initialize the analyzer with a spaCy model.

        Args:
            model: spaCy model name. Defaults to en_core_web_sm.

        Raises:
            subprocess.CalledProcessError: If the model is missing and the
                automatic download fails.
        """
        try:
            self.nlp = spacy.load(model)
        except OSError:
            # Model not installed: download it with the CURRENT interpreter.
            # Bug fix: using sys.executable (rather than a bare "python")
            # guarantees the model lands in the environment this process is
            # actually running in.
            import subprocess
            import sys

            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                check=True,
            )
            self.nlp = spacy.load(model)

    def analyze(self, text: str) -> RequirementAnalysis:
        """Analyze a requirement text and extract structured information.

        Args:
            text: The natural language requirement text.

        Returns:
            RequirementAnalysis with extracted components.
        """
        doc = self.nlp(text)
        analysis = RequirementAnalysis(raw_text=text)

        # Order matters: user-story extraction may fill actor/action first,
        # and the later extractors skip fields that are already set.
        self._extract_user_story(doc, analysis)
        self._extract_subject(doc, analysis)
        self._extract_action(doc, analysis)
        self._extract_object(doc, analysis)
        self._extract_condition(doc, analysis)
        self._extract_variables(doc, analysis)

        return analysis

    def _extract_user_story(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract user story format: As a [role], I want [feature] so that [benefit]."""
        import re

        # (pattern, has_benefit): has_benefit marks patterns whose third
        # group captures an optional "so that ..." clause.
        user_story_patterns = [
            (r"as\s+a\s+(.*?),\s*i\s+want\s+(.*?)(?:\s+so\s+that\s+(.*))?$", True),
            (r"as\s+an\s+(.*?),\s*i\s+want\s+(.*?)(?:\s+so\s+that\s+(.*))?$", True),
            (r"as\s+a\s+(.*?)\s+i\s+can\s+(.*?)$", False),
        ]

        for pattern, has_benefit in user_story_patterns:
            match = re.search(pattern, doc.text, re.IGNORECASE)
            if match:
                groups = match.groups()
                analysis.actor = groups[0].strip()

                if has_benefit and len(groups) >= 3 and groups[2]:
                    analysis.benefit = groups[2].strip()
                    analysis.action = groups[1].strip()
                else:
                    # Without a benefit clause, try to drop a leading
                    # "to be able to"-style infinitive marker.
                    feature_text = groups[1].strip()
                    parts = feature_text.split(" to ")
                    if len(parts) > 1:
                        analysis.action = parts[1].strip()
                    else:
                        analysis.action = feature_text
                break

    def _extract_subject(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the subject/actor from the requirement and classify its type."""
        if analysis.actor:
            # User-story extraction already named the actor; note that the
            # actor TYPE is then left UNKNOWN, matching prior behavior.
            return

        for token in doc:
            # nsubj / nsubjpass = (passive) nominal subject in spaCy's
            # dependency scheme.
            if token.dep_ == "nsubj" or token.dep_ == "nsubjpass":
                if not analysis.actor:
                    analysis.actor = token.text

                actor_lower = token.text.lower()
                if any(x in actor_lower for x in ["user", "customer", "visitor"]):
                    analysis.actor_type = ActorType.USER
                elif any(x in actor_lower for x in ["admin", "administrator"]):
                    analysis.actor_type = ActorType.ADMIN
                elif any(x in actor_lower for x in ["system", "app", "application"]):
                    analysis.actor_type = ActorType.SYSTEM
                elif any(x in actor_lower for x in ["api", "service", "endpoint"]):
                    analysis.actor_type = ActorType.API
                break

    def _extract_action(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the action/verb from the requirement.

        Bug fix: stops at the FIRST keyword match so ``action`` and
        ``action_type`` always describe the same token.  Previously only
        the inner loop broke, so a later matching token could overwrite
        ``action_type`` while ``action`` kept the first match, leaving the
        two fields inconsistent.
        """
        if analysis.action:
            return

        # Keyword groups keyed by ActionType value.
        action_keywords = {
            "create": ["create", "add", "make", "generate", "produce"],
            "read": ["view", "see", "display", "show", "list", "retrieve", "get"],
            "update": ["update", "edit", "modify", "change", "alter"],
            "delete": ["delete", "remove", "destroy", "cancel"],
            "validate": ["validate", "verify", "check", "confirm", "ensure"],
            "send": ["send", "submit", "post", "push", "dispatch"],
            "receive": ["receive", "get", "fetch", "pull"],
            "login": ["login", "sign in", "log in", "authenticate"],
            "logout": ["logout", "sign out", "log out"],
            "search": ["search", "find", "look for", "query"],
            "filter": ["filter", "narrow", "refine"],
            "export": ["export", "download", "save"],
            "import": ["import", "upload", "load"],
        }

        for token in doc:
            token_lower = token.text.lower()
            for action_type, keywords in action_keywords.items():
                if token_lower in keywords:
                    analysis.action_type = ActionType(action_type)
                    analysis.action = token.text
                    return

    def _extract_object(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the object/target from the requirement."""
        if analysis.target:
            return

        for token in doc:
            # dobj/pobj/attr = direct object, object of preposition,
            # attribute — the usual landing spots for the action's target.
            if token.dep_ in ["dobj", "pobj", "attr"]:
                if not analysis.target:
                    # Take the whole subtree so modifiers are kept
                    # ("the user profile", not just "profile").
                    subtree_tokens = list(token.subtree)
                    analysis.target = " ".join([t.text for t in subtree_tokens])
                break

    def _extract_condition(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the first conditional clause from the requirement."""
        condition_markers = ["if", "when", "after", "before", "during", "while"]

        for i, token in enumerate(doc):
            if token.text.lower() in condition_markers:
                remaining_tokens = list(doc[i + 1 :])
                if remaining_tokens:
                    # Cap at 15 tokens to avoid swallowing the whole sentence.
                    analysis.condition = " ".join([t.text for t in remaining_tokens[:15]])
                break

    def _extract_variables(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract <placeholder> variables from the requirement text."""
        import re

        pattern = r"<(\w+)>"
        matches = re.findall(pattern, doc.text)

        for var in matches:
            # Type information is not derivable from the text; default to
            # "string".
            analysis.variables[var] = "string"
            if var not in analysis.examples:
                analysis.examples.append(var)

    def analyze_ambiguity(self, text: str) -> "list[AmbiguityWarning]":
        """Analyze text for ambiguous language.

        Args:
            text: The text to analyze.

        Returns:
            List of ambiguity warnings.
        """
        # Imported lazily to avoid a hard dependency at module import time.
        from nl2gherkin.nlp.ambiguity import AmbiguityDetector

        detector = AmbiguityDetector()
        return detector.detect(text)  # type: ignore[return-value]
|
||||
112
src/nl2gherkin/nlp/patterns.py
Normal file
112
src/nl2gherkin/nlp/patterns.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Pattern definitions for requirement parsing."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class PatternType(str, Enum):
    """Types of requirement patterns.

    Inherits from ``str`` so members compare and serialize as plain strings.
    """

    USER_STORY = "user_story"                    # "As a <role>, I want ..." shapes
    SCENARIO = "scenario"                        # Given/When/Then or if/then shapes
    ACCEPTANCE_CRITERIA = "acceptance_criteria"  # bullet or numbered lists
    BUSINESS_RULE = "business_rule"              # reserved; no patterns registered in this file
    GENERAL = "general"                          # fallback when nothing matches
|
||||
|
||||
|
||||
@dataclass
class RequirementPattern:
    """A pattern for matching requirements."""

    name: str                          # identifier for this pattern
    pattern: str                       # regular-expression source
    pattern_type: PatternType          # category this pattern belongs to
    priority: int = 0                  # higher wins when several patterns match
    description: Optional[str] = None  # optional human-readable summary

    def matches(self, text: str) -> bool:
        """Return whether *text* contains this pattern (case-insensitive)."""
        import re

        found = re.search(self.pattern, text, re.IGNORECASE)
        return found is not None
|
||||
|
||||
|
||||
# Patterns recognising the "As a <role>, I want <feature>" user-story shape.
USER_STORY_PATTERNS = [
    RequirementPattern(
        name="classic_user_story",
        pattern=r"as\s+a?\s*(?:user|role|customer|visitor|admin|sys(?:tem)?)\b.*,\s*i\s+(?:want|can|would like to)\b",
        pattern_type=PatternType.USER_STORY,
        priority=10,  # preferred over the short form when both match
        description="Classic user story format",
    ),
    RequirementPattern(
        name="short_user_story",
        pattern=r"as\s+(?:an?)\s+\w+\s*,\s*\w+.*",
        pattern_type=PatternType.USER_STORY,
        priority=5,
        description="Short user story format",
    ),
]

# Patterns recognising scenario-style phrasing (conditional / GWT wording).
SCENARIO_PATTERNS = [
    RequirementPattern(
        name="if_when_then",
        pattern=r"(?:if|when)\s+.*\s+then\s+",
        pattern_type=PatternType.SCENARIO,
        priority=10,
        description="If-When-Then scenario format",
    ),
    RequirementPattern(
        name="given_when_then",
        pattern=r"(?:given|when|then)\s+.*",  # very broad: any single GWT keyword
        pattern_type=PatternType.SCENARIO,
        priority=5,
        description="Given-When-Then format",
    ),
]

# Patterns recognising list-style acceptance criteria (matched per line).
ACCEPTANCE_CRITERIA_PATTERNS = [
    RequirementPattern(
        name="bullet_points",
        pattern=r"(?:^|\n)\s*[-*•]\s+",
        pattern_type=PatternType.ACCEPTANCE_CRITERIA,
        priority=10,
        description="Bullet point format",
    ),
    RequirementPattern(
        name="numbered_list",
        pattern=r"(?:^|\n)\s*\d+[.)]\s+",
        pattern_type=PatternType.ACCEPTANCE_CRITERIA,
        priority=10,
        description="Numbered list format",
    ),
]
|
||||
|
||||
|
||||
def get_patterns_by_type(pattern_type: PatternType) -> List[RequirementPattern]:
    """Collect every registered pattern whose type equals *pattern_type*."""
    combined = USER_STORY_PATTERNS + SCENARIO_PATTERNS + ACCEPTANCE_CRITERIA_PATTERNS
    matching: List[RequirementPattern] = []
    for candidate in combined:
        if candidate.pattern_type == pattern_type:
            matching.append(candidate)
    return matching
|
||||
|
||||
|
||||
def detect_pattern_type(text: str) -> PatternType:
    """Detect the type of requirement based on patterns.

    Iterates pattern types in declaration order and returns the first type
    with a matching registered pattern, falling back to
    ``PatternType.GENERAL`` when nothing matches.

    Note: the return annotation was narrowed from ``Optional[PatternType]``
    (a correctness fix): the function can never return ``None``, and the
    old annotation forced callers into needless None-checks.

    Args:
        text: The requirement text to classify.

    Returns:
        The detected pattern type (never ``None``).
    """
    for pattern_type in PatternType:
        patterns = get_patterns_by_type(pattern_type)
        for pattern in patterns:
            if pattern.matches(text):
                return pattern_type
    return PatternType.GENERAL
|
||||
|
||||
|
||||
def find_best_matching_pattern(text: str) -> Optional[RequirementPattern]:
    """Return the highest-priority pattern matching *text*, or None."""
    all_patterns = USER_STORY_PATTERNS + SCENARIO_PATTERNS + ACCEPTANCE_CRITERIA_PATTERNS
    candidates = [p for p in all_patterns if p.matches(text)]
    if not candidates:
        return None
    # max() keeps the first of equal maxima, matching the original
    # "replace only on strictly greater priority" scan.
    return max(candidates, key=lambda p: p.priority)
|
||||
Reference in New Issue
Block a user