Initial commit: Requirements to Gherkin CLI Converter

This commit is contained in:
Bot
2026-02-02 12:15:36 +00:00
commit ec3ea3da33
29 changed files with 2803 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
"""NL2Gherkin - Natural Language to Gherkin Converter.
A CLI tool that converts natural language project requirements into structured
acceptance criteria in Gherkin format (Given-When-Then).
"""
__version__ = "0.1.0"

View File

@@ -0,0 +1,5 @@
"""CLI module for NL2Gherkin."""
from nl2gherkin.cli.commands import cli, convert, interactive, validate
__all__ = ["cli", "convert", "interactive", "validate"]

View File

@@ -0,0 +1,170 @@
"""CLI commands for the NL2Gherkin tool."""
import sys
from pathlib import Path
from typing import Optional
import click
from nl2gherkin.exporters.base import BaseExporter
from nl2gherkin.exporters.behave import BehaveExporter
from nl2gherkin.exporters.cucumber import CucumberExporter
from nl2gherkin.exporters.pytest_bdd import PytestBDDExporter
from nl2gherkin.gherkin.generator import GherkinGenerator
from nl2gherkin.gherkin.parser import GherkinParser
from nl2gherkin.nlp.analyzer import NLPAnalyzer
@click.group()
def cli() -> None:
    """CLI tool for converting natural language requirements to Gherkin format."""
    # The docstring doubles as the group's --help text; no body is needed.
@cli.command()
@click.argument(
    "input_file",
    type=click.Path(exists=True, readable=True, path_type=Path),
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file for the generated Gherkin.",
)
@click.option(
    "--framework",
    "-f",
    type=click.Choice(["cucumber", "behave", "pytest-bdd"]),
    default="cucumber",
    help="BDD framework to export for.",
)
@click.option(
    "--validate/--no-validate",
    default=True,
    help="Validate Gherkin syntax after generation.",
)
@click.option(
    "--ambiguity-check/--no-ambiguity-check",
    default=True,
    help="Check for ambiguous language in requirements.",
)
def convert(
    input_file: Path,
    output: Optional[Path],
    framework: str,
    validate: bool,
    ambiguity_check: bool,
) -> None:
    """Convert a requirements file to Gherkin format.

    INPUT_FILE should be a text file containing natural language requirements.
    Requirements are split on blank lines (one paragraph per requirement),
    each is analyzed and turned into a Gherkin feature, the combined output is
    optionally validated, then written to --output or echoed to stdout.
    Exits with status 1 on validation failure or any processing error.
    """
    try:
        content = input_file.read_text(encoding="utf-8")
        analyzer = NLPAnalyzer()
        parser = GherkinParser()
        # The same parser instance is reused for post-generation validation below.
        generator = GherkinGenerator(parser)
        exporter: BaseExporter
        if framework == "cucumber":
            exporter = CucumberExporter()
        elif framework == "behave":
            exporter = BehaveExporter()
        else:
            # Only remaining click.Choice value is "pytest-bdd".
            exporter = PytestBDDExporter()
        # One requirement per blank-line-separated paragraph.
        requirements = content.strip().split("\n\n")
        gherkin_features = []
        # NOTE(review): accumulated but never reported in aggregate — confirm intent.
        all_ambiguities = []
        for req in requirements:
            if not req.strip():
                continue
            if ambiguity_check:
                ambiguities = analyzer.analyze_ambiguity(req)
                if ambiguities:
                    all_ambiguities.extend(ambiguities)
                    click.echo("\n[WARNING] Ambiguities found in requirement:")
                    for amb in ambiguities:
                        click.echo(f" - {amb.message}")
                        if amb.suggestion:
                            click.echo(f" Suggestion: {amb.suggestion}")
            analysis = analyzer.analyze(req)
            gherkin = generator.generate(analysis)
            gherkin_features.append(gherkin)
        output_content = exporter.export(gherkin_features)
        if validate:
            valid, errors = parser.validate(output_content)
            if not valid:
                click.echo("\n[ERROR] Validation failed:")
                for error in errors:
                    click.echo(f" {error}")
                sys.exit(1)
            else:
                click.echo("[OK] Gherkin syntax is valid.")
        if output:
            output.write_text(output_content, encoding="utf-8")
            click.echo(f"\nOutput written to: {output}")
        else:
            click.echo(f"\n{output_content}")
    except Exception as e:
        # Broad catch is deliberate at the CLI boundary: report and exit non-zero.
        # (sys.exit raises SystemExit, a BaseException, so it is not re-caught here.)
        click.echo(f"[ERROR] {e}")
        sys.exit(1)
@cli.command()
@click.option(
    "--framework",
    "-f",
    type=click.Choice(["cucumber", "behave", "pytest-bdd"]),
    default="cucumber",
    help="BDD framework to export for.",
)
def interactive(framework: str) -> None:
    """Enter interactive mode for editing requirements."""
    # Imported lazily so the interactive machinery is only loaded when used.
    from nl2gherkin.cli.interactive import run_interactive_session

    exporter: BaseExporter
    # Dispatch table replaces the if/elif chain; anything other than the two
    # named frameworks falls through to pytest-bdd, exactly as before.
    factories = {
        "cucumber": CucumberExporter,
        "behave": BehaveExporter,
    }
    exporter = factories.get(framework, PytestBDDExporter)()
    run_interactive_session(exporter)
@cli.command()
@click.argument(
    "gherkin_file",
    type=click.Path(exists=True, readable=True, path_type=Path),
)
def validate(gherkin_file: Path) -> None:
    """Validate a Gherkin file for syntax correctness."""
    try:
        parser = GherkinParser()
        text = gherkin_file.read_text(encoding="utf-8")
        valid, errors = parser.validate(text)
        # Guard-clause form: report failures and bail out first.
        if not valid:
            click.echo("\n[ERROR] Validation failed:")
            for error in errors:
                click.echo(f" {error}")
            sys.exit(1)
        click.echo("[OK] Gherkin syntax is valid.")
    except Exception as e:
        # SystemExit is not an Exception, so the exit above is not swallowed.
        click.echo(f"[ERROR] {e}")
        sys.exit(1)

View File

@@ -0,0 +1,108 @@
"""Interactive mode for the NL2Gherkin CLI."""
from typing import List
import click
from nl2gherkin.exporters.base import BaseExporter
from nl2gherkin.gherkin.generator import GherkinGenerator
from nl2gherkin.gherkin.parser import GherkinParser
from nl2gherkin.nlp.analyzer import NLPAnalyzer
def _colorize(text: str, color: str) -> str:
    """Helper to apply color to text."""
    styled_text = click.style(text, fg=color)
    return styled_text
def run_interactive_session(exporter: BaseExporter) -> None:
    """Run the interactive session for editing requirements.

    A read-eval loop: each entered line is treated as a requirement and turned
    into a Gherkin scenario, except for the commands "edit", "export" and
    "clear" (matched case-insensitively). The loop exits on Ctrl+C.

    Args:
        exporter: Exporter used by the "export" command to combine scenarios.
    """
    analyzer = NLPAnalyzer()
    parser = GherkinParser()
    generator = GherkinGenerator(parser)
    # NOTE(review): history is appended to but never read back — confirm intent.
    history: List[dict] = []
    generated_scenarios: List[str] = []
    click.echo("\n[NL2Gherkin Interactive Mode]")
    click.echo("Enter your requirements (press Ctrl+C to exit)")
    click.echo("Use 'edit' to modify the last generated scenario")
    click.echo("Use 'export' to export all scenarios")
    click.echo("Use 'clear' to clear all scenarios\n")
    while True:
        try:
            requirement = click.prompt(
                "Enter requirement",
                type=str,
                default="",
                show_default=False,
            )
            if not requirement.strip():
                continue
            if requirement.lower() == "edit":
                if not generated_scenarios:
                    click.echo(_colorize("No scenarios to edit.", "yellow"))
                    continue
                # Defaults to the most recently generated scenario (1-based index).
                idx = click.prompt(
                    "Enter scenario number to edit",
                    type=int,
                    default=len(generated_scenarios),
                    show_default=False,
                )
                if 1 <= idx <= len(generated_scenarios):
                    # NOTE(review): the prompt default is the generated Gherkin,
                    # not the original requirement text — confirm intent.
                    edited_req = click.prompt(
                        "Enter modified requirement",
                        type=str,
                        default=generated_scenarios[idx - 1],
                        show_default=False,
                    )
                    analysis = analyzer.analyze(edited_req)
                    gherkin = generator.generate(analysis)
                    generated_scenarios[idx - 1] = gherkin
                    click.echo(f"\nUpdated scenario {idx}:")
                    click.echo(gherkin)
                else:
                    click.echo(_colorize("Invalid scenario number.", "yellow"))
                continue
            if requirement.lower() == "export":
                if not generated_scenarios:
                    click.echo(_colorize("No scenarios to export.", "yellow"))
                    continue
                output = exporter.export(generated_scenarios)
                click.echo("\n--- Exported Gherkin ---")
                click.echo(output)
                continue
            if requirement.lower() == "clear":
                generated_scenarios = []
                click.echo(_colorize("Cleared all scenarios.", "green"))
                continue
            # Default path: treat the input as a new requirement.
            analysis = analyzer.analyze(requirement)
            gherkin = generator.generate(analysis)
            generated_scenarios.append(gherkin)
            history.append({"requirement": requirement, "gherkin": gherkin})
            click.echo("\n--- Generated Scenario ---")
            click.echo(gherkin)
            # Ambiguity warnings are shown after the scenario so the user sees
            # the result first, then the caveats.
            ambiguities = analyzer.analyze_ambiguity(requirement)
            if ambiguities:
                click.echo(_colorize("\n[WARNING] Potential ambiguities:", "yellow"))
                for amb in ambiguities:
                    click.echo(f" - {amb.message}")
                    if amb.suggestion:
                        click.echo(f" Suggestion: {amb.suggestion}")
            click.echo("")
        except KeyboardInterrupt:
            click.echo("\n\nExiting interactive mode.")
            break

View File

@@ -0,0 +1,13 @@
"""Exporters module for BDD framework output."""
from nl2gherkin.exporters.base import BaseExporter
from nl2gherkin.exporters.behave import BehaveExporter
from nl2gherkin.exporters.cucumber import CucumberExporter
from nl2gherkin.exporters.pytest_bdd import PytestBDDExporter
__all__ = [
"BaseExporter",
"CucumberExporter",
"BehaveExporter",
"PytestBDDExporter",
]

View File

@@ -0,0 +1,59 @@
"""Base exporter class for BDD frameworks."""
from abc import ABC, abstractmethod
from typing import Dict, List
class BaseExporter(ABC):
    """Base class for BDD framework exporters."""

    @abstractmethod
    def export(self, features: List[str]) -> str:
        """Export features to the target framework format.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Exported content string.
        """

    @abstractmethod
    def get_step_definitions_template(self) -> str:
        """Get step definitions template for this framework.

        Returns:
            Step definitions template string.
        """

    @abstractmethod
    def get_configuration_template(self) -> Dict[str, str]:
        """Get configuration files for this framework.

        Returns:
            Dictionary mapping filenames to content templates.
        """

    def _extract_scenarios(self, feature: str) -> List[str]:
        """Extract individual scenarios from a feature string.

        Lines before the first Scenario/Scenario Outline header are discarded;
        each returned chunk starts at a header line, with original indentation
        preserved.
        """
        chunks: List[List[str]] = []
        collecting = False
        for line in feature.split("\n"):
            header = line.strip()
            if header.startswith(("Scenario:", "Scenario Outline:")):
                chunks.append([line])
                collecting = True
            elif collecting:
                chunks[-1].append(line)
        return ["\n".join(chunk) for chunk in chunks]

View File

@@ -0,0 +1,118 @@
"""Behave exporter for Python BDD projects."""
from typing import Dict, List
from nl2gherkin.exporters.base import BaseExporter
class BehaveExporter(BaseExporter):
    """Exporter for Behave (Python)."""

    def __init__(self) -> None:
        """Initialize the Behave exporter."""
        pass

    def export(self, features: List[str]) -> str:
        """Export features to Behave format.

        Behave consumes standard Gherkin, so the features are simply joined
        with a blank line.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Behave-compatible feature file content.
        """
        return "\n\n".join(features)

    def get_step_definitions_template(self) -> str:
        """Get Behave step definitions template.

        Returns:
            Step definitions template string (a skeleton steps module; also
            imports ``step``, used by generated "And"/"But" stubs).
        """
        return '''"""Behave step definitions."""
from behave import given, when, then, step


@given("a setup condition")
def step_given_setup(context):
    """Given step implementation."""
    pass


@when("an action occurs")
def step_when_action(context):
    """When step implementation."""
    pass


@then("an expected result")
def step_then_result(context):
    """Then step implementation."""
    pass
''',

    def get_configuration_template(self) -> Dict[str, str]:
        """Get Behave configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "behave.ini": '''[behave]
format = progress
outfiles = behave-report.txt
''',
            "features/environment.py": '''"""Behave environment configuration."""


def before_scenario(context, scenario):
    """Run before each scenario."""
    pass


def after_scenario(context, scenario):
    """Run after each scenario."""
    pass
''',
        }

    def generate_step_definitions(self, scenarios: List[str]) -> str:
        """Generate Behave step definition stubs for the given scenarios.

        Fixes over the previous version: the decorator now matches the step
        keyword instead of always being ``@given``; "And"/"But" steps use
        Behave's generic ``@step`` decorator (they continue the previous step
        type); ``<var>`` outline placeholders become ``{var}`` parse-format
        parameters and are passed to the stub as named arguments after
        ``context`` (previously quoted strings were emitted in the parameter
        list, which is a syntax error).

        Args:
            scenarios: List of scenario texts.

        Returns:
            Step definitions Python code.
        """
        import re

        step_defs: List[str] = []
        for scenario in scenarios:
            for line in scenario.split("\n"):
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue
                keyword = stripped.split()[0]
                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                # Scenario Outline placeholders <x> are substituted before
                # matching, so expose them as {x} parse parameters.
                pattern = re.sub(r"<([^>]+)>", r"{\1}", step_text)
                decorator = "@step" if keyword in ("And", "But") else f"@{keyword.lower()}"
                args = ", ".join(["context"] + params)
                step_defs.append(f'{decorator}("{pattern}")')
                step_defs.append(
                    f"def step_impl({args}):\n"
                    f'    """{keyword} step implementation."""\n'
                    "    pass\n"
                )
        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<placeholder>`` parameter names from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)

View File

@@ -0,0 +1,89 @@
"""Cucumber exporter for JavaScript/TypeScript projects."""
from typing import Dict, List
from nl2gherkin.exporters.base import BaseExporter
class CucumberExporter(BaseExporter):
    """Exporter for Cucumber (JavaScript/TypeScript)."""

    def __init__(self) -> None:
        """Initialize the Cucumber exporter."""
        # Braces are doubled so they survive a later str.format() pass.
        self.step_definitions_template = """const {{ Given, When, Then }} = require('@cucumber/cucumber');
{{step_definitions}}
"""

    def export(self, features: List[str]) -> str:
        """Export features to Cucumber format.

        Cucumber consumes standard Gherkin, so the features are simply joined
        with a blank line.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            Cucumber-compatible feature file content.
        """
        return "\n\n".join(features)

    def get_step_definitions_template(self) -> str:
        """Get Cucumber step definitions template.

        Returns:
            Step definitions template string.
        """
        return self.step_definitions_template

    def get_configuration_template(self) -> Dict[str, str]:
        """Get Cucumber configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "cucumber.js": '''module.exports = {
  default: '--publish-quiet'
}
''',
            ".cucumberrc": '''default:
  publish-quiet: true
  format: ['progress-bar', 'html:cucumber-report.html']
''',
        }

    def generate_step_definitions(self, scenarios: List[str]) -> str:
        """Generate Cucumber.js step definition stubs for the given scenarios.

        Fixes over the previous version: the step text is now emitted as the
        pattern argument (previously it was dropped, yielding invalid JS like
        ``Given(, async function ()``); "And"/"But" map to the preceding
        Given/When/Then since @cucumber/cucumber exposes no And/But
        registration functions; ``<var>`` outline placeholders become
        ``{string}`` Cucumber-expression parameters.

        Args:
            scenarios: List of scenario texts.

        Returns:
            Step definitions JavaScript code.
        """
        step_defs: List[str] = []
        for scenario in scenarios:
            # "And"/"But" continue the most recent explicit keyword.
            last_keyword = "Given"
            for line in scenario.split("\n"):
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue
                keyword = stripped.split()[0]
                if keyword in ("And", "But"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword
                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                pattern = step_text
                for param in params:
                    pattern = pattern.replace(f"<{param}>", "{string}")
                params_list = ", ".join(params)
                step_defs.append(
                    f"{keyword}('{pattern}', async function ({params_list}) {{\n"
                    "  // TODO: implement step\n"
                    "});\n"
                )
        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<placeholder>`` parameter names from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)

View File

@@ -0,0 +1,141 @@
"""pytest-bdd exporter for pytest projects."""
from typing import Dict, List
from nl2gherkin.exporters.base import BaseExporter
class PytestBDDExporter(BaseExporter):
    """Exporter for pytest-bdd (Python)."""

    def __init__(self) -> None:
        """Initialize the pytest-bdd exporter."""
        pass

    def export(self, features: List[str]) -> str:
        """Export features to pytest-bdd format.

        pytest-bdd consumes standard Gherkin, so the features are simply
        joined with a blank line.

        Args:
            features: List of Gherkin feature strings.

        Returns:
            pytest-bdd-compatible feature file content.
        """
        return "\n\n".join(features)

    def get_step_definitions_template(self) -> str:
        """Get pytest-bdd step definitions template.

        Returns:
            Step definitions template string (a skeleton test module).
        """
        return '''"""pytest-bdd step definitions."""
import pytest
from pytest_bdd import given, when, then, scenarios

scenarios('features')


@given("a setup condition")
def setup_condition():
    """Given step implementation."""
    return {}


@when("an action occurs")
def action_occurs():
    """When step implementation."""
    pass


@then("an expected result")
def expected_result():
    """Then step implementation."""
    pass
'''

    def get_configuration_template(self) -> Dict[str, str]:
        """Get pytest-bdd configuration files.

        Returns:
            Dictionary mapping filenames to content.
        """
        return {
            "conftest.py": '''"""pytest configuration and fixtures."""
import pytest
from pytest_bdd import scenarios

scenarios('features')


@pytest.fixture
def context():
    """Shared test context."""
    return {}


def pytest_configure(config):
    """Configure pytest."""
    pass
''',
            "pytest.ini": '''[pytest]
bdd_features_base_dir = features/
''',
        }

    def generate_step_definitions(self, scenarios: List[str], feature_name: str = "features") -> str:
        """Generate pytest-bdd step definition stubs for the given scenarios.

        Fixes over the previous version: parametrized steps used
        ``@pytest.given`` (which does not exist — the decorators live in
        ``pytest_bdd``) and "And"/"But" steps produced ``@and(...)``, a Python
        syntax error. "And"/"But" now reuse the preceding step's decorator,
        and ``<var>`` placeholders are wrapped in ``parsers.parse`` patterns
        (consumers must import ``parsers`` from ``pytest_bdd``).

        Args:
            scenarios: List of scenario texts.
            feature_name: Name of the feature file (kept for API compatibility).

        Returns:
            Step definitions Python code.
        """
        step_defs: List[str] = []
        for scenario in scenarios:
            lines = scenario.split("\n")
            scenario_name = ""
            for line in lines:
                stripped = line.strip()
                if stripped.startswith("Scenario:"):
                    scenario_name = stripped[9:].strip().replace(" ", "_")
                    break
            # "and"/"but" continue the most recent explicit keyword.
            last_keyword = "given"
            for line in lines:
                stripped = line.strip()
                if not stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                    continue
                keyword = stripped.split()[0].lower()
                if keyword in ("and", "but"):
                    keyword = last_keyword
                else:
                    last_keyword = keyword
                step_text = " ".join(stripped.split()[1:])
                params = self._extract_parameters(step_text)
                if params:
                    pattern = step_text
                    for param in params:
                        pattern = pattern.replace(f"<{param}>", "{" + param + "}")
                    decorator = f'@{keyword}(parsers.parse("{pattern}"))'
                    args = ", ".join(params)
                else:
                    decorator = f'@{keyword}("{step_text}")'
                    args = ""
                step_defs.append(
                    f"{decorator}\n"
                    f"def {keyword}_{scenario_name}({args}):\n"
                    f'    """{stripped.split()[0]} step implementation."""\n'
                    "    pass\n"
                )
        return "\n".join(step_defs)

    def _extract_parameters(self, step_text: str) -> List[str]:
        """Extract ``<placeholder>`` parameter names from a step text."""
        import re
        return re.findall(r"<([^>]+)>", step_text)

View File

@@ -0,0 +1,11 @@
"""Gherkin module for generating and parsing Gherkin syntax."""
from nl2gherkin.gherkin.generator import GherkinGenerator
from nl2gherkin.gherkin.parser import GherkinParser
from nl2gherkin.gherkin.templates import GherkinTemplates
__all__ = [
"GherkinGenerator",
"GherkinParser",
"GherkinTemplates",
]

View File

@@ -0,0 +1,219 @@
"""Gherkin generator for creating Gherkin syntax from analyzed requirements."""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, List, Optional
from nl2gherkin.nlp.analyzer import RequirementAnalysis
class ScenarioType(str, Enum):
    """Types of Gherkin scenarios."""

    # Values are the literal keywords rendered into the .feature output
    # (used via .value when rendering scenario headers).
    SCENARIO = "Scenario"
    SCENARIO_OUTLINE = "Scenario Outline"
@dataclass
class GherkinStep:
    """A single step in a Gherkin scenario."""

    keyword: str  # Step keyword, e.g. "Given", "When", "Then".
    text: str  # Step text that follows the keyword.
@dataclass
class GherkinScenario:
    """A Gherkin scenario."""

    name: str  # Scenario title shown after the "Scenario:" keyword.
    scenario_type: ScenarioType = ScenarioType.SCENARIO  # Scenario vs Scenario Outline.
    steps: List[GherkinStep] = field(default_factory=list)  # Ordered steps.
    examples: List[str] = field(default_factory=list)  # Pre-rendered Examples rows (header first).
    tags: List[str] = field(default_factory=list)  # Tag names rendered as "@tag" above the scenario.
@dataclass
class GherkinFeature:
    """A Gherkin feature."""

    name: str  # Feature title shown after the "Feature:" keyword.
    description: Optional[str] = None  # Free-text description below the header.
    scenarios: List[GherkinScenario] = field(default_factory=list)  # Contained scenarios.
    tags: List[str] = field(default_factory=list)  # Tag names rendered as "@tag" above the feature.
    background: Optional[List[GherkinStep]] = None  # Steps rendered under "Background:" when set.
class GherkinGenerator:
    """Generator for creating Gherkin syntax from requirements analysis."""

    def __init__(self, parser: Optional[Any] = None) -> None:
        """Initialize the generator.

        Args:
            parser: Optional parser for validation.
        """
        self.parser = parser

    def generate(self, analysis: RequirementAnalysis) -> str:
        """Generate Gherkin from a requirement analysis.

        Args:
            analysis: The analyzed requirement.

        Returns:
            Gherkin formatted string.
        """
        feature = self._create_feature(analysis)
        return self._render_feature(feature)

    def generate_scenario(self, analysis: RequirementAnalysis) -> GherkinScenario:
        """Generate a scenario from analysis.

        Args:
            analysis: The analyzed requirement.

        Returns:
            GherkinScenario object.
        """
        return self._create_scenario(analysis)

    def _create_feature(self, analysis: RequirementAnalysis) -> GherkinFeature:
        """Create a one-scenario Gherkin feature from analysis."""
        scenario = self._create_scenario(analysis)
        feature_name = f"{analysis.actor or 'User'} {analysis.action or 'does something'}"
        return GherkinFeature(
            name=feature_name,
            description=self._create_description(analysis),
            scenarios=[scenario],
        )

    def _create_description(self, analysis: RequirementAnalysis) -> Optional[str]:
        """Create a description from the raw text and the stated benefit."""
        parts = []
        if analysis.raw_text:
            parts.append(analysis.raw_text)
        if analysis.benefit:
            parts.append(f"So that: {analysis.benefit}")
        return " ".join(parts) if parts else None

    def _create_scenario(self, analysis: RequirementAnalysis) -> GherkinScenario:
        """Create a Gherkin scenario from analysis.

        Steps are derived heuristically: an explicit condition and/or the
        actor form the Given steps, the action forms the When step, and the
        target forms the Then assertion.
        """
        steps: List[GherkinStep] = []
        if analysis.condition:
            steps.append(GherkinStep("Given", analysis.condition))
        if analysis.actor:
            given_text = f"a {analysis.actor}"
            if analysis.target:
                given_text += f" wants to interact with {analysis.target}"
            elif analysis.action:
                given_text += f" wants to {analysis.action}"
            steps.append(GherkinStep("Given", given_text))
        if analysis.action:
            when_text = analysis.action
            # Single condition equivalent to the former duplicated if/elif
            # branches (both produced this exact template): use the full
            # actor/action/target phrasing when a target or condition exists.
            if analysis.target or analysis.condition:
                when_text = f"the {analysis.actor} {analysis.action} the {analysis.target}"
            steps.append(GherkinStep("When", when_text))
        if analysis.target:
            # Naive past-tense heuristic; read-style actions get a
            # display-oriented assertion instead.
            # NOTE(review): "{action}ed" yields e.g. "createed" — confirm acceptable.
            then_text = f"the {analysis.target} should be {analysis.action}ed"
            if analysis.action_type.value in ["read", "search", "filter"]:
                then_text = f"the {analysis.target} should be displayed"
            steps.append(GherkinStep("Then", then_text))
        scenario_type = ScenarioType.SCENARIO
        examples: List[str] = []
        if analysis.variables:
            # Variables imply a parameterized scenario -> Scenario Outline.
            scenario_type = ScenarioType.SCENARIO_OUTLINE
            examples = self._create_examples(analysis)
        return GherkinScenario(
            name=self._create_scenario_name(analysis),
            scenario_type=scenario_type,
            steps=steps,
            examples=examples,
        )

    def _create_scenario_name(self, analysis: RequirementAnalysis) -> str:
        """Build a title-cased scenario name from actor/action/target."""
        parts = []
        if analysis.actor:
            parts.append(analysis.actor.capitalize())
        if analysis.action:
            parts.append(analysis.action.capitalize())
        if analysis.target:
            # Only the last word of a multi-word target is used.
            target_name = analysis.target.split()[-1] if analysis.target else ""
            parts.append(target_name.capitalize())
        return " ".join(parts) if parts else "Sample Scenario"

    def _create_examples(self, analysis: RequirementAnalysis) -> List[str]:
        """Create Examples table rows (header row first) from variables."""
        if not analysis.variables:
            return []
        headers = list(analysis.variables.keys())
        header_row = "| " + " | ".join(headers) + " |"
        example_rows: List[str] = []
        if analysis.examples:
            for example in analysis.examples:
                if isinstance(example, dict):
                    row_values = [str(example.get(h, "")) for h in headers]
                else:
                    row_values = [str(example)]
                example_rows.append("| " + " | ".join(row_values) + " |")
        else:
            # No concrete examples: emit a single placeholder row.
            example_rows.append("| " + " | ".join(["value"] * len(headers)) + " |")
        return [header_row] + example_rows

    def _render_feature(self, feature: GherkinFeature) -> str:
        """Render a GherkinFeature to Gherkin text (one-space indentation)."""
        lines: List[str] = []
        for tag in feature.tags:
            lines.append(f"@{tag}")
        lines.append(f"Feature: {feature.name}")
        if feature.description:
            lines.append(f" {feature.description}")
        if feature.background:
            lines.append(" Background:")
            for step in feature.background:
                lines.append(f" {step.keyword} {step.text}")
        for scenario in feature.scenarios:
            lines.append("")
            for tag in scenario.tags:
                lines.append(f" @{tag}")
            lines.append(f" {scenario.scenario_type.value}: {scenario.name}")
            for step in scenario.steps:
                lines.append(f" {step.keyword} {step.text}")
            if scenario.examples:
                lines.append(" Examples:")
                for example in scenario.examples:
                    lines.append(f" {example}")
        return "\n".join(lines)

View File

@@ -0,0 +1,167 @@
"""Gherkin parser for validation."""
import re
from typing import List, Optional, Tuple
class GherkinParser:
    """Parser and validator for Gherkin syntax."""

    def __init__(self) -> None:
        """Initialize the Gherkin parser."""
        pass

    def parse(self, content: str) -> dict:
        """Parse Gherkin content into a lightweight AST.

        Args:
            content: The Gherkin content to parse.

        Returns:
            Dictionary with "feature" (dict or None) and "scenarios" (list of
            dicts with name/type/steps). "line" values are 0-based indices
            into the stripped input.
        """
        lines = content.strip().split("\n")
        ast: dict = {
            "feature": None,
            "scenarios": [],
        }
        scenario: Optional[dict] = None
        for i, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("Feature:"):
                ast["feature"] = {
                    "name": stripped[8:].strip(),
                    "description": "",
                    "line": i,
                }
            elif stripped.startswith("Scenario:"):
                if scenario:
                    ast["scenarios"].append(scenario)
                scenario = {
                    "name": stripped[9:].strip(),
                    "type": "Scenario",
                    "steps": [],
                    "line": i,
                }
            elif stripped.startswith("Scenario Outline:"):
                if scenario:
                    ast["scenarios"].append(scenario)
                scenario = {
                    "name": stripped[17:].strip(),
                    "type": "Scenario Outline",
                    "steps": [],
                    "line": i,
                }
            elif stripped.startswith(("Given ", "When ", "Then ", "And ", "But ")):
                # Steps outside any scenario are silently ignored.
                if scenario:
                    scenario["steps"].append({
                        "keyword": stripped.split()[0],
                        "text": " ".join(stripped.split()[1:]),
                        "line": i,
                    })
            elif stripped.startswith("Examples:"):
                if scenario:
                    scenario["has_examples"] = True
        # Flush the last open scenario.
        if scenario:
            ast["scenarios"].append(scenario)
        return ast

    def validate(self, content: str) -> Tuple[bool, List[str]]:
        """Validate Gherkin syntax.

        Checks, in order: non-empty content, a leading "Feature:" line, at
        least one Scenario/Scenario Outline, parseability, and that any
        Examples table is preceded by a Scenario Outline.

        Args:
            content: The Gherkin content to validate.

        Returns:
            Tuple of (is_valid, list_of_errors).
        """
        errors: List[str] = []
        if not content.strip():
            return False, ["Empty content"]
        lines = content.strip().split("\n")
        if not lines[0].strip().startswith("Feature:"):
            return False, ["Gherkin must start with 'Feature:'"]
        has_scenario = any(
            line.strip().startswith(("Scenario:", "Scenario Outline:"))
            for line in lines
        )
        if not has_scenario:
            return False, ["Feature must have at least one Scenario"]
        try:
            self.parse(content)
        except Exception as e:
            error_msg = str(e)
            line_no = _extract_line_number(error_msg)
            if line_no:
                errors.append(f"Line {line_no}: {error_msg}")
            else:
                errors.append(f"Validation error: {error_msg}")
            return False, errors
        # Examples is only legal under a Scenario Outline appearing earlier.
        for i, line in enumerate(lines):
            if line.strip().startswith("Examples:") and not any(
                "Scenario Outline" in earlier for earlier in lines[:i]
            ):
                errors.append(f"Line {i + 1}: Examples table can only be used with Scenario Outline")
        if errors:
            return False, errors
        return True, []

    def validate_feature(self, feature_content: str) -> Tuple[bool, List[str]]:
        """Validate a single feature.

        Args:
            feature_content: The feature content to validate.

        Returns:
            Tuple of (is_valid, list_of_errors).
        """
        if not feature_content.strip().startswith("Feature:"):
            return False, ["Content must start with 'Feature:'"]
        return self.validate(feature_content)
def _extract_line_number(error_msg: str) -> Optional[int]:
"""Extract line number from error message."""
patterns = [
r"line\s+(\d+)",
r"(\d+):",
r"row\s+(\d+)",
r"line\s+(\d+)",
]
for pattern in patterns:
match = re.search(pattern, error_msg, re.IGNORECASE)
if match:
return int(match.group(1))
return None

View File

@@ -0,0 +1,105 @@
"""Gherkin templates for formatting output."""
from typing import Any, Optional
class GherkinTemplates:
    """Templates for Gherkin syntax generation."""

    # str.format templates; whitespace inside these literals is emitted
    # verbatim into the generated Gherkin text.
    # NOTE(review): continuation lines of the multi-line templates appear
    # unindented relative to the STEP_* constants — confirm intended layout.
    FEATURE = """Feature: {name}
{description}
"""
    SCENARIO = """ Scenario: {name}
Given {given}
When {when}
Then {then}
"""
    SCENARIO_OUTLINE = """ Scenario Outline: {name}
Given {given}
When {when}
Then {then}
Examples:
| {variables} |
"""
    BACKGROUND = """ Background:
"""
    STEP_GIVEN = " Given {text}"
    STEP_WHEN = " When {text}"
    STEP_THEN = " Then {text}"
    STEP_AND = " And {text}"
    STEP_BUT = " But {text}"
    EXAMPLES_TABLE = """ Examples:
| {header} |
| {row} |
"""

    @staticmethod
    def format_feature(name: str, description: str = "") -> str:
        """Format a feature header, omitting the description line when empty."""
        if description:
            return GherkinTemplates.FEATURE.format(name=name, description=description)
        return f"Feature: {name}\n"

    @staticmethod
    def format_scenario(
        name: str,
        given: str,
        when: str,
        then: str,
        additional_steps: Optional[list] = None,
    ) -> str:
        """Format a scenario.

        Args:
            name: Scenario title.
            given: Given step text.
            when: When step text.
            then: Then step text.
            additional_steps: Extra step texts, each rendered as an "And" step.
        """
        lines = [f" Scenario: {name}"]
        lines.append(f" Given {given}")
        lines.append(f" When {when}")
        lines.append(f" Then {then}")
        if additional_steps:
            for step in additional_steps:
                lines.append(f" And {step}")
        return "\n".join(lines)

    @staticmethod
    def format_scenario_outline(
        name: str,
        given: str,
        when: str,
        then: str,
        variables: list,
        examples: list,
    ) -> str:
        """Format a scenario outline with an Examples table.

        Args:
            name: Scenario title.
            given: Given step text.
            when: When step text.
            then: Then step text.
            variables: Column names for the Examples header row.
            examples: Iterable of rows; each row is an iterable of cell values.
        """
        lines = [f" Scenario Outline: {name}"]
        lines.append(f" Given {given}")
        lines.append(f" When {when}")
        lines.append(f" Then {then}")
        lines.append(" Examples:")
        header = " | ".join(variables)
        lines.append(f" | {header} |")
        for row in examples:
            row_str = " | ".join(str(v) for v in row)
            lines.append(f" | {row_str} |")
        return "\n".join(lines)

    @staticmethod
    def format_step(keyword: str, text: str) -> str:
        """Format a single step; unknown keywords fall back to the Given template."""
        templates = {
            "Given": GherkinTemplates.STEP_GIVEN,
            "When": GherkinTemplates.STEP_WHEN,
            "Then": GherkinTemplates.STEP_THEN,
            "And": GherkinTemplates.STEP_AND,
            "But": GherkinTemplates.STEP_BUT,
        }
        template = templates.get(keyword, GherkinTemplates.STEP_GIVEN)
        return template.format(text=text)

14
src/nl2gherkin/main.py Normal file
View File

@@ -0,0 +1,14 @@
"""Main entry point for the NL2Gherkin CLI."""
from nl2gherkin.cli.commands import cli
def main() -> None:
    """Entry point for the CLI; delegates to the click command group."""
    cli()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,12 @@
"""NLP module for natural language processing."""
from nl2gherkin.nlp.ambiguity import AmbiguityDetector, AmbiguityWarning
from nl2gherkin.nlp.analyzer import NLPAnalyzer
from nl2gherkin.nlp.patterns import RequirementPattern
__all__ = [
"NLPAnalyzer",
"AmbiguityDetector",
"AmbiguityWarning",
"RequirementPattern",
]

View File

@@ -0,0 +1,232 @@
"""Ambiguity detection for requirements."""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional
class AmbiguityType(str, Enum):
    """Types of ambiguity in requirements."""

    # Values double as the serialized form used by AmbiguityWarning.to_dict().
    PRONOUN = "pronoun"
    VAGUE_QUANTIFIER = "vague_quantifier"
    TEMPORAL = "temporal"
    MISSING_CONDITION = "missing_condition"
    UNCLEAR_REFERENCE = "unclear_reference"
    PASSIVE_VOICE = "passive_voice"
    UNDEFINED_TERM = "undefined_term"
@dataclass
class AmbiguityWarning:
    """A warning about ambiguous language in a requirement."""

    type: AmbiguityType  # Category of the detected ambiguity.
    message: str  # Human-readable description of the problem.
    position: int = 0  # Character offset of the flagged text in the source string.
    length: int = 0  # Length of the flagged text.
    suggestion: Optional[str] = None  # Optional rewording advice.
    severity: str = "medium"  # Severity label (e.g. "low", "medium").

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary (enum serialized to its string value)."""
        return {
            "type": self.type.value,
            "message": self.message,
            "position": self.position,
            "length": self.length,
            "suggestion": self.suggestion,
            "severity": self.severity,
        }
class AmbiguityDetector:
    """Detector for ambiguous language in requirements.

    Each ``_detect_*`` method is an independent heuristic scan; ``detect``
    runs all of them and concatenates their ``AmbiguityWarning`` results.
    """

    # Pronouns whose antecedent is often unclear in requirement prose.
    PRONOUNS = {
        "it", "they", "them", "he", "she", "this", "that", "these", "those",
        "its", "their", "his", "her", "which", "what", "who", "whom",
    }

    # Quantifiers and hedge words that lack a measurable value.
    VAGUE_QUANTIFIERS = {
        "some", "many", "few", "several", "various", "multiple", "somewhat",
        "roughly", "approximately", "generally", "usually", "often", "sometimes",
        "occasionally", "maybe", "possibly", "probably", "likely",
    }

    # Time references with no concrete deadline or trigger.  The triplicated
    # "eventually" entries were removed; the set value is unchanged.
    TEMPORAL_AMBIGUITIES = {
        "soon", "later", "eventually", "currently", "presently",
        "before long", "in the future", "at some point",
    }

    # Markers that count as an explicit condition on an action.
    CONDITIONAL_KEYWORDS = {
        "if", "when", "unless", "provided", "given", "assuming", "while",
    }

    def detect(self, text: str) -> List[AmbiguityWarning]:
        """Detect ambiguities in the given text.

        Args:
            text: The requirement text to analyze.

        Returns:
            List of ambiguity warnings, grouped by detection category.
        """
        warnings: List[AmbiguityWarning] = []
        warnings.extend(self._detect_pronouns(text))
        warnings.extend(self._detect_vague_quantifiers(text))
        warnings.extend(self._detect_temporal_ambiguities(text))
        warnings.extend(self._detect_missing_conditions(text))
        warnings.extend(self._detect_passive_voice(text))
        return warnings

    @staticmethod
    def _iter_tokens(text: str):
        """Yield ``(raw_word, clean_word, offset)`` for each whitespace token.

        ``offset`` is the token's true character position.  The previous
        implementation used ``text.find(word)``, which always returns the
        FIRST occurrence and therefore mis-reported positions whenever the
        same word appeared more than once.
        """
        import re

        for match in re.finditer(r"\S+", text):
            raw = match.group()
            yield raw, raw.strip(".,!?;:\"'()[]{}").lower(), match.start()

    def _detect_pronouns(self, text: str) -> List[AmbiguityWarning]:
        """Detect pronoun usage that may be ambiguous."""
        warnings: List[AmbiguityWarning] = []
        for word, clean_word, pos in self._iter_tokens(text):
            # NOTE(review): the length filter skips the 2-letter pronouns
            # "it"/"he" even though they appear in PRONOUNS — confirm this
            # noise filter is intended.
            if clean_word in self.PRONOUNS and len(clean_word) > 2:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.PRONOUN,
                        message=f"Pronoun '{word}' may have unclear antecedent",
                        position=pos,
                        length=len(word),
                        suggestion=f"Replace '{word}' with the specific noun it refers to",
                        severity="low",
                    )
                )
        return warnings

    def _detect_vague_quantifiers(self, text: str) -> List[AmbiguityWarning]:
        """Detect vague quantifiers that lack precision."""
        warnings: List[AmbiguityWarning] = []
        for word, clean_word, pos in self._iter_tokens(text):
            if clean_word not in self.VAGUE_QUANTIFIERS:
                continue
            # Tailor the suggestion to the kind of vagueness.
            if clean_word in {"some", "few", "several"}:
                suggestion = f"Specify an exact number or range for '{word}'"
            elif clean_word in {"many", "multiple", "various"}:
                suggestion = f"Specify a count or percentage for '{word}'"
            elif clean_word in {"approximately", "roughly"}:
                suggestion = "Replace with a specific value or range"
            else:
                suggestion = "Provide more specific criteria"
            warnings.append(
                AmbiguityWarning(
                    type=AmbiguityType.VAGUE_QUANTIFIER,
                    message=f"Vague quantifier '{word}' lacks precision",
                    position=pos,
                    length=len(word),
                    suggestion=suggestion,
                    severity="medium",
                )
            )
        return warnings

    def _detect_temporal_ambiguities(self, text: str) -> List[AmbiguityWarning]:
        """Detect temporal ambiguities in the text.

        Uses a whole-word regex per term so that the multi-word entries in
        TEMPORAL_AMBIGUITIES ("before long", "in the future", "at some
        point") are found too — a per-token scan could never match them.
        """
        import re

        warnings: List[AmbiguityWarning] = []
        for term in sorted(self.TEMPORAL_AMBIGUITIES):
            for match in re.finditer(r"\b" + re.escape(term) + r"\b", text, re.IGNORECASE):
                found = match.group()
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.TEMPORAL,
                        message=f"Temporal term '{found}' is ambiguous",
                        position=match.start(),
                        length=len(found),
                        suggestion=f"Specify an exact time, deadline, or condition for '{found}'",
                        severity="low",
                    )
                )
        # Report findings in text order, not term order.
        warnings.sort(key=lambda w: w.position)
        return warnings

    def _detect_missing_conditions(self, text: str) -> List[AmbiguityWarning]:
        """Flag an action keyword that has no explicit condition anywhere."""
        import re

        warnings: List[AmbiguityWarning] = []
        # Does the text contain any conditional marker at all?
        has_conditional = any(
            re.search(r"\b" + kw + r"\b", text, re.IGNORECASE)
            for kw in self.CONDITIONAL_KEYWORDS
        )
        # Modal/action verbs that usually need a triggering condition.
        action_patterns = [
            r"\bmust\b", r"\bshall\b", r"\bshould\b", r"\bwill\b",
            r"\bcan\b", r"\benable\b", r"\ballow\b",
        ]
        has_action = any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in action_patterns
        )
        if has_action and not has_conditional:
            # Anchor the warning at the first "modal + word" phrase.
            action_match = re.search(
                r"(must|shall|should|will|can|enable|allow)\s+\w+",
                text,
                re.IGNORECASE,
            )
            if action_match:
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.MISSING_CONDITION,
                        message="No explicit condition found for this action",
                        position=action_match.start(),
                        length=len(action_match.group()),
                        suggestion="Add a condition using 'when', 'if', 'after', or 'before'",
                        severity="medium",
                    )
                )
        return warnings

    def _detect_passive_voice(self, text: str) -> List[AmbiguityWarning]:
        """Detect passive voice usage."""
        import re

        warnings: List[AmbiguityWarning] = []
        # Simple surface patterns; misses irregular participles ("was sent").
        # NOTE(review): the "by the <noun>" pattern is only a weak passive
        # signal and will also flag active sentences — confirm it is wanted.
        passive_patterns = [
            r"\bwas\s+\w+ed\b",
            r"\bwere\s+\w+ed\b",
            r"\bhas\s+been\s+\w+ed\b",
            r"\bhave\s+been\s+\w+ed\b",
            r"\bwill\s+be\s+\w+ed\b",
            r"\bby\s+the\s+\w+\b",
        ]
        for pattern in passive_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                warnings.append(
                    AmbiguityWarning(
                        type=AmbiguityType.PASSIVE_VOICE,
                        message=f"Passive voice detected: '{match.group()}'",
                        position=match.start(),
                        length=match.end() - match.start(),
                        suggestion="Consider rewriting in active voice for clarity",
                        severity="low",
                    )
                )
        return warnings

View File

@@ -0,0 +1,237 @@
"""NLP analyzer for extracting structured information from requirements."""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, TYPE_CHECKING
import spacy
from spacy.tokens import Doc
if TYPE_CHECKING:
from nl2gherkin.nlp.ambiguity import AmbiguityWarning
class ActorType(str, Enum):
    """Types of actors in requirements.

    Subclasses ``str`` so members compare equal to their plain string
    values and serialize directly.
    """

    USER = "user"        # end user, customer, or visitor
    SYSTEM = "system"    # the system/app/application itself
    ADMIN = "admin"      # administrator actor
    API = "api"          # external API, service, or endpoint
    UNKNOWN = "unknown"  # actor could not be classified
class ActionType(str, Enum):
    """Types of actions in requirements.

    Values mirror the keyword table in ``NLPAnalyzer._extract_action``,
    which maps verb synonyms (e.g. "add"/"make" -> CREATE) onto members.
    """

    CREATE = "create"    # create/add/make/generate/produce
    READ = "read"        # view/see/display/show/list/retrieve/get
    UPDATE = "update"    # update/edit/modify/change/alter
    DELETE = "delete"    # delete/remove/destroy/cancel
    VALIDATE = "validate"  # validate/verify/check/confirm/ensure
    SEND = "send"        # send/submit/post/push/dispatch
    RECEIVE = "receive"  # receive/get/fetch/pull
    LOGIN = "login"      # login/authenticate
    LOGOUT = "logout"    # logout
    SEARCH = "search"    # search/find/query
    FILTER = "filter"    # filter/narrow/refine
    EXPORT = "export"    # export/download/save
    IMPORT = "import"    # import/upload/load
    UNKNOWN = "unknown"  # no action keyword matched
@dataclass
class RequirementAnalysis:
    """Structured analysis of a requirement.

    Populated field-by-field by ``NLPAnalyzer.analyze``; any component that
    could not be extracted keeps its default.
    """

    # The original requirement text, verbatim.
    raw_text: str
    # Who performs the action (user-story role or sentence subject).
    actor: Optional[str] = None
    actor_type: ActorType = ActorType.UNKNOWN
    # The requested action/feature phrase or verb.
    action: Optional[str] = None
    action_type: ActionType = ActionType.UNKNOWN
    # Object of the action (dependency-parse object subtree).
    target: Optional[str] = None
    # Clause following an "if"/"when"/"after"/... marker, if any.
    condition: Optional[str] = None
    # The "so that ..." clause of a user story, if present.
    benefit: Optional[str] = None
    # Placeholder names found as <name> in the text.
    examples: List[str] = field(default_factory=list)
    # Placeholder name -> type hint (currently always "string").
    variables: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Enum fields are flattened to their string values so the result is
        directly JSON-serializable.
        """
        return {
            "raw_text": self.raw_text,
            "actor": self.actor,
            "actor_type": self.actor_type.value,
            "action": self.action,
            "action_type": self.action_type.value,
            "target": self.target,
            "condition": self.condition,
            "benefit": self.benefit,
            "examples": self.examples,
            "variables": self.variables,
        }
class NLPAnalyzer:
    """Analyzer for natural language requirements.

    Wraps a spaCy pipeline and extracts actor, action, target, condition,
    benefit and variables from free-form text into a RequirementAnalysis.
    """

    def __init__(self, model: str = "en_core_web_sm"):
        """Initialize the analyzer with a spaCy model.

        Args:
            model: spaCy model name. Defaults to en_core_web_sm.

        Raises:
            subprocess.CalledProcessError: If the model is missing and the
                automatic download fails.
        """
        try:
            self.nlp = spacy.load(model)
        except OSError:
            # Model not installed: download it with the *current* interpreter.
            # sys.executable is used instead of a bare "python" so the model
            # is installed into the environment this process runs in ("python"
            # may resolve to a different interpreter, or not exist on PATH).
            import subprocess
            import sys

            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                check=True,
            )
            self.nlp = spacy.load(model)

    def analyze(self, text: str) -> RequirementAnalysis:
        """Analyze a requirement text and extract structured information.

        Args:
            text: The natural language requirement text.

        Returns:
            RequirementAnalysis with extracted components.
        """
        doc = self.nlp(text)
        analysis = RequirementAnalysis(raw_text=text)
        # Order matters: the user-story pass may pre-fill actor/action, and
        # the later passes skip fields that are already set.
        self._extract_user_story(doc, analysis)
        self._extract_subject(doc, analysis)
        self._extract_action(doc, analysis)
        self._extract_object(doc, analysis)
        self._extract_condition(doc, analysis)
        self._extract_variables(doc, analysis)
        return analysis

    def _extract_user_story(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract user story format: As a [role], I want [feature] so that [benefit]."""
        import re

        # (pattern, has_benefit): has_benefit marks patterns whose optional
        # third group captures the "so that ..." clause.
        user_story_patterns = [
            (r"as\s+a\s+(.*?),\s*i\s+want\s+(.*?)(?:\s+so\s+that\s+(.*))?$", True),
            (r"as\s+an\s+(.*?),\s*i\s+want\s+(.*?)(?:\s+so\s+that\s+(.*))?$", True),
            (r"as\s+a\s+(.*?)\s+i\s+can\s+(.*?)$", False),
        ]
        for pattern, has_benefit in user_story_patterns:
            match = re.search(pattern, doc.text, re.IGNORECASE)
            if not match:
                continue
            groups = match.groups()
            analysis.actor = groups[0].strip()
            if has_benefit and len(groups) >= 3 and groups[2]:
                analysis.benefit = groups[2].strip()
                analysis.action = groups[1].strip()
            else:
                feature_text = groups[1].strip()
                # Heuristic: if the feature phrase reads "... to X", keep X.
                parts = feature_text.split(" to ")
                if len(parts) > 1:
                    analysis.action = parts[1].strip()
                else:
                    analysis.action = feature_text
            break

    def _extract_subject(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the subject/actor from the requirement."""
        if analysis.actor:
            # Already found by the user-story pass.
            return
        for token in doc:
            # The first nominal subject (active or passive) is the actor.
            if token.dep_ == "nsubj" or token.dep_ == "nsubjpass":
                if not analysis.actor:
                    analysis.actor = token.text
                actor_lower = token.text.lower()
                # Substring checks so "administrator" hits "admin", etc.
                if any(x in actor_lower for x in ["user", "customer", "visitor"]):
                    analysis.actor_type = ActorType.USER
                elif any(x in actor_lower for x in ["admin", "administrator"]):
                    analysis.actor_type = ActorType.ADMIN
                elif any(x in actor_lower for x in ["system", "app", "application"]):
                    analysis.actor_type = ActorType.SYSTEM
                elif any(x in actor_lower for x in ["api", "service", "endpoint"]):
                    analysis.actor_type = ActorType.API
                break

    def _extract_action(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the action verb and classify it into an ActionType."""
        if analysis.action:
            # Action phrase already captured by the user-story pass.
            return
        # ActionType value -> verb synonyms; some entries are multi-word.
        action_keywords = {
            "create": ["create", "add", "make", "generate", "produce"],
            "read": ["view", "see", "display", "show", "list", "retrieve", "get"],
            "update": ["update", "edit", "modify", "change", "alter"],
            "delete": ["delete", "remove", "destroy", "cancel"],
            "validate": ["validate", "verify", "check", "confirm", "ensure"],
            "send": ["send", "submit", "post", "push", "dispatch"],
            "receive": ["receive", "get", "fetch", "pull"],
            "login": ["login", "sign in", "log in", "authenticate"],
            "logout": ["logout", "sign out", "log out"],
            "search": ["search", "find", "look for", "query"],
            "filter": ["filter", "narrow", "refine"],
            "export": ["export", "download", "save"],
            "import": ["import", "upload", "load"],
        }
        for token in doc:
            token_lower = token.text.lower()
            for action_type, keywords in action_keywords.items():
                if token_lower in keywords:
                    analysis.action_type = ActionType(action_type)
                    if not analysis.action:
                        analysis.action = token.text
                    # Stop at the first matching verb so ``action`` and
                    # ``action_type`` always describe the same word (the old
                    # code let later verbs overwrite ``action_type`` while
                    # ``action`` kept the first match).
                    return
        # Multi-word keywords ("sign in", "look for", ...) can never equal a
        # single token, so fall back to a phrase search when nothing matched.
        text_lower = doc.text.lower()
        for action_type, keywords in action_keywords.items():
            for keyword in keywords:
                if " " in keyword and keyword in text_lower:
                    analysis.action_type = ActionType(action_type)
                    if not analysis.action:
                        analysis.action = keyword
                    return

    def _extract_object(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the object/target from the requirement."""
        if analysis.target:
            return
        for token in doc:
            # Direct object, prepositional object, or attribute complement.
            if token.dep_ in ["dobj", "pobj", "attr"]:
                if not analysis.target:
                    # Take the token's whole subtree so modifiers are kept
                    # ("the monthly report" rather than just "report").
                    subtree_tokens = list(token.subtree)
                    analysis.target = " ".join(t.text for t in subtree_tokens)
                break

    def _extract_condition(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract the clause following the first condition marker."""
        condition_markers = ["if", "when", "after", "before", "during", "while"]
        for i, token in enumerate(doc):
            if token.text.lower() in condition_markers:
                # Keep at most 15 tokens after the marker as the condition.
                remaining_tokens = list(doc[i + 1 :])
                if remaining_tokens:
                    analysis.condition = " ".join(t.text for t in remaining_tokens[:15])
                break

    def _extract_variables(self, doc: Doc, analysis: RequirementAnalysis) -> None:
        """Extract <placeholder> variables from the requirement."""
        import re

        # Placeholders are written as <name>; their type defaults to "string".
        for var in re.findall(r"<(\w+)>", doc.text):
            analysis.variables[var] = "string"
            if var not in analysis.examples:
                analysis.examples.append(var)

    def analyze_ambiguity(self, text: str) -> "list[AmbiguityWarning]":
        """Analyze text for ambiguous language.

        Args:
            text: The text to analyze.

        Returns:
            List of ambiguity warnings.
        """
        # Imported lazily to avoid a module-level import cycle (see the
        # TYPE_CHECKING guard at the top of this module).
        from nl2gherkin.nlp.ambiguity import AmbiguityDetector

        detector = AmbiguityDetector()
        return detector.detect(text)  # type: ignore[return-value]

View File

@@ -0,0 +1,112 @@
"""Pattern definitions for requirement parsing."""
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional
class PatternType(str, Enum):
    """Types of requirement patterns.

    Subclasses ``str`` so members compare equal to their string values.
    """

    USER_STORY = "user_story"                    # "As a <role>, I want/can ..."
    SCENARIO = "scenario"                        # if/when ... then ... / Given-When-Then
    ACCEPTANCE_CRITERIA = "acceptance_criteria"  # bullet or numbered lists
    BUSINESS_RULE = "business_rule"              # no dedicated patterns registered below
    GENERAL = "general"                          # fallback used by detect_pattern_type
@dataclass
class RequirementPattern:
    """A named, regex-backed pattern used to classify requirement text."""

    name: str                          # unique identifier for the pattern
    pattern: str                       # regular expression to search for
    pattern_type: PatternType          # category this pattern belongs to
    priority: int = 0                  # higher wins when several patterns match
    description: Optional[str] = None  # human-readable summary

    def matches(self, text: str) -> bool:
        """Return True when *text* contains this pattern (case-insensitive)."""
        import re

        found = re.search(self.pattern, text, re.IGNORECASE)
        return found is not None
# Patterns for the "As a <role>, I want/can ..." user-story style.
USER_STORY_PATTERNS = [
    RequirementPattern(
        name="classic_user_story",
        pattern=r"as\s+a?\s*(?:user|role|customer|visitor|admin|sys(?:tem)?)\b.*,\s*i\s+(?:want|can|would like to)\b",
        pattern_type=PatternType.USER_STORY,
        priority=10,  # preferred over the short form when both match
        description="Classic user story format",
    ),
    RequirementPattern(
        name="short_user_story",
        pattern=r"as\s+(?:an?)\s+\w+\s*,\s*\w+.*",
        pattern_type=PatternType.USER_STORY,
        priority=5,
        description="Short user story format",
    ),
]

# Patterns for scenario-style (conditional) requirements.
SCENARIO_PATTERNS = [
    RequirementPattern(
        name="if_when_then",
        pattern=r"(?:if|when)\s+.*\s+then\s+",
        pattern_type=PatternType.SCENARIO,
        priority=10,  # the explicit then-clause is the stronger signal
        description="If-When-Then scenario format",
    ),
    RequirementPattern(
        name="given_when_then",
        pattern=r"(?:given|when|then)\s+.*",
        pattern_type=PatternType.SCENARIO,
        priority=5,
        description="Given-When-Then format",
    ),
]

# Patterns for list-style acceptance criteria (bullets or numbering).
ACCEPTANCE_CRITERIA_PATTERNS = [
    RequirementPattern(
        name="bullet_points",
        pattern=r"(?:^|\n)\s*[-*•]\s+",
        pattern_type=PatternType.ACCEPTANCE_CRITERIA,
        priority=10,
        description="Bullet point format",
    ),
    RequirementPattern(
        name="numbered_list",
        pattern=r"(?:^|\n)\s*\d+[.)]\s+",
        pattern_type=PatternType.ACCEPTANCE_CRITERIA,
        priority=10,
        description="Numbered list format",
    ),
]
def get_patterns_by_type(pattern_type: PatternType) -> List[RequirementPattern]:
    """Return every registered pattern whose type equals *pattern_type*."""
    candidates = [*USER_STORY_PATTERNS, *SCENARIO_PATTERNS, *ACCEPTANCE_CRITERIA_PATTERNS]
    return [candidate for candidate in candidates if candidate.pattern_type == pattern_type]
def detect_pattern_type(text: str) -> PatternType:
    """Detect the type of requirement based on patterns.

    Args:
        text: The requirement text to classify.

    Returns:
        The first PatternType (in enum declaration order) that has a
        matching registered pattern, or ``PatternType.GENERAL`` when
        nothing matches.  The return annotation was tightened from
        ``Optional[PatternType]``: this function never returns None.
    """
    for pattern_type in PatternType:
        for pattern in get_patterns_by_type(pattern_type):
            if pattern.matches(text):
                return pattern_type
    return PatternType.GENERAL
def find_best_matching_pattern(text: str) -> Optional[RequirementPattern]:
    """Return the highest-priority pattern matching *text*, or None.

    Ties are broken in favor of the earliest-registered pattern, matching
    the original scan order (``max`` keeps the first maximal element).
    """
    candidates = USER_STORY_PATTERNS + SCENARIO_PATTERNS + ACCEPTANCE_CRITERIA_PATTERNS
    matching = [candidate for candidate in candidates if candidate.matches(text)]
    if not matching:
        return None
    return max(matching, key=lambda candidate: candidate.priority)