# File-listing metadata (non-code header): captured 2026-03-17 18:32:44 +03:00,
# 1546 lines, 60 KiB, Python.
from __future__ import annotations
import asyncio
import json
import os
import re
from urllib.parse import urlparse
import uuid
from datetime import datetime, timezone
from typing import Any
import httpx
from redis.asyncio import Redis
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database.session import SessionLocal
from app.models import (
Action,
Capability,
ExecutionRun,
ExecutionRunStatus,
ExecutionStepRun,
ExecutionStepStatus,
HttpMethod,
Pipeline,
PipelineStatus,
)
from app.models.capability import CapabilityType
from app.utils.business_logger import log_business_event
class ExecutionServiceError(Exception):
    """Base error for execution-service failures (run setup, graph validation, lookups)."""


class StepExecutionError(ExecutionServiceError):
    """Failure of a single pipeline step.

    Optionally carries the upstream response snapshot so callers can persist
    it alongside the error message.
    """

    def __init__(self, message: str, response_snapshot: dict[str, Any] | None = None) -> None:
        self.response_snapshot = response_snapshot
        super().__init__(message)
class RunContextStore:
    """Persist per-run execution context (step outputs, edge values).

    Writes go to Redis (with a TTL) when available and always to an
    in-process fallback dict, so reads keep working through a Redis outage
    within the same process. Redis is permanently disabled for this store
    instance after the first connection failure.
    """

    # Class-level cache shared across all instances.
    # NOTE(review): entries are never evicted, so this grows for the process
    # lifetime — confirm run volume makes the unbounded cache acceptable.
    _memory_fallback: dict[str, dict[str, Any]] = {}

    def __init__(self, redis_url: str | None = None, *, ttl_seconds: int = 24 * 60 * 60) -> None:
        """Configure the store; ``redis_url`` defaults to the REDIS_URL env var."""
        self.redis_url = redis_url or os.getenv("REDIS_URL")
        self.ttl_seconds = ttl_seconds
        self._redis: Redis | None = None
        self._redis_disabled = False

    async def load_context(self, run_id: uuid.UUID) -> dict[str, Any]:
        """Return the stored context for ``run_id``; ``{}`` when nothing usable exists.

        Redis is consulted first; corrupt or non-dict payloads fall through to
        the in-memory fallback.
        """
        key = self._build_key(run_id)
        redis = await self._get_redis()
        if redis is not None:
            raw = await redis.get(key)
            if isinstance(raw, str) and raw.strip():
                try:
                    payload = json.loads(raw)
                    if isinstance(payload, dict):
                        return payload
                except json.JSONDecodeError:
                    # Corrupt JSON in Redis: ignore and use the fallback below.
                    pass
        cached = self._memory_fallback.get(key)
        if isinstance(cached, dict):
            return cached
        return {}

    async def save_context(self, run_id: uuid.UUID, context: dict[str, Any]) -> None:
        """Write ``context`` to Redis (best effort) and to the in-memory fallback.

        The fallback stores the dict by reference, so later caller-side
        mutations are visible to subsequent in-process loads.
        """
        key = self._build_key(run_id)
        redis = await self._get_redis()
        if redis is not None:
            # default=str keeps non-JSON values (UUIDs, datetimes) serializable.
            await redis.set(key, json.dumps(context, ensure_ascii=False, default=str), ex=self.ttl_seconds)
        self._memory_fallback[key] = context

    def _build_key(self, run_id: uuid.UUID) -> str:
        """Redis key under which a run's context is stored."""
        return f"execution:{run_id}:context"

    async def _get_redis(self) -> Redis | None:
        """Return a live Redis client, or ``None`` when unconfigured or previously failed.

        The first connection/ping error disables Redis for this instance;
        callers then rely solely on the in-memory fallback.
        """
        if self._redis_disabled or not self.redis_url:
            return None
        if self._redis is not None:
            return self._redis
        try:
            self._redis = Redis.from_url(self.redis_url, encoding="utf-8", decode_responses=True)
            await self._redis.ping()
            return self._redis
        except Exception:
            self._redis_disabled = True
            self._redis = None
            return None
class ExecutionService:
    """Creates pipeline execution runs and drives them step by step to completion."""

    # Strong references to in-flight background tasks; each task removes
    # itself on completion via add_done_callback, avoiding premature GC.
    ACTIVE_TASKS: set[asyncio.Task[Any]] = set()
    def __init__(self, session: AsyncSession, *, context_store: RunContextStore | None = None) -> None:
        """Bind the service to a DB session and an optional context store."""
        self.session = session
        self.context_store = context_store or RunContextStore()
    async def create_run(
        self,
        *,
        pipeline_id: uuid.UUID,
        inputs: dict[str, Any] | None = None,
        initiated_by: uuid.UUID | None = None,
    ) -> ExecutionRun:
        """Create and persist a QUEUED run for a READY pipeline.

        Also seeds an empty execution context in the context store and emits
        an ``execution_run_queued`` business event.

        Raises:
            ExecutionServiceError: if the pipeline is missing or not READY.
        """
        pipeline = await self.session.get(Pipeline, pipeline_id)
        if pipeline is None:
            raise ExecutionServiceError("Pipeline not found")
        if pipeline.status != PipelineStatus.READY:
            raise ExecutionServiceError("Pipeline is not ready for execution")
        run = ExecutionRun(
            pipeline_id=pipeline_id,
            initiated_by=initiated_by,
            status=ExecutionRunStatus.QUEUED,
            inputs=inputs or {},
        )
        self.session.add(run)
        await self.session.commit()
        # Refresh to pick up DB-generated fields (id, timestamps).
        await self.session.refresh(run)
        await self.context_store.save_context(run.id, self._build_empty_context())
        log_business_event(
            "execution_run_queued",
            run_id=str(run.id),
            pipeline_id=str(run.pipeline_id),
            user_id=str(initiated_by) if initiated_by is not None else None,
            inputs_count=len(run.inputs or {}),
        )
        return run
    @classmethod
    def start_background_execution(cls, run_id: uuid.UUID) -> None:
        """Launch ``execute_run`` as a fire-and-forget asyncio task.

        The task is parked in ACTIVE_TASKS so it is not garbage-collected
        mid-flight; the done-callback removes it again when finished.
        """
        task = asyncio.create_task(cls._run_in_background(run_id))
        cls.ACTIVE_TASKS.add(task)
        task.add_done_callback(cls.ACTIVE_TASKS.discard)
    @classmethod
    async def _run_in_background(cls, run_id: uuid.UUID) -> None:
        """Execute a run on a dedicated DB session scoped to the background task."""
        async with SessionLocal() as session:
            service = cls(session)
            await service.execute_run(run_id)
    async def execute_run(self, run_id: uuid.UUID) -> None:
        """Run all steps of a queued run in topological order, fail-fast.

        Phases: validate run/pipeline, normalize + sort the graph, execute each
        step's endpoint chain while threading outputs through the shared
        context, then persist a summary and final status. The first failing
        step marks all remaining steps as skipped and stops the loop.

        Raises:
            ExecutionServiceError: if the run row does not exist.
        """
        run = await self.session.get(ExecutionRun, run_id)
        if run is None:
            log_business_event(
                "execution_run_rejected",
                run_id=str(run_id),
                reason="run_not_found",
            )
            raise ExecutionServiceError("Execution run not found")
        pipeline = await self.session.get(Pipeline, run.pipeline_id)
        if pipeline is None:
            run.status = ExecutionRunStatus.FAILED
            run.error = "Pipeline not found"
            run.finished_at = self._now_utc()
            await self.session.commit()
            log_business_event(
                "execution_run_failed",
                run_id=str(run.id),
                pipeline_id=str(run.pipeline_id),
                reason="pipeline_not_found",
            )
            return
        # Graph validation: any normalization/sort error fails the run up front.
        try:
            node_by_step, edges, edges_by_target, edges_by_source = self._normalize_graph(pipeline.nodes, pipeline.edges)
            ordered_steps = self._topological_sort(list(node_by_step.keys()), edges)
            if not ordered_steps:
                raise ExecutionServiceError("Pipeline graph has no executable steps")
        except Exception as exc:
            run.status = ExecutionRunStatus.FAILED
            run.error = f"Invalid pipeline graph: {exc}"
            run.finished_at = self._now_utc()
            await self.session.commit()
            log_business_event(
                "execution_run_failed",
                run_id=str(run.id),
                pipeline_id=str(run.pipeline_id),
                reason="invalid_pipeline_graph",
                details=str(exc),
            )
            return
        run.status = ExecutionRunStatus.RUNNING
        run.started_at = self._now_utc()
        run.error = None
        run.summary = None
        await self.session.commit()
        log_business_event(
            "execution_run_started",
            run_id=str(run.id),
            pipeline_id=str(run.pipeline_id),
            user_id=str(run.initiated_by) if run.initiated_by is not None else None,
            total_steps=len(ordered_steps),
        )
        # Context is shared state across steps: outputs per step and values
        # already extracted per edge.
        context = await self.context_store.load_context(run.id)
        context = self._normalize_context(context)
        step_outputs = context["step_outputs"]
        edge_values = context["edge_values"]
        await self.context_store.save_context(run.id, context)
        status_by_step: dict[int, ExecutionStepStatus] = {}
        succeeded_count = 0
        failed_count = 0
        skipped_count = 0
        for index, step in enumerate(ordered_steps):
            node = node_by_step.get(step)
            if node is None:
                continue
            request_payload: dict[str, Any] = {}
            incoming = edges_by_target.get(step, [])
            # Persist the step as RUNNING before doing any work so the UI can
            # observe progress even if the process dies mid-step.
            step_run = self._create_step_run_from_node(run.id, node, status=ExecutionStepStatus.RUNNING)
            step_run.started_at = self._now_utc()
            self.session.add(step_run)
            await self.session.commit()
            try:
                resolved_inputs, _missing_external = self._resolve_node_inputs(
                    node=node,
                    incoming_edges=incoming,
                    step_outputs=step_outputs,
                    edge_values=edge_values,
                    run_inputs=run.inputs or {},
                )
                # Placeholder payload so a failure before the chain runs still
                # records the resolved inputs.
                request_payload = {
                    "resolved_inputs": dict(resolved_inputs),
                    "request_snapshot": {
                        "chain_mode": "sequential_endpoints",
                        "endpoints_trace": [],
                    },
                }
                try:
                    (
                        request_payload,
                        response_snapshot,
                        output_payload,
                        primary_capability_id,
                        primary_action_id,
                    ) = await self._execute_node_endpoint_chain(
                        node=node,
                        resolved_inputs=resolved_inputs,
                        run_inputs=run.inputs or {},
                    )
                except StepExecutionError as chain_exc:
                    # Preserve the partial per-endpoint trace on the request
                    # snapshot before re-raising into the outer handler.
                    chain_snapshot = (
                        chain_exc.response_snapshot
                        if isinstance(chain_exc.response_snapshot, dict)
                        else {}
                    )
                    chain_trace = chain_snapshot.get("endpoints_trace")
                    if isinstance(chain_trace, list):
                        request_payload["request_snapshot"]["endpoints_trace"] = chain_trace
                    raise
                step_run.capability_id = primary_capability_id
                step_run.action_id = primary_action_id
                # Record outputs and fan them out along outgoing edges.
                step_outputs[str(step)] = output_payload
                context["step_outputs"] = step_outputs
                context["final_output_step"] = step
                context["final_output"] = output_payload
                for edge in edges_by_source.get(step, []):
                    edge_type = edge.get("type")
                    to_step = edge.get("to_step")
                    if not isinstance(edge_type, str) or not isinstance(to_step, int):
                        continue
                    value = self._extract_value_from_output(output_payload, edge_type)
                    if value is not None:
                        edge_values[self._build_edge_value_key(step, to_step, edge_type)] = value
                context["edge_values"] = edge_values
                await self.context_store.save_context(run.id, context)
                await self._finalize_step_run(
                    step_run=step_run,
                    status=ExecutionStepStatus.SUCCEEDED,
                    request_payload=request_payload,
                    response_snapshot=response_snapshot,
                    error=None,
                )
                status_by_step[step] = ExecutionStepStatus.SUCCEEDED
                succeeded_count += 1
            except StepExecutionError as exc:
                # Known step failure: persist, log, skip the rest, stop.
                await self._finalize_step_run(
                    step_run=step_run,
                    status=ExecutionStepStatus.FAILED,
                    request_payload=request_payload,
                    response_snapshot=exc.response_snapshot,
                    error=str(exc),
                )
                log_business_event(
                    "execution_step_failed",
                    run_id=str(run.id),
                    pipeline_id=str(run.pipeline_id),
                    step=step,
                    reason=str(exc),
                )
                status_by_step[step] = ExecutionStepStatus.FAILED
                failed_count += 1
                skipped_count += await self._mark_remaining_steps_as_skipped(
                    run_id=run.id,
                    node_by_step=node_by_step,
                    remaining_steps=ordered_steps[index + 1:],
                    status_by_step=status_by_step,
                    reason=f"Skipped: run stopped after failure at step {step}",
                )
                break
            except Exception as exc:
                # Unexpected failure: same handling, but snapshot only the
                # exception type since there is no upstream response.
                await self._finalize_step_run(
                    step_run=step_run,
                    status=ExecutionStepStatus.FAILED,
                    request_payload=request_payload,
                    response_snapshot={
                        "error_type": type(exc).__name__,
                    },
                    error=f"Unhandled step error: {exc}",
                )
                log_business_event(
                    "execution_step_failed",
                    run_id=str(run.id),
                    pipeline_id=str(run.pipeline_id),
                    step=step,
                    reason=f"Unhandled step error: {exc}",
                )
                status_by_step[step] = ExecutionStepStatus.FAILED
                failed_count += 1
                skipped_count += await self._mark_remaining_steps_as_skipped(
                    run_id=run.id,
                    node_by_step=node_by_step,
                    remaining_steps=ordered_steps[index + 1:],
                    status_by_step=status_by_step,
                    reason=f"Skipped: run stopped after failure at step {step}",
                )
                break
        run.finished_at = self._now_utc()
        run.summary = {
            "total_steps": len(ordered_steps),
            "succeeded_steps": succeeded_count,
            "failed_steps": failed_count,
            "skipped_steps": skipped_count,
            "final_output_step": context.get("final_output_step"),
            "final_output": context.get("final_output"),
        }
        # Final status: clean run -> SUCCEEDED; some progress -> PARTIAL_FAILED;
        # nothing succeeded -> FAILED.
        if failed_count == 0 and skipped_count == 0:
            run.status = ExecutionRunStatus.SUCCEEDED
            run.error = None
        elif succeeded_count > 0:
            run.status = ExecutionRunStatus.PARTIAL_FAILED
            run.error = "Execution finished with failed/skipped steps"
        else:
            run.status = ExecutionRunStatus.FAILED
            run.error = "Execution failed"
        await self.session.commit()
        log_business_event(
            "execution_run_finished",
            run_id=str(run.id),
            pipeline_id=str(run.pipeline_id),
            user_id=str(run.initiated_by) if run.initiated_by is not None else None,
            result_status=run.status.value,
            total_steps=len(ordered_steps),
            succeeded_steps=succeeded_count,
            failed_steps=failed_count,
            skipped_steps=skipped_count,
        )
    async def _finalize_step_run(
        self,
        *,
        step_run: ExecutionStepRun,
        status: ExecutionStepStatus,
        request_payload: dict[str, Any],
        response_snapshot: dict[str, Any] | None,
        error: str | None,
    ) -> None:
        """Persist a step's terminal state: status, snapshots, error, and duration."""
        step_run.status = status
        step_run.resolved_inputs = request_payload.get("resolved_inputs")
        step_run.request_snapshot = request_payload.get("request_snapshot")
        step_run.response_snapshot = response_snapshot
        step_run.error = error
        step_run.finished_at = self._now_utc()
        step_run.duration_ms = self._duration_ms(step_run.started_at, step_run.finished_at)
        self.session.add(step_run)
        await self.session.commit()
@staticmethod
def _build_empty_context() -> dict[str, Any]:
return {
"step_outputs": {},
"edge_values": {},
"final_output_step": None,
"final_output": None,
}
def _normalize_context(self, raw_context: Any) -> dict[str, Any]:
context = dict(raw_context) if isinstance(raw_context, dict) else {}
step_outputs = context.get("step_outputs")
if not isinstance(step_outputs, dict):
step_outputs = {}
edge_values = context.get("edge_values")
if not isinstance(edge_values, dict):
edge_values = {}
final_output_step = context.get("final_output_step")
if not isinstance(final_output_step, int):
final_output_step = None
final_output = context.get("final_output")
return {
"step_outputs": step_outputs,
"edge_values": edge_values,
"final_output_step": final_output_step,
"final_output": final_output,
}
@staticmethod
def _build_edge_value_key(from_step: int, to_step: int, edge_type: str) -> str:
return f"{from_step}:{to_step}:{edge_type}"
    async def _execute_node_endpoint_chain(
        self,
        *,
        node: dict[str, Any],
        resolved_inputs: dict[str, Any],
        run_inputs: dict[str, Any],
    ) -> tuple[dict[str, Any], dict[str, Any], Any, uuid.UUID | None, uuid.UUID | None]:
        """Execute a node's endpoints sequentially, feeding each output into the next.

        Returns ``(request_payload, response_snapshot, final_output,
        primary_capability_id, primary_action_id)`` where "primary" refers to
        the node's first endpoint. Both snapshots carry the full per-endpoint
        ``endpoints_trace``.

        Raises:
            StepExecutionError: on the first failing endpoint; the partial
                trace is attached as ``response_snapshot["endpoints_trace"]``.
        """
        endpoints = self._get_node_endpoints(node)
        if not endpoints:
            raise StepExecutionError("Node endpoint does not have a valid capability_id")
        endpoints_trace: list[dict[str, Any]] = []
        # chain_scope accumulates inputs endpoint-to-endpoint; the node's own
        # resolved inputs are "protected" and never overwritten by chaining.
        chain_scope = dict(resolved_inputs)
        protected_inputs = set(resolved_inputs.keys())
        previous_output: Any = None
        final_output: Any = None
        final_request_snapshot: dict[str, Any] | None = None
        final_response_snapshot: dict[str, Any] | None = None
        primary_capability_id: uuid.UUID | None = None
        primary_action_id: uuid.UUID | None = None
        for endpoint_index, endpoint in enumerate(endpoints, start=1):
            capability_uuid, capability = await self._get_capability_from_endpoint(endpoint)
            if primary_capability_id is None:
                primary_capability_id = capability_uuid
            capability_type = self._capability_type_value(capability)
            trace_item: dict[str, Any] = {
                "endpoint_index": endpoint_index,
                "capability_id": str(capability_uuid),
                "capability_type": capability_type,
            }
            if capability_type == CapabilityType.COMPOSITE.value:
                # Composite capability: run its nested recipe instead of a
                # single HTTP action.
                expected_inputs = self._collect_expected_input_names(capability=capability)
                endpoint_inputs = self._apply_chained_output_inputs(
                    base_scope=chain_scope,
                    previous_output=previous_output,
                    expected_inputs=expected_inputs,
                    protected_inputs=protected_inputs,
                )
                composite_request_snapshot = {
                    "capability_type": capability_type,
                    "recipe_version": (
                        capability.recipe.get("version")
                        if isinstance(capability.recipe, dict)
                        else None
                    ),
                }
                trace_item["resolved_inputs"] = endpoint_inputs
                trace_item["request_snapshot"] = composite_request_snapshot
                try:
                    endpoint_response, endpoint_output = await self._execute_composite_capability(
                        capability=capability,
                        resolved_inputs=endpoint_inputs,
                        run_inputs=run_inputs,
                    )
                except StepExecutionError as exc:
                    trace_item["status"] = "failed"
                    trace_item["response_snapshot"] = exc.response_snapshot
                    trace_item["error"] = str(exc)
                    endpoints_trace.append(trace_item)
                    raise StepExecutionError(
                        f"Endpoint {endpoint_index} failed: {exc}",
                        response_snapshot={"endpoints_trace": endpoints_trace},
                    ) from exc
                trace_item["status"] = "succeeded"
                trace_item["response_snapshot"] = endpoint_response
                endpoints_trace.append(trace_item)
                final_request_snapshot = composite_request_snapshot
                final_response_snapshot = endpoint_response
                final_output = endpoint_output
                previous_output = endpoint_output
                chain_scope = endpoint_inputs
                continue
            action = await self._get_action_from_capability(capability_uuid, capability)
            # Only the first endpoint's action is recorded on the step run.
            if endpoint_index == 1 and primary_action_id is None:
                primary_action_id = action.id
            expected_inputs = self._collect_expected_input_names(
                capability=capability,
                action=action,
            )
            endpoint_inputs = self._apply_chained_output_inputs(
                base_scope=chain_scope,
                previous_output=previous_output,
                expected_inputs=expected_inputs,
                protected_inputs=protected_inputs,
            )
            step_request = self._build_request_payload(
                action=action,
                resolved_inputs=endpoint_inputs,
            )
            missing_required = sorted(set(step_request["missing_required"]))
            trace_item["action_id"] = str(action.id)
            trace_item["resolved_inputs"] = endpoint_inputs
            trace_item["request_snapshot"] = step_request.get("request_snapshot")
            if missing_required:
                # Fail before calling out: required fields are unsatisfiable.
                trace_item["status"] = "failed"
                trace_item["missing_required"] = missing_required
                endpoints_trace.append(trace_item)
                raise StepExecutionError(
                    f"Endpoint {endpoint_index} missing required inputs: {missing_required}",
                    response_snapshot={"endpoints_trace": endpoints_trace},
                )
            try:
                endpoint_response, endpoint_output = await self._call_action(action, step_request)
            except StepExecutionError as exc:
                trace_item["status"] = "failed"
                trace_item["response_snapshot"] = exc.response_snapshot
                trace_item["error"] = str(exc)
                endpoints_trace.append(trace_item)
                raise StepExecutionError(
                    f"Endpoint {endpoint_index} failed: {exc}",
                    response_snapshot={"endpoints_trace": endpoints_trace},
                ) from exc
            trace_item["status"] = "succeeded"
            trace_item["response_snapshot"] = endpoint_response
            endpoints_trace.append(trace_item)
            final_request_snapshot = step_request.get("request_snapshot")
            final_response_snapshot = endpoint_response
            final_output = endpoint_output
            previous_output = endpoint_output
            chain_scope = endpoint_inputs
        # Step-level snapshots mirror the last endpoint, augmented with the
        # whole trace so debugging can see every hop.
        request_snapshot = (
            dict(final_request_snapshot)
            if isinstance(final_request_snapshot, dict)
            else {}
        )
        request_snapshot["chain_mode"] = "sequential_endpoints"
        request_snapshot["endpoints_trace"] = endpoints_trace
        response_snapshot = (
            dict(final_response_snapshot)
            if isinstance(final_response_snapshot, dict)
            else {}
        )
        response_snapshot["endpoints_trace"] = endpoints_trace
        if "body" not in response_snapshot:
            response_snapshot["body"] = final_output
        request_payload = {
            "resolved_inputs": dict(resolved_inputs),
            "request_snapshot": request_snapshot,
        }
        return (
            request_payload,
            response_snapshot,
            final_output,
            primary_capability_id,
            primary_action_id,
        )
def _apply_chained_output_inputs(
self,
*,
base_scope: dict[str, Any],
previous_output: Any,
expected_inputs: list[str],
protected_inputs: set[str] | None = None,
) -> dict[str, Any]:
merged = dict(base_scope)
protected = protected_inputs or set()
if previous_output is None:
return merged
for expected_input in expected_inputs:
if expected_input in merged and expected_input in protected:
continue
resolved = self._resolve_expected_input_from_output(
output=previous_output,
expected_input=expected_input,
)
if resolved is not None:
merged[expected_input] = resolved
return merged
def _collect_expected_input_names(
self,
*,
capability: Capability | None = None,
action: Action | None = None,
) -> list[str]:
names: list[str] = []
seen: set[str] = set()
def add_name(raw_name: Any) -> None:
if not isinstance(raw_name, (str, int)):
return
name = str(raw_name).strip()
if not name or name in seen:
return
names.append(name)
seen.add(name)
if capability is not None and isinstance(capability.input_schema, dict):
for name in self._collect_schema_input_names(capability.input_schema):
add_name(name)
if action is not None:
for schema in (action.parameters_schema, action.request_body_schema):
if not isinstance(schema, dict):
continue
for name in self._collect_schema_input_names(schema):
add_name(name)
return names
@staticmethod
def _collect_schema_input_names(schema: dict[str, Any]) -> list[str]:
names: list[str] = []
required = schema.get("required")
if isinstance(required, list):
names.extend(str(item) for item in required if isinstance(item, (str, int)))
properties = schema.get("properties")
if isinstance(properties, dict):
names.extend(
str(name) for name in properties.keys() if isinstance(name, (str, int))
)
deduplicated: list[str] = []
seen: set[str] = set()
for name in names:
normalized = str(name).strip()
if not normalized or normalized in seen:
continue
deduplicated.append(normalized)
seen.add(normalized)
return deduplicated
    def _resolve_expected_input_from_output(self, *, output: Any, expected_input: str) -> Any:
        """Pull a value for ``expected_input`` out of a step's output.

        Tries, in order: exact key, the ``name[]``-stripped base key, fuzzy
        field-name alias matching, then the generic output extractor.
        """
        if isinstance(output, dict):
            if expected_input in output:
                return output[expected_input]
            # "users[]" and "users" are treated as the same logical field.
            expected_base = expected_input[:-2] if expected_input.endswith("[]") else expected_input
            if expected_base in output:
                return output[expected_base]
            for field_name, field_value in output.items():
                if not isinstance(field_name, str):
                    continue
                if self._field_alias_matches(
                    field_name=field_name,
                    expected_input=expected_input,
                ):
                    return field_value
        # Generic fallback for non-dict or unmatched outputs.
        # NOTE(review): _extract_value_from_output is defined elsewhere in this
        # module — presumably it handles nested/list shapes; verify.
        fallback = self._extract_value_from_output(output, expected_input)
        return fallback
def _field_alias_matches(self, *, field_name: str, expected_input: str) -> bool:
left = str(field_name).strip()
right = str(expected_input).strip()
if not left or not right:
return False
if left == right:
return True
left_base = left[:-2] if left.endswith("[]") else left
right_base = right[:-2] if right.endswith("[]") else right
if left_base == right_base:
return True
left_normalized = self._normalize_lookup_token(left_base)
right_normalized = self._normalize_lookup_token(right_base)
if left_normalized and right_normalized and left_normalized == right_normalized:
return True
left_tokens = self._tokenize_field_name(left_base)
right_tokens = self._tokenize_field_name(right_base)
return bool(left_tokens and right_tokens and left_tokens == right_tokens)
@staticmethod
def _tokenize_field_name(value: str) -> set[str]:
normalized = re.sub(r"([a-z])([A-Z])", r"\1 \2", str(value))
normalized = normalized.replace("_", " ").replace("-", " ")
tokens = {
token
for token in re.findall(r"[a-zA-Z0-9]+", normalized.lower())
if token
}
singularized = {
token[:-1]
for token in tokens
if token.endswith("s") and len(token) > 3
}
return tokens | singularized
@staticmethod
def _normalize_lookup_token(value: str) -> str:
return re.sub(r"[^a-zA-Z0-9]+", "", str(value).lower())
    async def _get_capability_from_endpoint(
        self,
        endpoint: dict[str, Any],
    ) -> tuple[uuid.UUID, Capability]:
        """Load the Capability row referenced by an endpoint's ``capability_id``.

        Raises:
            StepExecutionError: when the id is missing/invalid or no row exists.
        """
        capability_id = endpoint.get("capability_id") if isinstance(endpoint, dict) else None
        capability_uuid = self._to_uuid(capability_id)
        if capability_uuid is None:
            raise StepExecutionError("Node endpoint does not have a valid capability_id")
        capability = await self.session.get(Capability, capability_uuid)
        if capability is None:
            raise StepExecutionError(f"Capability not found: {capability_uuid}")
        return capability_uuid, capability
    async def _get_capability_from_node(self, node: dict[str, Any]) -> tuple[uuid.UUID, Capability]:
        """Load the capability of the node's first (primary) endpoint.

        Raises:
            StepExecutionError: when the node declares no endpoints.
        """
        endpoints = self._get_node_endpoints(node)
        if not endpoints:
            raise StepExecutionError("Node endpoint does not have a valid capability_id")
        return await self._get_capability_from_endpoint(endpoints[0])
    async def _get_action_from_capability(
        self,
        capability_uuid: uuid.UUID,
        capability: Capability,
    ) -> Action:
        """Load the Action row a capability points at.

        Raises:
            StepExecutionError: when the capability has no action_id or the
                referenced Action row does not exist.
        """
        action_uuid = capability.action_id
        if action_uuid is None:
            raise StepExecutionError(
                f"Capability does not have action_id: {capability_uuid}"
            )
        action = await self.session.get(Action, action_uuid)
        if action is None:
            raise StepExecutionError(
                f"Action not found for capability {capability_uuid}: {action_uuid}"
            )
        return action
    async def _get_action_from_node(self, node: dict[str, Any]) -> tuple[uuid.UUID, Action]:
        """Resolve a node's primary endpoint down to its capability id and Action."""
        capability_uuid, capability = await self._get_capability_from_node(node)
        action = await self._get_action_from_capability(capability_uuid, capability)
        return capability_uuid, action
    def _create_step_run_from_node(
        self,
        run_id: uuid.UUID,
        node: dict[str, Any],
        *,
        status: ExecutionStepStatus,
    ) -> ExecutionStepRun:
        """Build (without persisting) an ExecutionStepRun row from a graph node.

        Capability/action ids come from the node's first endpoint when present;
        a missing or invalid step number falls back to 0.
        """
        endpoints = self._get_node_endpoints(node)
        endpoint = endpoints[0] if endpoints else {}
        capability_id = self._to_uuid(endpoint.get("capability_id"))
        action_id = self._to_uuid(endpoint.get("action_id"))
        return ExecutionStepRun(
            run_id=run_id,
            step=self._safe_int(node.get("step"), fallback=0),
            name=str(node.get("name")) if node.get("name") is not None else None,
            capability_id=capability_id,
            action_id=action_id,
            status=status,
        )
    def _resolve_node_inputs(
        self,
        *,
        node: dict[str, Any],
        incoming_edges: list[dict[str, Any]],
        step_outputs: dict[str, Any],
        edge_values: dict[str, Any],
        run_inputs: dict[str, Any],
    ) -> tuple[dict[str, Any], list[str]]:
        """Assemble a node's inputs from incoming edges and run-level inputs.

        A value cached under the edge's key wins; otherwise it is extracted
        from the source step's stored output. External inputs declared on the
        node are copied from ``run_inputs`` when present. Returns
        ``(resolved_inputs, missing_external)`` — the second element is always
        empty by design (see the comment near the bottom).
        """
        resolved: dict[str, Any] = {}

        def add_resolved(key: str, value: Any) -> None:
            resolved[key] = value
            # Normalize common edge notation (users[] -> users) so request/body
            # keys and composite bindings can resolve expected field names.
            if key.endswith("[]"):
                normalized = key[:-2]
                if normalized and normalized not in resolved:
                    resolved[normalized] = value
            self._add_inferred_input_aliases(resolved=resolved, key=key, value=value)

        for edge in incoming_edges:
            src = edge.get("from_step")
            dst = edge.get("to_step")
            edge_type = edge.get("type")
            if not isinstance(src, int) or not isinstance(dst, int) or not isinstance(edge_type, str):
                continue
            edge_key = self._build_edge_value_key(src, dst, edge_type)
            if edge_key in edge_values:
                add_resolved(edge_type, edge_values[edge_key])
                continue
            source_output = step_outputs.get(str(src))
            if source_output is None:
                continue
            value = self._extract_value_from_output(source_output, edge_type)
            if value is not None:
                add_resolved(edge_type, value)
        external_inputs = self._normalize_str_list(node.get("external_inputs"))
        for input_name in external_inputs:
            if input_name in run_inputs:
                resolved[input_name] = run_inputs[input_name]
        # One-click execution: external inputs are optional and do not block run start.
        # Required fields are validated against action schema in _build_request_payload.
        missing_external: list[str] = []
        return resolved, missing_external
def _add_inferred_input_aliases(
self,
*,
resolved: dict[str, Any],
key: str,
value: Any,
) -> None:
aliases: set[str] = set()
key_base = key[:-2] if key.endswith("[]") else key
normalized_key = self._normalize_lookup_token(key_base)
# Some generated graphs use synthetic edge types like "user_hotel_pairs"
# for both segment and assignment transitions. Mirror these aliases so
# downstream request schemas (segments/assignments) are satisfied.
if normalized_key in {
"userhotelpairs",
"hoteluserpairs",
"userhotelpair",
"hoteluserpair",
"pairs",
}:
aliases.update({"segments", "assignments"})
inferred_alias = self._infer_collection_alias(value)
if inferred_alias:
aliases.add(inferred_alias)
for alias in aliases:
if alias not in resolved:
resolved[alias] = value
def _infer_collection_alias(self, value: Any) -> str | None:
if not isinstance(value, list):
return None
sample = next((item for item in value if isinstance(item, dict)), None)
if not isinstance(sample, dict):
return None
keys = {
self._normalize_lookup_token(str(key))
for key in sample.keys()
if isinstance(key, str)
}
if {"segmentid", "hotelid", "userids"}.issubset(keys):
return "segments"
if {"userid", "hotelid"}.issubset(keys):
return "assignments"
if {"id", "email", "lastactive"}.issubset(keys):
return "users"
if {"id", "name", "city"}.issubset(keys):
return "hotels"
return None
    def _build_request_payload(self, *, action: Action, resolved_inputs: dict[str, Any]) -> dict[str, Any]:
        """Map resolved inputs onto an action's HTTP request shape.

        Distributes each input into path/query/header/cookie parameters or the
        JSON body according to the action's schemas, applies schema defaults,
        and records anything still required but absent in ``missing_required``
        (including the ``__base_url__`` / ``__request_body__`` sentinels).
        Returns the full request description plus a ``request_snapshot`` for
        persistence.
        """
        params_schema = action.parameters_schema if isinstance(action.parameters_schema, dict) else {}
        params_properties = params_schema.get("properties", {}) if isinstance(params_schema.get("properties"), dict) else {}
        params_required = [
            str(name)
            for name in params_schema.get("required", [])
            if isinstance(name, (str, int))
        ]
        body_schema = action.request_body_schema if isinstance(action.request_body_schema, dict) else {}
        body_type = body_schema.get("type") if isinstance(body_schema.get("type"), str) else None
        body_properties = body_schema.get("properties", {}) if isinstance(body_schema.get("properties"), dict) else {}
        body_required = [
            str(name)
            for name in body_schema.get("required", [])
            if isinstance(name, (str, int))
        ]
        path_params: dict[str, Any] = {}
        query_params: dict[str, Any] = {}
        headers: dict[str, Any] = {}
        cookies: dict[str, Any] = {}
        body: Any = {} if body_type == "object" else None
        unresolved: dict[str, Any] = {}
        for key, value in resolved_inputs.items():
            property_schema = params_properties.get(key)
            if isinstance(property_schema, dict):
                # Placement follows the x-parameter-location extension;
                # unknown locations default to the query string.
                location = property_schema.get("x-parameter-location", "query")
                if location == "path":
                    path_params[key] = value
                elif location == "header":
                    headers[key] = value
                elif location == "cookie":
                    cookies[key] = value
                else:
                    query_params[key] = value
                continue
            if body_type == "object" and (not body_properties or key in body_properties):
                if not isinstance(body, dict):
                    body = {}
                body[key] = value
                continue
            unresolved[key] = value
        self._apply_schema_defaults(params_properties, path_params, query_params, headers, cookies)
        if body_type == "object":
            if not isinstance(body, dict):
                body = {}
            # Fill untouched body fields from schema defaults/examples.
            for field_name, field_schema in body_properties.items():
                if field_name in body:
                    continue
                fallback = self._schema_default_or_example(field_schema)
                if fallback is not None:
                    body[field_name] = fallback
            if not body and isinstance(body_schema.get("example"), dict):
                body = dict(body_schema["example"])
        if unresolved:
            # Leftover inputs: query string for body-less methods, body otherwise.
            if action.method in {HttpMethod.GET, HttpMethod.DELETE, HttpMethod.HEAD, HttpMethod.OPTIONS}:
                query_params.update({key: value for key, value in unresolved.items() if key not in query_params})
            else:
                if body is None:
                    body = {}
                if isinstance(body, dict):
                    for key, value in unresolved.items():
                        body.setdefault(key, value)
                else:
                    body = unresolved
        missing_required: list[str] = []
        for field_name in params_required:
            if field_name in path_params or field_name in query_params or field_name in headers or field_name in cookies:
                continue
            missing_required.append(field_name)
        if body_type == "object":
            body_dict = body if isinstance(body, dict) else {}
            for field_name in body_required:
                if field_name not in body_dict:
                    missing_required.append(field_name)
        elif body_schema.get("x-required") and body in (None, "", {}, []):
            missing_required.append("__request_body__")
        path = action.path or ""
        # Substitute {placeholders}; any left unresolved is reported as missing.
        for path_param in re.findall(r"{([^{}]+)}", path):
            if path_param in path_params:
                path = path.replace(f"{{{path_param}}}", str(path_params[path_param]))
            else:
                missing_required.append(path_param)
        path_is_absolute_url = self._is_absolute_url(path)
        base_url = self._resolve_action_base_url(action)
        if not path_is_absolute_url and not base_url:
            missing_required.append("__base_url__")
        if path_is_absolute_url:
            url = path
        else:
            url = self._join_url(base_url or "", path)
        content_type = body_schema.get("x-content-type")
        if isinstance(content_type, str) and body is not None:
            headers.setdefault("Content-Type", content_type)
        return {
            "url": url,
            "query_params": query_params,
            "headers": headers,
            "cookies": cookies,
            "json_body": body,
            "missing_required": sorted(set(missing_required)),
            "resolved_inputs": resolved_inputs,
            "request_snapshot": {
                "method": action.method.value,
                "url": url,
                "path_params": path_params,
                "query_params": query_params,
                "headers": headers,
                "cookies": cookies,
                "json_body": body,
            },
        }
    def _resolve_action_base_url(self, action: Action) -> str | None:
        """Determine the base URL for an action.

        Precedence: the action's own ``base_url``, then OpenAPI ``servers``
        entries from its raw spec, then the EXECUTION_DEFAULT_BASE_URL env
        var. Relative candidates are anchored onto the env fallback where
        possible; returns None when nothing usable is found.
        """
        fallback_base_url = os.getenv("EXECUTION_DEFAULT_BASE_URL")
        fallback_normalized = self._normalize_base_url(fallback_base_url)
        base_url = self._normalize_base_url(getattr(action, "base_url", None))
        resolved_base_url = self._resolve_base_url_with_fallback(
            candidate=base_url,
            fallback=fallback_normalized,
        )
        if resolved_base_url:
            return resolved_base_url
        raw_spec = getattr(action, "raw_spec", None)
        if isinstance(raw_spec, dict):
            servers = raw_spec.get("servers")
            if isinstance(servers, list):
                # First server that yields a usable URL wins.
                for server in servers:
                    candidate = self._resolve_server_url(server)
                    resolved = self._resolve_base_url_with_fallback(
                        candidate=candidate,
                        fallback=fallback_normalized,
                    )
                    if resolved:
                        return resolved
        if fallback_normalized:
            return fallback_normalized
        return None
def _resolve_server_url(self, server: Any) -> str | None:
if not isinstance(server, dict):
return None
raw_url = server.get("url")
if not isinstance(raw_url, str):
return None
url = raw_url.strip()
if not url:
return None
variables = server.get("variables")
if isinstance(variables, dict):
for variable_name, variable_payload in variables.items():
placeholder = f"{{{variable_name}}}"
if placeholder not in url:
continue
default_value: str | None = None
if isinstance(variable_payload, dict):
raw_default = variable_payload.get("default")
if isinstance(raw_default, str):
default_value = raw_default.strip()
if default_value:
url = url.replace(placeholder, default_value)
return self._normalize_base_url(url)
def _resolve_base_url_with_fallback(
self,
*,
candidate: str | None,
fallback: str | None,
) -> str | None:
if not candidate:
return fallback
if self._is_absolute_url(candidate):
return candidate
if fallback:
return self._join_url(fallback, candidate)
return None
@staticmethod
def _normalize_base_url(value: Any) -> str | None:
if not isinstance(value, str):
return None
normalized = value.strip()
return normalized or None
@staticmethod
def _is_absolute_url(value: str) -> bool:
parsed = urlparse(str(value or ""))
return bool(parsed.scheme and parsed.netloc)
async def _call_action(
    self,
    action: Action,
    request_payload: dict[str, Any],
) -> tuple[dict[str, Any], Any]:
    """Perform the HTTP call described by *request_payload* for *action*.

    Returns a ``(response_snapshot, parsed_body)`` pair. Raises
    StepExecutionError on transport failures (timeout / connection) and
    on HTTP status codes >= 400; for the latter the snapshot is attached
    to the exception.
    """
    # Per-call timeout is configurable via environment, defaulting to 30s.
    timeout_seconds = float(os.getenv("EXECUTION_STEP_TIMEOUT_SECONDS", "30"))
    async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
        try:
            response = await client.request(
                method=action.method.value,
                url=request_payload["url"],
                params=request_payload["query_params"] or None,
                headers=request_payload["headers"] or None,
                cookies=request_payload["cookies"] or None,
                json=request_payload["json_body"],
            )
        except httpx.TimeoutException as exc:
            raise StepExecutionError(f"Timeout while calling endpoint: {exc}") from exc
        except httpx.RequestError as exc:
            raise StepExecutionError(f"Request error while calling endpoint: {exc}") from exc
        body = self._extract_response_body(response)
        snapshot = {
            "status_code": response.status_code,
            "content_type": response.headers.get("content-type"),
            "body": body,
        }
        if response.status_code >= 400:
            raise StepExecutionError(
                f"Upstream endpoint returned HTTP {response.status_code}",
                response_snapshot=snapshot,
            )
        return snapshot, body
async def _execute_composite_capability(
    self,
    *,
    capability: Capability,
    resolved_inputs: dict[str, Any],
    run_inputs: dict[str, Any],
) -> tuple[dict[str, Any], Any]:
    """Execute a COMPOSITE capability by running its recipe steps in order.

    Steps (each referencing an ATOMIC capability) are sorted by their
    "step" number and executed sequentially; each step's inputs may bind
    to the composite run scope or to earlier step outputs. Returns a
    summary response dict (including the full nested trace) plus the
    output of the highest-numbered executed step.

    Raises StepExecutionError on an invalid recipe, a missing or
    non-ATOMIC step capability, missing required inputs, or a failed
    nested call; the partial nested trace is attached as the exception's
    response snapshot.
    """
    recipe = capability.recipe if isinstance(capability.recipe, dict) else None
    if recipe is None:
        raise StepExecutionError(
            f"Composite capability does not have a valid recipe: {capability.id}"
        )
    steps = recipe.get("steps")
    if not isinstance(steps, list) or not steps:
        raise StepExecutionError(
            f"Composite capability recipe has no steps: {capability.id}"
        )
    # Execute in ascending "step" order; non-dict entries are dropped here.
    ordered_steps = sorted(
        [step for step in steps if isinstance(step, dict)],
        key=lambda item: self._safe_int(item.get("step"), fallback=0),
    )
    # Composite scope = run inputs overlaid with this step's resolved inputs.
    composite_run_scope = dict(run_inputs)
    composite_run_scope.update(resolved_inputs)
    nested_outputs: dict[int, Any] = {}  # step number -> raw action output
    nested_trace: list[dict[str, Any]] = []  # per-step audit records
    for raw_step in ordered_steps:
        step_number = self._safe_int(raw_step.get("step"), fallback=0)
        if step_number <= 0:
            raise StepExecutionError("Composite recipe has invalid step number")
        step_capability_uuid = self._to_uuid(raw_step.get("capability_id"))
        if step_capability_uuid is None:
            raise StepExecutionError(
                f"Composite recipe step {step_number} has invalid capability_id"
            )
        step_capability = await self.session.get(Capability, step_capability_uuid)
        if step_capability is None:
            raise StepExecutionError(
                f"Composite recipe step {step_number} capability not found: {step_capability_uuid}"
            )
        # Nesting composites inside composites is rejected.
        if self._capability_type_value(step_capability) != CapabilityType.ATOMIC.value:
            raise StepExecutionError(
                f"Composite recipe step {step_number} must reference ATOMIC capability: {step_capability_uuid}"
            )
        step_action = await self._get_action_from_capability(
            step_capability_uuid,
            step_capability,
        )
        raw_inputs = raw_step.get("inputs")
        normalized_inputs: dict[str, Any] = {}
        if isinstance(raw_inputs, dict):
            for input_name, binding_expr in raw_inputs.items():
                if not isinstance(input_name, str):
                    continue
                if not isinstance(binding_expr, str):
                    continue
                # Bindings are "$run.<path>" / "$step.<n>.<path>" expressions;
                # unresolvable bindings simply leave the input unset.
                value = self._resolve_composite_binding(
                    binding_expr=binding_expr.strip(),
                    run_scope=composite_run_scope,
                    step_outputs=nested_outputs,
                )
                if value is not None:
                    normalized_inputs[input_name] = value
        step_request = self._build_request_payload(
            action=step_action,
            resolved_inputs=normalized_inputs,
        )
        missing_required = sorted(set(step_request["missing_required"]))
        if missing_required:
            # Record the failed step before aborting so the trace is complete.
            nested_trace.append(
                {
                    "step": step_number,
                    "capability_id": str(step_capability_uuid),
                    "action_id": str(step_action.id),
                    "status": "failed",
                    "resolved_inputs": normalized_inputs,
                    "missing_required": missing_required,
                }
            )
            raise StepExecutionError(
                f"Composite step {step_number} missing required inputs: {missing_required}",
                response_snapshot={"nested_trace": nested_trace},
            )
        try:
            action_response, action_output = await self._call_action(step_action, step_request)
        except StepExecutionError as exc:
            # Capture the failure in the trace, then re-raise wrapped with
            # the step number and the trace accumulated so far.
            nested_trace.append(
                {
                    "step": step_number,
                    "capability_id": str(step_capability_uuid),
                    "action_id": str(step_action.id),
                    "status": "failed",
                    "resolved_inputs": normalized_inputs,
                    "request_snapshot": step_request.get("request_snapshot"),
                    "response_snapshot": exc.response_snapshot,
                    "error": str(exc),
                }
            )
            raise StepExecutionError(
                f"Composite step {step_number} failed: {exc}",
                response_snapshot={"nested_trace": nested_trace},
            ) from exc
        nested_outputs[step_number] = action_output
        nested_trace.append(
            {
                "step": step_number,
                "capability_id": str(step_capability_uuid),
                "action_id": str(step_action.id),
                "status": "succeeded",
                "resolved_inputs": normalized_inputs,
                "request_snapshot": step_request.get("request_snapshot"),
                "response_snapshot": action_response,
            }
        )
    if not nested_outputs:
        raise StepExecutionError(
            f"Composite capability recipe has no executable steps: {capability.id}"
        )
    # The composite's overall output is the highest-numbered step's output.
    final_step = max(nested_outputs.keys())
    final_output = nested_outputs[final_step]
    composite_response = {
        "capability_type": CapabilityType.COMPOSITE.value,
        "recipe_version": recipe.get("version"),
        "steps_executed": len(nested_outputs),
        "nested_trace": nested_trace,
    }
    return composite_response, final_output
def _resolve_composite_binding(
self,
*,
binding_expr: str,
run_scope: dict[str, Any],
step_outputs: dict[int, Any],
) -> Any:
if binding_expr.startswith("$run."):
path = binding_expr[len("$run.") :]
return self._resolve_dot_path(run_scope, path)
if binding_expr.startswith("$step."):
match = re.fullmatch(r"\$step\.(\d+)\.(.+)", binding_expr)
if not match:
return None
source_step = int(match.group(1))
path = match.group(2)
source_payload = step_outputs.get(source_step)
return self._resolve_dot_path(source_payload, path)
return None
def _resolve_dot_path(self, payload: Any, path: str) -> Any:
if payload is None:
return None
current: Any = payload
for part in [chunk for chunk in str(path).split(".") if chunk]:
if isinstance(current, dict):
if part not in current:
return None
current = current.get(part)
continue
if isinstance(current, list):
if not part.isdigit():
return None
index = int(part)
if index < 0 or index >= len(current):
return None
current = current[index]
continue
return None
return current
def _capability_type_value(self, capability: Capability) -> str:
    """Coerce a capability's ``type`` attribute to its string value.

    Accepts a CapabilityType enum, a plain string, or any enum-like
    object exposing ``.value``; anything else maps to the ATOMIC default.
    """
    raw_type = getattr(capability, "type", None)
    if isinstance(raw_type, CapabilityType):
        return raw_type.value
    if isinstance(raw_type, str):
        return raw_type
    # Enum-like objects from other modules still expose `.value`.
    return str(raw_type.value) if hasattr(raw_type, "value") else CapabilityType.ATOMIC.value
@staticmethod
def _extract_response_body(response: httpx.Response) -> Any:
content_type = response.headers.get("content-type", "")
if "json" in content_type.lower():
try:
return response.json()
except ValueError:
pass
text_body = response.text
if len(text_body) > 20000:
return text_body[:20000] + "...(truncated)"
return text_body
@staticmethod
def _extract_value_from_output(output: Any, edge_type: str) -> Any:
if isinstance(output, dict):
if edge_type in output:
return output[edge_type]
normalized = edge_type[:-2] if edge_type.endswith("[]") else edge_type
if normalized in output:
return output[normalized]
if len(output) == 1:
return next(iter(output.values()))
if isinstance(output, list):
return output
return output
@staticmethod
def _normalize_graph(
raw_nodes: Any,
raw_edges: Any,
) -> tuple[dict[int, dict[str, Any]], list[dict[str, Any]], dict[int, list[dict[str, Any]]], dict[int, list[dict[str, Any]]]]:
node_by_step: dict[int, dict[str, Any]] = {}
if isinstance(raw_nodes, list):
for node in raw_nodes:
if not isinstance(node, dict):
continue
step = node.get("step")
if isinstance(step, int):
node_by_step[step] = node
edges: list[dict[str, Any]] = []
edges_by_target: dict[int, list[dict[str, Any]]] = {}
edges_by_source: dict[int, list[dict[str, Any]]] = {}
if isinstance(raw_edges, list):
for edge in raw_edges:
if not isinstance(edge, dict):
continue
src = edge.get("from_step")
dst = edge.get("to_step")
edge_type = edge.get("type")
if not isinstance(src, int) or not isinstance(dst, int) or not isinstance(edge_type, str):
continue
if src not in node_by_step or dst not in node_by_step:
continue
normalized_edge = {"from_step": src, "to_step": dst, "type": edge_type}
edges.append(normalized_edge)
edges_by_target.setdefault(dst, []).append(normalized_edge)
edges_by_source.setdefault(src, []).append(normalized_edge)
return node_by_step, edges, edges_by_target, edges_by_source
@staticmethod
def _topological_sort(steps: list[int], edges: list[dict[str, Any]]) -> list[int]:
if not steps:
return []
in_degree: dict[int, int] = {step: 0 for step in steps}
adjacency: dict[int, set[int]] = {step: set() for step in steps}
for edge in edges:
src = edge["from_step"]
dst = edge["to_step"]
if dst not in in_degree or src not in adjacency:
continue
if dst in adjacency[src]:
continue
adjacency[src].add(dst)
in_degree[dst] += 1
queue = sorted([step for step, degree in in_degree.items() if degree == 0])
ordered: list[int] = []
while queue:
current = queue.pop(0)
ordered.append(current)
for neighbor in sorted(adjacency[current]):
in_degree[neighbor] -= 1
if in_degree[neighbor] == 0:
queue.append(neighbor)
queue.sort()
if len(ordered) != len(steps):
raise ExecutionServiceError("Graph contains a cycle")
return ordered
@staticmethod
def _get_node_endpoints(node: dict[str, Any]) -> list[dict[str, Any]]:
endpoints = node.get("endpoints")
if not isinstance(endpoints, list):
return []
return [item for item in endpoints if isinstance(item, dict)]
@staticmethod
def _normalize_str_list(value: Any) -> list[str]:
if not isinstance(value, list):
return []
return [str(item) for item in value if isinstance(item, (str, int))]
@staticmethod
def _to_uuid(value: Any) -> uuid.UUID | None:
if value is None:
return None
try:
return uuid.UUID(str(value))
except (ValueError, TypeError):
return None
@staticmethod
def _safe_int(value: Any, *, fallback: int) -> int:
if isinstance(value, int):
return value
try:
return int(value)
except (TypeError, ValueError):
return fallback
@staticmethod
def _join_url(base_url: str, path: str) -> str:
if ExecutionService._is_absolute_url(path):
return path
if not base_url:
return path
base = base_url.rstrip("/")
suffix = path if path.startswith("/") else f"/{path}"
return f"{base}{suffix}"
@staticmethod
def _now_utc() -> datetime:
return datetime.now(timezone.utc)
@staticmethod
def _duration_ms(started_at: datetime | None, finished_at: datetime | None) -> int | None:
if started_at is None or finished_at is None:
return None
return max(0, int((finished_at - started_at).total_seconds() * 1000))
@staticmethod
def _schema_default_or_example(schema: Any) -> Any:
if not isinstance(schema, dict):
return None
if "default" in schema:
return schema.get("default")
if "example" in schema:
return schema.get("example")
examples = schema.get("examples")
if isinstance(examples, dict):
for example_payload in examples.values():
if isinstance(example_payload, dict) and "value" in example_payload:
return example_payload["value"]
if example_payload is not None:
return example_payload
return None
def _apply_schema_defaults(
self,
parameter_properties: dict[str, Any],
path_params: dict[str, Any],
query_params: dict[str, Any],
headers: dict[str, Any],
cookies: dict[str, Any],
) -> None:
for parameter_name, parameter_schema in parameter_properties.items():
if not isinstance(parameter_schema, dict):
continue
if parameter_name in path_params or parameter_name in query_params or parameter_name in headers or parameter_name in cookies:
continue
fallback = self._schema_default_or_example(parameter_schema)
if fallback is None:
continue
location = parameter_schema.get("x-parameter-location", "query")
if location == "path":
path_params[parameter_name] = fallback
elif location == "header":
headers[parameter_name] = fallback
elif location == "cookie":
cookies[parameter_name] = fallback
else:
query_params[parameter_name] = fallback
async def _mark_remaining_steps_as_skipped(
    self,
    *,
    run_id: uuid.UUID,
    node_by_step: dict[int, dict[str, Any]],
    remaining_steps: list[int],
    status_by_step: dict[int, ExecutionStepStatus],
    reason: str,
) -> int:
    """Persist SKIPPED step runs for every step that will not execute.

    Updates *status_by_step* in place, commits once, and returns how
    many step runs were written. Steps without a known node are ignored.
    """
    if not remaining_steps:
        return 0
    timestamp = self._now_utc()
    to_persist: list[ExecutionStepRun] = []
    for step in remaining_steps:
        node = node_by_step.get(step)
        if node is None:
            continue
        skipped_run = self._create_step_run_from_node(
            run_id,
            node,
            status=ExecutionStepStatus.SKIPPED,
        )
        skipped_run.error = reason
        # Zero-length interval: the step was never actually started.
        skipped_run.started_at = timestamp
        skipped_run.finished_at = timestamp
        skipped_run.duration_ms = 0
        to_persist.append(skipped_run)
        status_by_step[step] = ExecutionStepStatus.SKIPPED
    if to_persist:
        self.session.add_all(to_persist)
        await self.session.commit()
    return len(to_persist)