# prod-end-2026/backend/app/services/openapi_service.py

from __future__ import annotations

import json
import re
from typing import Any
from urllib.parse import unquote

import yaml

from app.models import ActionIngestStatus, HttpMethod


class OpenAPIService:
    """Load OpenAPI 3.x documents and convert their operations into action payloads."""

    # Maps the lower-cased value of every HttpMethod member to the member itself;
    # path-item keys are matched against these keys to find operations.
    SUPPORTED_METHODS = {method.value.lower(): method for method in HttpMethod}
    # Media-type keys that are tried first when picking a schema from a `content` map.
    JSON_CONTENT_TYPES = ("application/json", "application/*+json")

    @staticmethod
    def load_document(raw_bytes: bytes) -> dict[str, Any]:
        """Parse and validate an uploaded OpenAPI 3.x document.

        The payload is decoded as UTF-8 and parsed as strict JSON first; if
        that fails it is parsed as YAML.

        Args:
            raw_bytes: Raw content of the uploaded file.

        Returns:
            The parsed document (its root mapping).

        Raises:
            ValueError: If the payload is empty, is not UTF-8, cannot be
                parsed, is not a mapping, does not declare an ``openapi``
                version string starting with ``3.``, has no non-empty
                ``paths`` mapping, or has no usable ``servers[0].url``.
        """
        if not raw_bytes:
            raise ValueError("OpenAPI file is empty")

        try:
            text = raw_bytes.decode("utf-8")
        except UnicodeDecodeError as exc:
            raise ValueError("OpenAPI file must be UTF-8 encoded") from exc

        try:
            # Strict JSON first: the YAML scanner does not accept tab
            # characters as indentation, so otherwise valid tab-indented JSON
            # would be rejected by the YAML parser alone.
            document = json.loads(text)
        except ValueError:
            try:
                document = yaml.safe_load(text)
            except yaml.YAMLError as exc:
                raise ValueError("OpenAPI file is not valid YAML or JSON") from exc

        if not isinstance(document, dict):
            raise ValueError("OpenAPI root must be an object")

        openapi_version = document.get("openapi")
        if not isinstance(openapi_version, str) or not openapi_version.startswith("3."):
            raise ValueError("Only OpenAPI 3.x documents are supported")

        if not isinstance(document.get("paths"), dict) or not document["paths"]:
            raise ValueError("OpenAPI file must contain a non-empty paths section")

        # Every extracted action carries the base URL, so it is mandatory.
        if OpenAPIService._extract_base_url(document) is None:
            raise ValueError(
                "OpenAPI file must contain servers[0].url (base_url)"
            )

        return document

    @classmethod
    def extract_actions(
        cls,
        document: dict[str, Any],
        *,
        source_filename: str | None = None,
    ) -> list[dict[str, Any]]:
        return cls.extract_actions_with_failures(document, source_filename=source_filename)["succeeded"]

    @classmethod
    def extract_actions_with_failures(
        cls,
        document: dict[str, Any],
        *,
        source_filename: str | None = None,
    ) -> dict[str, list[dict[str, Any]]]:
        base_url = cls._extract_base_url(document)
        succeeded_actions: list[dict[str, Any]] = []
        failed_actions: list[dict[str, Any]] = []

        for path, path_item in document.get("paths", {}).items():
            if not isinstance(path_item, dict):
                continue

            shared_parameters = path_item.get("parameters", [])

            for method_name, operation in path_item.items():
                if method_name not in cls.SUPPORTED_METHODS:
                    continue
                if not isinstance(operation, dict):
                    failed_actions.append(
                        cls._build_failed_action_payload(
                            method_name=method_name,
                            path=path,
                            base_url=base_url,
                            source_filename=source_filename,
                            raw_spec=operation,
                            error_message="Operation definition must be an object",
                        )
                    )
                    continue

                try:
                    succeeded_actions.append(
                        cls._build_succeeded_action_payload(
                            method_name=method_name,
                            path=path,
                            operation=operation,
                            shared_parameters=shared_parameters,
                            document=document,
                            base_url=base_url,
                            source_filename=source_filename,
                        )
                    )
                except ValueError as exc:
                    failed_actions.append(
                        cls._build_failed_action_payload(
                            method_name=method_name,
                            path=path,
                            base_url=base_url,
                            source_filename=source_filename,
                            raw_spec=operation,
                            error_message=str(exc),
                        )
                    )

        return {
            "succeeded": succeeded_actions,
            "failed": failed_actions,
        }

    @classmethod
    def _build_succeeded_action_payload(
        cls,
        *,
        method_name: str,
        path: str,
        operation: dict[str, Any],
        shared_parameters: list[Any] | None,
        document: dict[str, Any],
        base_url: str | None,
        source_filename: str | None,
    ) -> dict[str, Any]:
        normalized_operation = cls._dereference(operation, document)
        parameters = cls._merge_parameters(shared_parameters, normalized_operation.get("parameters", []), document)

        return {
            "operation_id": normalized_operation.get("operationId") or cls._build_operation_id(method_name, path),
            "method": cls.SUPPORTED_METHODS[method_name],
            "path": path,
            "base_url": base_url,
            "summary": normalized_operation.get("summary"),
            "description": normalized_operation.get("description"),
            "tags": normalized_operation.get("tags"),
            "parameters_schema": cls._build_parameters_schema(parameters, document),
            "request_body_schema": cls._extract_request_body_schema(normalized_operation, document),
            "response_schema": cls._extract_response_schema(normalized_operation, document),
            "source_filename": source_filename,
            "raw_spec": normalized_operation,
            "ingest_status": ActionIngestStatus.SUCCEEDED,
            "ingest_error": None,
        }

    @classmethod
    def _build_failed_action_payload(
        cls,
        *,
        method_name: str,
        path: str,
        base_url: str | None,
        source_filename: str | None,
        raw_spec: Any,
        error_message: str,
    ) -> dict[str, Any]:
        operation = raw_spec if isinstance(raw_spec, dict) else {}

        return {
            "operation_id": operation.get("operationId") or cls._build_operation_id(method_name, path),
            "method": cls.SUPPORTED_METHODS[method_name],
            "path": path,
            "base_url": base_url,
            "summary": operation.get("summary"),
            "description": operation.get("description"),
            "tags": operation.get("tags"),
            "parameters_schema": None,
            "request_body_schema": None,
            "response_schema": None,
            "source_filename": source_filename,
            "raw_spec": operation or None,
            "ingest_status": ActionIngestStatus.FAILED,
            "ingest_error": error_message,
        }

    @staticmethod
    def _extract_base_url(document: dict[str, Any]) -> str | None:
        """Return the stripped ``servers[0].url`` of ``document``.

        Returns ``None`` when ``servers`` is not a non-empty list, its first
        entry is not a mapping, or the URL is not a non-blank string.
        """
        servers = document.get("servers")
        if not isinstance(servers, list) or not servers:
            return None

        first_server = servers[0]
        if not isinstance(first_server, dict):
            return None

        url = first_server.get("url")
        if not isinstance(url, str):
            return None

        # A whitespace-only URL counts as missing.
        return url.strip() or None

    @classmethod
    def _merge_parameters(
        cls,
        path_parameters: list[Any] | None,
        operation_parameters: list[Any] | None,
        document: dict[str, Any],
    ) -> list[dict[str, Any]]:
        merged: dict[tuple[str | None, str | None], dict[str, Any]] = {}

        for raw_parameter in (path_parameters or []) + (operation_parameters or []):
            parameter = cls._dereference(raw_parameter, document)
            if not isinstance(parameter, dict):
                continue
            key = (parameter.get("name"), parameter.get("in"))
            merged[key] = parameter

        return list(merged.values())

    @classmethod
    def _build_parameters_schema(
        cls,
        parameters: list[dict[str, Any]],
        document: dict[str, Any],
    ) -> dict[str, Any] | None:
        if not parameters:
            return None

        properties: dict[str, Any] = {}
        required: list[str] = []

        for parameter in parameters:
            name = parameter.get("name")
            if not name:
                continue
            if parameter.get("in") not in {"query", "path", "header", "cookie"}:
                continue

            schema = parameter.get("schema")
            if schema is None:
                schema = cls._extract_schema_from_content(parameter.get("content"), document)
            else:
                schema = cls._dereference(schema, document)

            property_schema = schema if isinstance(schema, dict) else {"type": "string"}
            property_schema = {
                **property_schema,
                "x-parameter-location": parameter.get("in"),
            }

            if parameter.get("description"):
                property_schema["description"] = parameter["description"]

            properties[name] = property_schema

            if parameter.get("required"):
                required.append(name)

        if not properties:
            return None

        schema: dict[str, Any] = {
            "type": "object",
            "properties": properties,
        }
        if required:
            schema["required"] = required

        return schema

    @classmethod
    def _extract_request_body_schema(
        cls,
        operation: dict[str, Any],
        document: dict[str, Any],
    ) -> dict[str, Any] | None:
        request_body = operation.get("requestBody")
        if not isinstance(request_body, dict):
            return None
        request_body = cls._dereference(request_body, document)
        schema = cls._extract_schema_from_content(request_body.get("content"), document)
        if not isinstance(schema, dict):
            return None

        if request_body.get("required"):
            schema = {**schema, "x-required": True}

        return schema

    @classmethod
    def _extract_response_schema(
        cls,
        operation: dict[str, Any],
        document: dict[str, Any],
    ) -> dict[str, Any] | None:
        responses = operation.get("responses")
        if not isinstance(responses, dict):
            return None

        for status_code, response in responses.items():
            if not str(status_code).startswith("2"):
                continue

            normalized_response = cls._dereference(response, document)
            if not isinstance(normalized_response, dict):
                continue

            schema = cls._extract_schema_from_content(normalized_response.get("content"), document)
            if isinstance(schema, dict):
                return schema

            if normalized_response.get("description"):
                return {"description": normalized_response["description"]}

        return None

    @classmethod
    def _extract_schema_from_content(cls, content: Any, document: dict[str, Any]) -> dict[str, Any] | None:
        if not isinstance(content, dict):
            return None

        preferred_content_type = next((content_type for content_type in cls.JSON_CONTENT_TYPES if content_type in content), None)
        items = []
        if preferred_content_type:
            items.append((preferred_content_type, content[preferred_content_type]))
        items.extend((content_type, value) for content_type, value in content.items() if content_type != preferred_content_type)

        for content_type, value in items:
            if not isinstance(value, dict):
                continue
            schema = value.get("schema")
            if not isinstance(schema, dict):
                continue

            normalized_schema = cls._dereference(schema, document)
            if isinstance(normalized_schema, dict):
                return {
                    **normalized_schema,
                    "x-content-type": content_type,
                }

        return None

    @classmethod
    def _dereference(cls, value: Any, document: dict[str, Any]) -> Any:
        if isinstance(value, list):
            return [cls._dereference(item, document) for item in value]

        if not isinstance(value, dict):
            return value

        if "$ref" in value:
            resolved = cls._resolve_ref(value["$ref"], document)
            merged = cls._dereference(resolved, document)
            if not isinstance(merged, dict):
                return merged

            sibling_fields = {key: cls._dereference(item, document) for key, item in value.items() if key != "$ref"}
            return {**merged, **sibling_fields}

        return {key: cls._dereference(item, document) for key, item in value.items()}

    @staticmethod
    def _resolve_ref(ref: str, document: dict[str, Any]) -> Any:
        """Resolve a local JSON-pointer reference (``#/...``) inside ``document``.

        Each pointer segment is unescaped (``~1`` -> ``/``, ``~0`` -> ``~``)
        and looked up as a mapping key, or as an index when the current node
        is a list. A segment containing ``%`` is tried verbatim first and then
        percent-decoded, because a pointer used as a URI fragment may be
        percent-encoded (e.g. ``%7Bid%7D`` for ``{id}``).

        Args:
            ref: The ``$ref`` value.
            document: Full OpenAPI document.

        Returns:
            The referenced fragment of the document.

        Raises:
            ValueError: If ``ref`` is not a string starting with ``#/`` or
                does not point at an existing location.
        """
        if not isinstance(ref, str) or not ref.startswith("#/"):
            raise ValueError(f"Only local $ref values are supported, got: {ref}")

        current: Any = document
        for part in ref[2:].split("/"):
            spellings = [part, unquote(part)] if "%" in part else [part]
            for spelling in spellings:
                # Order matters: "~1" must be unescaped before "~0".
                token = spelling.replace("~1", "/").replace("~0", "~")
                if isinstance(current, dict) and token in current:
                    current = current[token]
                    break
                if (
                    isinstance(current, list)
                    and token.isascii()
                    and token.isdigit()
                    and int(token) < len(current)
                ):
                    current = current[int(token)]
                    break
            else:
                raise ValueError(f"Could not resolve OpenAPI reference: {ref}")

        return current

    @staticmethod
    def _build_operation_id(method_name: str, path: str) -> str:
        """Derive a fallback operation id from the HTTP method and path.

        Braces are removed, surrounding slashes are trimmed, every run of
        characters other than ASCII letters, digits and ``/`` becomes ``_``,
        and slashes become ``_``. An empty result is replaced by ``root``.
        The id is ``<method>_<path>`` in lower case.
        """
        trimmed = re.sub(r"[{}]", "", path).strip("/")
        slug = re.sub(r"[^a-zA-Z0-9/]+", "_", trimmed).replace("/", "_")
        if not slug:
            slug = "root"
        return "_".join((method_name.lower(), slug.lower()))