From 4d7e33ec26b232501c550d2dc11bf50742a5c4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Commaille?= <76261501+zecakeh@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:50:49 +0100 Subject: [PATCH] Add support for `$ref` URIs containing fragments in OpenAPI definitions and JSON schemas (#1751) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kévin Commaille --- .../internal/newsfragments/1751.clarification | 1 + layouts/partials/events/example.html | 11 ++- layouts/partials/events/render-event.html | 6 +- .../partials/json-schema/resolve-refs.html | 22 ++++- .../partials/openapi/render-parameters.html | 4 +- layouts/partials/openapi/render-request.html | 6 +- .../partials/openapi/render-responses.html | 5 +- layouts/shortcodes/definition.html | 4 - layouts/shortcodes/event.html | 2 +- layouts/shortcodes/http-api.html | 2 +- layouts/shortcodes/msgtypes.html | 2 +- scripts/check-event-schema-examples.py | 40 +-------- scripts/check-json-schemas.py | 15 +--- scripts/check-openapi-sources.py | 65 +------------- scripts/dump-openapi.py | 31 +------ scripts/helpers.py | 87 +++++++++++++++++++ 16 files changed, 140 insertions(+), 163 deletions(-) create mode 100644 changelogs/internal/newsfragments/1751.clarification create mode 100755 scripts/helpers.py diff --git a/changelogs/internal/newsfragments/1751.clarification b/changelogs/internal/newsfragments/1751.clarification new file mode 100644 index 00000000..50c50693 --- /dev/null +++ b/changelogs/internal/newsfragments/1751.clarification @@ -0,0 +1 @@ +Add support for `$ref` URIs containing fragments in OpenAPI definitions and JSON schemas. diff --git a/layouts/partials/events/example.html b/layouts/partials/events/example.html index 181de88f..90752fbd 100644 --- a/layouts/partials/events/example.html +++ b/layouts/partials/events/example.html @@ -1,13 +1,18 @@ {{/* - Renders an event example. Resolves `$ref`s, serializes as JSON, and ensures + Renders an event example. Resolves `$ref`s, serializes as JSON, and ensures that it can be included in HTML. - This partial is called with the example event object as its context. + Parameters: + + * `schema`: the schema of the example + * `name`: the name of the example */}} -{{ $example_content := partial "json-schema/resolve-refs" (dict "schema" . "path" "event-schemas/examples") }} +{{ $path := delimit (slice "event-schemas/examples" .name) "/" }} + +{{ $example_content := partial "json-schema/resolve-refs" (dict "schema" .schema "path" $path) }} {{ $example_json := jsonify (dict "indent" " ") $example_content }} {{ $example_json = replace $example_json "\\u003c" "<" }} {{ $example_json = replace $example_json "\\u003e" ">" | safeHTML }} diff --git a/layouts/partials/events/render-event.html b/layouts/partials/events/render-event.html index 70752721..71ba60bd 100644 --- a/layouts/partials/events/render-event.html +++ b/layouts/partials/events/render-event.html @@ -77,7 +77,7 @@ */}} {{ if $desired_example_name }} {{ if eq $example_name $desired_example_name }} - {{ partial "events/example" $example }} + {{ partial "events/example" (dict "schema" $example "name" $example_name) }} {{ end }} {{/* If `$desired_example_name` is not given, we will include any @@ -86,7 +86,7 @@ the event name includes a "$". */}} {{ else if eq $event_name $example_name }} - {{ partial "events/example" $example }} + {{ partial "events/example" (dict "schema" $example "name" $example_name) }} {{/* If `$desired_example_name` is not given, we will include any examples whose first part (before "$") matches the event name @@ -96,7 +96,7 @@ {{ $pieces := split $example_name "$" }} {{ $example_base_name := index $pieces 0 }} {{ if eq $event_name $example_base_name }} - {{ partial "events/example" $example }} + {{ partial "events/example" (dict "schema" $example "name" $example_name) }} {{ end }} {{ end }} {{ end }} diff --git a/layouts/partials/json-schema/resolve-refs.html b/layouts/partials/json-schema/resolve-refs.html index 1d99201d..9a36e413 100644 --- a/layouts/partials/json-schema/resolve-refs.html +++ b/layouts/partials/json-schema/resolve-refs.html @@ -1,7 +1,10 @@ {{/* Resolves the `$ref` JSON schema keyword, by recursively replacing - it with the object it points to. + it with the object it points to, given: + + * `schema`: the schema where the references should be resolved + * `path`: the path of the file containing the schema This template uses [`Scratch`](https://gohugo.io/functions/scratch/) rather than a normal `dict` because with `dict` you can't replace key values: @@ -20,8 +23,12 @@ {{ $scratch.Set "result_map" dict }} {{ $ref_value := index $schema "$ref"}} - {{ if $ref_value}} - {{ $full_path := path.Join $path $ref_value }} + {{ if $ref_value }} + {{ $uri := urls.Parse $path }} + {{ $ref_uri := urls.Parse $ref_value }} + {{ $full_uri := $uri.ResolveReference $ref_uri }} + + {{ $full_path := strings.TrimPrefix "/" $full_uri.Path }} {{/* Apparently Hugo doesn't give us a nice way to split the extension off a filename. */}} @@ -30,11 +37,18 @@ {{ $ref := index site.Data $pieces }} + {{/* If there is a fragment, follow the JSON Pointer */}} + {{ if $full_uri.Fragment }} + {{ $fragment := strings.TrimPrefix "/" $full_uri.Fragment }} + {{ $pieces := split $fragment "/" }} + {{ $ref = index $ref $pieces }} + {{ end }} + {{ $new_path := (path.Split $full_path).Dir}} {{ $result_map := partial "json-schema/resolve-refs" (dict "schema" $ref "path" $new_path)}} {{ if $result_map}} {{ $scratch.Set "result_map" $result_map }} - {{end }} + {{ end }} {{ end }} diff --git a/layouts/partials/openapi/render-parameters.html b/layouts/partials/openapi/render-parameters.html index 925b0197..ecabfc05 100644 --- a/layouts/partials/openapi/render-parameters.html +++ b/layouts/partials/openapi/render-parameters.html @@ -5,6 +5,7 @@ * `parameters`: OpenAPI data specifying the parameters * `type`: the type of parameters to render: "header, ""path", "query" * `caption`: caption to use for the table + * `path`: the path where this definition was found, to enable us to resolve "$ref" This template renders a single table containing parameters of the given type. @@ -13,7 +14,9 @@ {{ $parameters := .parameters }} {{ $type := .type }} {{ $caption := .caption }} +{{ $path := .path }} +{{ $parameters = partial "json-schema/resolve-refs" (dict "schema" $parameters "path" $path) }} {{ $parameters_of_type := where $parameters "in" $type }} {{ with $parameters_of_type }} @@ -32,5 +35,4 @@ {{/* and render the parameters */}} {{ partial "openapi/render-object-table" (dict "title" $caption "properties" $param_dict) }} - {{ end }} diff --git a/layouts/partials/openapi/render-request.html b/layouts/partials/openapi/render-request.html index 3d4b0381..5ef55c64 100644 --- a/layouts/partials/openapi/render-request.html +++ b/layouts/partials/openapi/render-request.html @@ -26,9 +26,9 @@ {{ if $parameters }}

Request parameters

- {{ partial "openapi/render-parameters" (dict "parameters" $parameters "type" "header" "caption" "header parameters") }} - {{ partial "openapi/render-parameters" (dict "parameters" $parameters "type" "path" "caption" "path parameters") }} - {{ partial "openapi/render-parameters" (dict "parameters" $parameters "type" "query" "caption" "query parameters") }} + {{ partial "openapi/render-parameters" (dict "parameters" $parameters "type" "header" "caption" "header parameters" "path" .path) }} + {{ partial "openapi/render-parameters" (dict "parameters" $parameters "type" "path" "caption" "path parameters" "path" .path) }} + {{ partial "openapi/render-parameters" (dict "parameters" $parameters "type" "query" "caption" "query parameters" "path" .path) }} {{ end }} diff --git a/layouts/partials/openapi/render-responses.html b/layouts/partials/openapi/render-responses.html index 99662ad0..82c2f954 100644 --- a/layouts/partials/openapi/render-responses.html +++ b/layouts/partials/openapi/render-responses.html @@ -26,6 +26,8 @@ Description +{{ $responses = partial "json-schema/resolve-refs" (dict "schema" $responses "path" $path) }} + {{ range $code, $response := $responses }} @@ -49,8 +51,7 @@ Display the JSON schemas */}} - {{ $schema := partial "json-schema/resolve-refs" (dict "schema" $json_body.schema "path" $path) }} - {{ $schema := partial "json-schema/resolve-allof" $schema }} + {{ $schema := partial "json-schema/resolve-allof" $json_body.schema }} {{/* All this is to work out how to express the content of the response diff --git a/layouts/shortcodes/definition.html b/layouts/shortcodes/definition.html index 0699ff7b..23461878 100644 --- a/layouts/shortcodes/definition.html +++ b/layouts/shortcodes/definition.html @@ -22,10 +22,6 @@ {{ errorf "site data %s not found" $path }} {{ end }} -{{/* The base path, which we use to resolve $ref, omits the last component */}} -{{ $pieces = first (sub (len $pieces) 1) $pieces}} -{{ $path = delimit $pieces "/" }} - {{/* Resolve $ref and allOf */}} {{ $definition = partial "json-schema/resolve-refs" (dict "schema" $definition "path" $path) }} {{ $definition = partial "json-schema/resolve-allof" $definition }} diff --git a/layouts/shortcodes/event.html b/layouts/shortcodes/event.html index a9838542..c671318a 100644 --- a/layouts/shortcodes/event.html +++ b/layouts/shortcodes/event.html @@ -25,7 +25,7 @@ */}} {{ $event_data := index .Site.Data "event-schemas" "schema" .Params.event }} -{{ $path := "event-schemas/schema" }} +{{ $path := delimit (slice "event-schemas/schema" .Params.event) "/" }} {{ $event_data = partial "json-schema/resolve-refs" (dict "schema" $event_data "path" $path) }} {{ $event_data := partial "json-schema/resolve-allof" $event_data }} diff --git a/layouts/shortcodes/http-api.html b/layouts/shortcodes/http-api.html index a3b706db..43d08b9e 100644 --- a/layouts/shortcodes/http-api.html +++ b/layouts/shortcodes/http-api.html @@ -21,6 +21,6 @@ {{ $api_data := index .Site.Data.api .Params.spec .Params.api }} {{ $base_url := (index $api_data.servers 0).variables.basePath.default }} -{{ $path := delimit (slice "api" $spec) "/" }} +{{ $path := delimit (slice "api" $spec $api) "/" }} {{ partial "openapi/render-api" (dict "api_data" $api_data "base_url" $base_url "path" $path) }} diff --git a/layouts/shortcodes/msgtypes.html b/layouts/shortcodes/msgtypes.html index ba731111..1ab28aae 100644 --- a/layouts/shortcodes/msgtypes.html +++ b/layouts/shortcodes/msgtypes.html @@ -6,7 +6,6 @@ */}} -{{ $path := "event-schemas/schema" }} {{ $compact := false }} {{/* @@ -40,6 +39,7 @@ {{ range $msgtypes }} {{ $event_data := index $site_data "event-schemas" "schema" . }} + {{ $path := delimit (slice "event-schemas/schema" .) "/" }} {{ $event_data = partial "json-schema/resolve-refs" (dict "schema" $event_data "path" $path) }} {{ $event_data := partial "json-schema/resolve-allof" $event_data }} diff --git a/scripts/check-event-schema-examples.py b/scripts/check-event-schema-examples.py index b258ca2e..9058ff4e 100755 --- a/scripts/check-event-schema-examples.py +++ b/scripts/check-event-schema-examples.py @@ -18,6 +18,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import helpers import sys import json import os @@ -48,51 +49,16 @@ except ImportError as e: raise -def load_file(path): - print("Loading reference: %s" % path) - if not path.startswith("file://"): - raise Exception("Bad ref: %s" % (path,)) - path = path[len("file://"):] - with open(path, "r") as f: - if path.endswith(".json"): - return json.load(f) - else: - # We have to assume it's YAML because some of the YAML examples - # do not have file extensions. - return yaml.safe_load(f) - - -def resolve_references(path, schema): - if isinstance(schema, dict): - # do $ref first - if '$ref' in schema: - value = schema['$ref'] - path = os.path.abspath(os.path.join(os.path.dirname(path), value)) - ref = load_file("file://" + path) - result = resolve_references(path, ref) - del schema['$ref'] - else: - result = {} - - for key, value in schema.items(): - result[key] = resolve_references(path, value) - return result - elif isinstance(schema, list): - return [resolve_references(path, value) for value in schema] - else: - return schema - - def check_example_file(examplepath, schemapath): with open(examplepath) as f: - example = resolve_references(examplepath, json.load(f)) + example = helpers.resolve_references(examplepath, json.load(f)) with open(schemapath) as f: schema = yaml.safe_load(f) fileurl = "file://" + os.path.abspath(schemapath) schema["id"] = fileurl - resolver = jsonschema.RefResolver(fileurl, schema, handlers={"file": load_file}) + resolver = jsonschema.RefResolver(fileurl, schema, handlers={"file": helpers.load_file_from_uri}) print ("Checking schema for: %r %r" % (examplepath, schemapath)) try: diff --git a/scripts/check-json-schemas.py b/scripts/check-json-schemas.py index 3901300f..06b24106 100755 --- a/scripts/check-json-schemas.py +++ b/scripts/check-json-schemas.py @@ -18,6 +18,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import helpers import sys import json import os @@ -67,23 +68,11 @@ class SchemaDirReport: def add(self, other_report): self.files += other_report.files self.errors += other_report.errors - -def load_file(path): - if not path.startswith("file://"): - raise Exception(f"Bad ref: {path}") - path = path[len("file://"):] - with open(path, "r") as f: - if path.endswith(".json"): - return json.load(f) - else: - # We have to assume it's YAML because some of the YAML examples - # do not have file extensions. - return yaml.safe_load(f) def check_example(path, schema, example): # URI with scheme is necessary to make RefResolver work. fileurl = "file://" + os.path.abspath(path) - resolver = jsonschema.RefResolver(fileurl, schema, handlers={"file": load_file}) + resolver = jsonschema.RefResolver(fileurl, schema, handlers={"file": helpers.load_file_from_uri}) validator = jsonschema.Draft202012Validator(schema, resolver) validator.validate(example) diff --git a/scripts/check-openapi-sources.py b/scripts/check-openapi-sources.py index 467e8091..7f28d860 100755 --- a/scripts/check-openapi-sources.py +++ b/scripts/check-openapi-sources.py @@ -19,6 +19,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import helpers import sys import json import os @@ -49,9 +50,7 @@ except ImportError as e: def check_schema(filepath, example, schema): - example = resolve_references(filepath, example) - schema = resolve_references(filepath, schema) - resolver = jsonschema.RefResolver(filepath, schema, handlers={"file": load_file}) + resolver = jsonschema.RefResolver(filepath, schema, handlers={"file": helpers.load_file_from_uri}) validator = jsonschema.Draft202012Validator(schema, resolver) validator.validate(example) @@ -120,6 +119,8 @@ def check_openapi_file(filepath): with open(filepath) as f: openapi = yaml.safe_load(f) + openapi = helpers.resolve_references(filepath, openapi) + openapi_version = openapi.get('openapi') if not openapi_version: # This is not an OpenAPI file, skip. @@ -149,64 +150,6 @@ def check_openapi_file(filepath): check_response(filepath, request, code, json_response) -def resolve_references(path, schema): - """Recurse through a given schema until we find a $ref key. Upon doing so, - check that the referenced file exists, then load it up and check all of the - references in that file. Continue on until we've hit all dead ends. - - $ref values are deleted from schemas as they are validated, to prevent - duplicate work. - """ - if isinstance(schema, dict): - # do $ref first - if '$ref' in schema: - # Pull the referenced filepath from the schema - referenced_file = schema['$ref'] - - # Referenced filepaths are relative, so take the current path's - # directory and append the relative, referenced path to it. - inner_path = os.path.join(os.path.dirname(path), referenced_file) - - # Then convert the path (which may contiain '../') into a - # normalised, absolute path - inner_path = os.path.abspath(inner_path) - - # Load the referenced file - ref = load_file("file://" + inner_path) - - # Check that the references in *this* file are valid - result = resolve_references(inner_path, ref) - - # They were valid, and so were the sub-references. Delete - # the reference here to ensure we don't pass over it again - # when checking other files - del schema['$ref'] - else: - result = {} - - for key, value in schema.items(): - result[key] = resolve_references(path, value) - return result - elif isinstance(schema, list): - return [resolve_references(path, value) for value in schema] - else: - return schema - - -def load_file(path): - print("Loading reference: %s" % path) - if not path.startswith("file://"): - raise Exception("Bad ref: %s" % (path,)) - path = path[len("file://"):] - with open(path, "r") as f: - if path.endswith(".json"): - return json.load(f) - else: - # We have to assume it's YAML because some of the YAML examples - # do not have file extensions. - return yaml.safe_load(f) - - if __name__ == '__main__': # Get the directory that this script is residing in script_directory = os.path.dirname(os.path.realpath(__file__)) diff --git a/scripts/dump-openapi.py b/scripts/dump-openapi.py index 1cc2279c..490ac9bf 100755 --- a/scripts/dump-openapi.py +++ b/scripts/dump-openapi.py @@ -20,6 +20,7 @@ import argparse import errno +import helpers import json import logging import os.path @@ -31,34 +32,6 @@ import yaml scripts_dir = os.path.dirname(os.path.abspath(__file__)) api_dir = os.path.join(os.path.dirname(scripts_dir), "data", "api") -def resolve_references(path, schema): - if isinstance(schema, dict): - # do $ref first - if '$ref' in schema: - value = schema['$ref'] - previous_path = path - path = os.path.join(os.path.dirname(path), value) - try: - with open(path, encoding="utf-8") as f: - ref = yaml.safe_load(f) - result = resolve_references(path, ref) - del schema['$ref'] - path = previous_path - except FileNotFoundError: - print("Resolving {}".format(schema)) - print("File not found: {}".format(path)) - result = {} - else: - result = {} - - for key, value in schema.items(): - result[key] = resolve_references(path, value) - return result - elif isinstance(schema, list): - return [resolve_references(path, value) for value in schema] - else: - return schema - def prefix_absolute_path_references(text, base_url): """Adds base_url to absolute-path references. @@ -176,7 +149,7 @@ for filename in os.listdir(selected_api_dir): print("Reading OpenAPI: %s" % filepath) with open(filepath, "r") as f: api = yaml.safe_load(f.read()) - api = resolve_references(filepath, api) + api = helpers.resolve_references(filepath, api) basePath = api['servers'][0]['variables']['basePath']['default'] for path, methods in api["paths"].items(): diff --git a/scripts/helpers.py b/scripts/helpers.py new file mode 100755 index 00000000..c35e8e2a --- /dev/null +++ b/scripts/helpers.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +# Helpers to resolve $ref recursively in OpenAPI and JSON schemas. + +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import os.path +import urllib.parse +import yaml + +def resolve_references(path, schema): + """Recurse through a given schema until we find a $ref key. Upon doing so, + check that the referenced file exists, then load it up and check all of the + references in that file. Continue on until we've hit all dead ends. + + $ref values are deleted from schemas as they are validated, to prevent + duplicate work. + """ + if isinstance(schema, dict): + # do $ref first + if '$ref' in schema: + # Pull the referenced URI from the schema + ref_uri = schema['$ref'] + + # Join the referenced URI with the URI of the file, to resolve + # relative URIs + full_ref_uri = urllib.parse.urljoin("file://" + path, ref_uri) + + # Separate the fragment. + (full_ref_uri, fragment) = urllib.parse.urldefrag(full_ref_uri) + + # Load the referenced file + ref = load_file_from_uri(full_ref_uri) + + if fragment: + # The fragment should be a JSON Pointer + keys = fragment.strip('/').split('/') + for key in keys: + ref = ref[key] + + # Check that the references in *this* file are valid + result = resolve_references(urllib.parse.urlsplit(full_ref_uri).path, ref) + + # They were valid, and so were the sub-references. Delete + # the reference here to ensure we don't pass over it again + # when checking other files + del schema['$ref'] + else: + result = {} + + for key, value in schema.items(): + result[key] = resolve_references(path, value) + return result + elif isinstance(schema, list): + return [resolve_references(path, value) for value in schema] + else: + return schema + + +def load_file_from_uri(path): + """Load a JSON or YAML file from a file:// URI. + """ + print("Loading reference: %s" % path) + if not path.startswith("file://"): + raise Exception("Bad ref: %s" % (path,)) + path = path[len("file://"):] + with open(path, "r") as f: + if path.endswith(".json"): + return json.load(f) + else: + # We have to assume it's YAML because some of the YAML examples + # do not have file extensions. + return yaml.safe_load(f) \ No newline at end of file