From 42d156ea7c2657ba2e0b76782e9e42731126c6ff Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Tue, 10 Dec 2024 10:06:49 +0100 Subject: [PATCH 4/9] feat: add oold utils --- src/osw/utils/oold.py | 398 ++++++++++++++++++++++++++++++++++++++++++ tests/utils/oold.py | 135 ++++++++++++++ 2 files changed, 533 insertions(+) create mode 100644 src/osw/utils/oold.py create mode 100644 tests/utils/oold.py diff --git a/src/osw/utils/oold.py b/src/osw/utils/oold.py new file mode 100644 index 00000000..2cdc8149 --- /dev/null +++ b/src/osw/utils/oold.py @@ -0,0 +1,398 @@ +"""Contains essential functions for working with JSON, JSON-SCHEMA
+and JSON-LD context objects. Python implementation of
+https://github.com/OpenSemanticLab/mediawiki-extensions-MwJson/blob/main/modules/ext.MwJson.util/MwJson_util.js
+"""
+
+from copy import deepcopy
+from enum import StrEnum
+from typing import Dict, Optional, TypeVar
+
+from pydantic import BaseModel
+from typing_extensions import deprecated
+
+JsonType = TypeVar("JsonType", dict, list, float, int, str, None)
+
+
+def deep_equal(x: JsonType, y: JsonType):
+    """Compares two objects deeply.
+ + Parameters + ---------- + x + a dictionary, list or scalar value + y + another dictionary, list or scalar value + + Returns + ------- + True if the two objects are deeply equal, False otherwise + """ + + if x is not None and y is not None and isinstance(x, dict) and isinstance(y, dict): + return len(x.keys()) == len(y.keys()) and all( + deep_equal(x[key], y.get(key, None)) for key in x + ) + elif ( + x is not None and y is not None and isinstance(x, list) and isinstance(y, list) + ): + return len(x) == len(y) and all( + deep_equal(x[key], y[key]) for key in range(0, len(x)) + ) + else: + return x == y + # all(deep_equal(x[key], y.get(key)) for key in x) or x == y + + +def unique_array(array: list) -> list: + """Returns a new array with only unique elements by comparing them deeply. + + Parameters: + array: list + The array to be filtered + + Returns: + list + A new array with only unique elements + """ + result = [] + for item in array: + add = True + for added_item in result: + if deep_equal(added_item, item): + add = False + break + if add: + result.append(item) + return result + + +def is_object(obj): + """Tests if an object is a dictionary. + + Parameters + ---------- + obj + the object to be tested + + Returns + ------- + True if the object is a dictionary, False otherwise + """ + return isinstance(obj, dict) + + +def is_array(obj): + """Tests if an object is a list. + + Parameters + ---------- + obj + the object to be tested + + Returns + ------- + True if the object is a list, False otherwise + """ + return isinstance(obj, list) + + +def is_string(obj): + """Tests if an object is a string. + + Parameters + ---------- + obj + the object to be tested + + Returns + ------- + True if the object is a string, False otherwise + """ + return isinstance(obj, str) + + +def copy_deep(target: JsonType) -> JsonType: + """Copies an object deeply. + + Parameters + ---------- + target + the object which values will be copied + + Returns + ------- + the copied object + """ + return deepcopy(target) + + +@deprecated("Use merge_deep instead") +def merge_deep_objects(target: dict, source: dict) -> JsonType: + """Merges two objects deeply, does not handle lists. + If dictionaries are encountered, the values of the source object + will overwrite the target object. + Missing keys in the target object will be added. + If an array is encountered as a subelement, the arrays are + concatenated and duplicates are removed. + If literals are encountered, the source value will + overwrite the target value. + + Parameters + ---------- + target + the object which values will be potentially overwritten + source + the object which values will take precedence over the target object + + Returns + ------- + the merged object + """ + if not target: + return source + if not source: + return target + output = deepcopy(target) + if is_object(target) and is_object(source): + for key in source: + if is_array(source[key]) and is_array(target.get(key)): + if key not in target: + output[key] = source[key] + else: + output[key] = unique_array(target[key] + source[key]) + elif is_object(source[key]): + if key not in target: + output[key] = source[key] + else: + output[key] = merge_deep(target[key], source[key]) + else: + output[key] = source[key] + + return output + + +def merge_deep(target: JsonType, source: JsonType) -> JsonType: + """Merges two objects deeply. + If dictionaries are encountered, the values of the source object + will overwrite the target object. + Missing keys in the target object will be added. 
+ If an array is encountered as a subelement, the arrays are + concatenated and duplicates are removed. + If literals are encountered, the source value will + overwrite the target value. + + Parameters + ---------- + target + the object which values will be potentially overwritten + source + the object which values will take precedence over the target object + + Returns + ------- + the merged object + """ + if not target: + return source + if not source: + return target + output = deepcopy(target) + + if is_object(target) and is_object(source): + for key in source: + output[key] = merge_deep(output.get(key, None), source[key]) + elif is_array(source) and is_array(target): + output = unique_array(target + source) + else: + output = source + return output + + +def merge_jsonld_context_object_list(context: list) -> list: + """to cleanup generated json-ld context + ["/some/remove/context", {"a": "ex:a"}, {"a": "ex:a", "b": "ex:b"}] + => ["/some/remove/context", {"a": "ex:a", "b": "ex:b"}] + + Parameters + ---------- + list + mixed list of strings and dictionaries + """ + + # interate over all elements + # if element is a string, add it to the result list + # if element is a dictionary, merge it with the last dictionary in the + # result list + + # if not a list, return immediately + if not is_array(context): + return context + + result = [] + last = None + for e in context: + if is_object(e): + if last is None: + last = e + else: + last = merge_deep(last, e) + else: + if last is not None: + result.append(last) + last = None + result.append(e) + if last is not None: + result.append(last) + return result + + +class AggregateGeneratedSchemasParamMode(StrEnum): + ROOT_LEVEL = "root_level" + """ The generated schema is merged at the root level """ + DEFINITIONS_SECTION = "definitions_section" + """ The generated schema is merged into the definitions section """ + + +class AggregateGeneratedSchemasParam(BaseModel): + target_schema: Optional[dict] = {} + """ The target schema to be merged with the generated schema """ + generated_schemas: Dict[str, dict] + """ List of JSON schemas to be aggregated """ + mode: AggregateGeneratedSchemasParamMode = ( + AggregateGeneratedSchemasParamMode.ROOT_LEVEL + ) + """ The mode to be used for aggregation """ + def_key: Optional[str] = "$defs" + """ The keyword for schema definitions. $defs is recommended""" + gen_def_key: Optional[str] = "generated" + """ The keyword to store the generated schema. + Note: Having a separate section per generated schema would lead + to many partial classes in code generation """ + generate_root_ref: Optional[bool] = False + """ If true, generate $ref: "#/def...", else allOf: [{$ref: "#/def...""}. + Root refs are not supported by json_ref_parser < 0.10 and data-model-codegen """ + gen_def_pointer: Optional[str] = None + """ The pointer to the generated schema. If None, it will be set to + "#/" + def_key + "/" + gen_def_key """ + + def __init__(self, **data): + super().__init__(**data) + if self.gen_def_pointer is None: + self.gen_def_pointer = "#/" + self.def_key + "/" + self.gen_def_key + + +class AggregateGeneratedSchemasResult(BaseModel): + aggregated_schema: dict + """ The aggregated schema """ + + +def aggregate_generated_schemas( + param: AggregateGeneratedSchemasParam, +) -> AggregateGeneratedSchemasResult: + """Applies a merge operation on two OO-LD schemas. 
+ + Parameters + ---------- + param + see AggregateGeneratedSchemasParam + + Returns + ------- + see AggregateGeneratedSchemasResult + """ + mode = param.mode + def_key = param.def_key + gen_def_key = param.gen_def_key + gen_def_pointer = param.gen_def_pointer + generate_root_ref = param.generate_root_ref + schema = param.target_schema + + for generated_schema_id in param.generated_schemas.keys(): + generated_schema = param.generated_schemas[generated_schema_id] + if mode == AggregateGeneratedSchemasParamMode.ROOT_LEVEL: + schema = merge_deep(schema, generated_schema) + else: + # Store generated schema in #/$defs/generated (force overwrite), + # add $ref: #/$defs/generated to schema + # note: using $def with $ leads to recursion error in + # note: requires addition schema properties are allowed on the + # same level as $ref. allOf: $ref would imply a superclass + if "@context" in generated_schema: + generated_context = copy_deep(generated_schema["@context"]) + del generated_schema["@context"] + existing_context = schema.get("@context", None) + if existing_context is not None: + # case A: "" + "" => ["", ""] + # case B: "" + {} => ["", {}] + # case C: "" + [] => ["", ] + # case D: [] + {} => [, {}] + # case E: {} + {} => {} + # case F: [] + [] => [] + + if is_array(existing_context) and not is_array(generated_context): + generated_context = [generated_context] + # case C + D + elif not is_array(existing_context) and is_array(generated_context): + existing_context = [existing_context] + # case C + D + elif not is_array(existing_context) and not is_array( + generated_context + ): + if is_string(existing_context) or is_string( + existing_context + ): # case A + B + generated_context = [generated_context] + existing_context = [existing_context] + # case E + F: nothing to do + schema["@context"] = merge_deep( + {"@context": existing_context}, {"@context": generated_context} + )["@context"] + if is_array(schema["@context"]): + schema["@context"] = merge_jsonld_context_object_list( + schema["@context"] + ) + + if def_key not in schema: + schema[def_key] = {} + if gen_def_key not in schema[def_key]: + schema[def_key][gen_def_key] = { + "$comment": "Autogenerated section - do not edit. 
Generated from" + } + schema[def_key][gen_def_key]["$comment"] += " " + generated_schema_id + # schema[def_key][gen_def_key] = generated_schema; # full override + schema[def_key][gen_def_key] = merge_deep( + schema[def_key][gen_def_key], generated_schema + ) + # merge + + if generate_root_ref: + if "$ref" in schema and schema["$ref"] != gen_def_pointer: + print( + "Error while applying generated schema: $ref already set to " + + schema["$ref"] + ) + else: + schema["$ref"] = gen_def_pointer + else: + if "allOf" not in schema: + schema["allOf"] = [] + # check if any allOf already points to the generated schema + exists = any( + [allOf["$ref"] == gen_def_pointer for allOf in schema["allOf"]] + ) + if not exists: + schema["allOf"].append({"$ref": gen_def_pointer}) + if "title" in generated_schema: + schema["title"] = generated_schema["title"] + schema[def_key][gen_def_key]["title"] = ( + "Generated" + generated_schema["title"] + ) + schema[def_key][gen_def_key]["description"] = ( + "This is an autogenerated partial class definition of '" + + generated_schema["title"] + + "'" + ) + if "description" in generated_schema: + schema["description"] = generated_schema["description"] + + return AggregateGeneratedSchemasResult(aggregated_schema=schema) diff --git a/tests/utils/oold.py b/tests/utils/oold.py new file mode 100644 index 00000000..84b5069f --- /dev/null +++ b/tests/utils/oold.py @@ -0,0 +1,135 @@ +from osw.utils.oold import ( + AggregateGeneratedSchemasParam, + AggregateGeneratedSchemasParamMode, + aggregate_generated_schemas, + deep_equal, + merge_deep, + merge_jsonld_context_object_list, + unique_array, +) + + +def test_deep_equal(): + assert deep_equal({"a": 1, "b": 2}, {"a": 1, "b": 2}) is True + assert deep_equal({"a": 1, "b": 2}, {"a": 1, "b": 3}) is False + assert deep_equal({"a": 1, "b": 2}, {"a": 1}) is False + assert deep_equal({"a": 1, "b": 2}, {"a": 1, "b": 2, "c": 3}) is False + assert deep_equal({"a": 1, "b": 2}, {"b": 2, "a": 1}) is True + + assert deep_equal([1, 2, 3], [1, 2, 3]) is True + assert deep_equal([1, 2, 3], [1, 2, 4]) is False + assert deep_equal([1, 2, 3], [1, 2]) is False + assert deep_equal([1, 2, 3], [1, 2, 3, 4]) is False + + assert deep_equal({"a": 1, "b": [1, 2, 3]}, {"a": 1, "b": [1, 2, 3]}) is True + assert deep_equal({"a": 1, "b": [1, 2, 3]}, {"a": 1, "b": [1, 2, 4]}) is False + + +def test_unique_array(): + assert unique_array([1, 2, 3, 1, 2, 3]) == [1, 2, 3] + assert unique_array([1, 2, 3]) == [1, 2, 3] + assert unique_array([1, 2, 3, 4, 5]) == [1, 2, 3, 4, 5] + assert unique_array([1, 2, 3, 4, 5, 1, 2, 3]) == [1, 2, 3, 4, 5] + + +def test_deep_merge(): + assert merge_deep({"a": 1, "b": 2}, {"a": 1, "b": 2}) == {"a": 1, "b": 2} + assert merge_deep({"a": 1, "b": 2}, {"a": 1, "b": 3}) == {"a": 1, "b": 3} + assert merge_deep({"a": 1, "b": 2}, {"a": 1}) == {"a": 1, "b": 2} + assert merge_deep({"a": 1, "b": 2}, {"a": 1, "b": 2, "c": 3}) == { + "a": 1, + "b": 2, + "c": 3, + } + assert merge_deep({"a": 1, "b": 2}, {"b": 2, "a": 1}) == {"a": 1, "b": 2} + + assert merge_deep([1, 2, 3], [1, 2, 3]) == [1, 2, 3] + assert merge_deep([1, 2, 3], [1, 2, 4]) == [1, 2, 3, 4] + assert merge_deep([1, 2, 3], [1, 2]) == [1, 2, 3] + assert merge_deep([1, 2, 3], [1, 2, 3, 4]) == [1, 2, 3, 4] + assert merge_deep([1, 2, {"a": 2}, 3], [1, {"a": 2}, 4]) == [1, 2, {"a": 2}, 3, 4] + + assert merge_deep({"a": 1, "b": [1, 2, 3]}, {"a": 1, "b": [1, 2, 3]}) == { + "a": 1, + "b": [1, 2, 3], + } + assert merge_deep({"a": 1, "b": [1, 2, 3]}, {"a": 1, "b": [1, 2, 4]}) == { + "a": 1, + "b": 
[1, 2, 3, 4], + } + + +def test_merge_jsonld_context_object_list(): + assert merge_jsonld_context_object_list(["a", "b", "c"]) == ["a", "b", "c"] + assert merge_jsonld_context_object_list(["a", {"b": 1}, "c"]) == [ + "a", + {"b": 1}, + "c", + ] + assert merge_jsonld_context_object_list(["a", {"b": 1}, {"c": 2}]) == [ + "a", + {"b": 1, "c": 2}, + ] + assert merge_jsonld_context_object_list(["a", {"b": 1}, {"b": 2}]) == [ + "a", + {"b": 2}, + ] + assert merge_jsonld_context_object_list(["a", {"b": 1}, "c", {"b": 2}]) == [ + "a", + {"b": 1}, + "c", + {"b": 2}, + ] + + +def test_aggregate_generated_schemas(): + param = AggregateGeneratedSchemasParam( + target_schema={"a": 1, "b": 2}, + generated_schemas={"schema1": {"b": 3, "c": 4}, "schema2": {"b": 5, "d": 6}}, + mode=AggregateGeneratedSchemasParamMode.ROOT_LEVEL, + ) + result = aggregate_generated_schemas(param) + assert result.aggregated_schema == {"a": 1, "b": 5, "c": 4, "d": 6} + + param = AggregateGeneratedSchemasParam( + target_schema={"a": 1, "b": 2}, + generated_schemas={"schema1": {"b": 3, "c": 4}, "schema2": {"b": 5, "d": 6}}, + mode=AggregateGeneratedSchemasParamMode.DEFINITIONS_SECTION, + ) + result = aggregate_generated_schemas(param) + assert result.aggregated_schema == { + "a": 1, + "b": 2, + "$defs": { + "generated": { + "$comment": "Autogenerated section - do not edit. " + "Generated from schema1 schema2", + "b": 5, + "c": 4, + "d": 6, + } + }, + "allOf": [{"$ref": "#/$defs/generated"}], + } + + param = AggregateGeneratedSchemasParam( + target_schema={"a": 1, "b": 2}, + generated_schemas={"schema1": {"b": 3, "c": 4}, "schema2": {"b": 5, "d": 6}}, + mode=AggregateGeneratedSchemasParamMode.DEFINITIONS_SECTION, + generate_root_ref=True, + ) + result = aggregate_generated_schemas(param) + assert result.aggregated_schema == { + "a": 1, + "b": 2, + "$defs": { + "generated": { + "$comment": "Autogenerated section - do not edit. 
" + "Generated from schema1 schema2", + "b": 5, + "c": 4, + "d": 6, + } + }, + "$ref": "#/$defs/generated", + } From a336191e1268a1a984d5a1c5c12d691aa73bdee3 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Tue, 10 Dec 2024 10:06:49 +0100 Subject: [PATCH 5/9] feat: add join helper --- src/osw/utils/templates.py | 44 +++++++++++++++++++++++++++ tests/utils/templates_test.py | 56 +++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/src/osw/utils/templates.py b/src/osw/utils/templates.py index b610b492..fcbfe26c 100644 --- a/src/osw/utils/templates.py +++ b/src/osw/utils/templates.py @@ -19,6 +19,43 @@ def compile_handlebars_template(template): return compiled_template +# Python implementations of handlebars helpers +# https://github.com/OpenSemanticLab/mediawiki-extensions-MwJson/blob/main/modules/ext.MwJson.editor/MwJson_editor.js#L1342 + + +def helper_join(this, options, context, separator=None, intro=None, outro=None): + """ + removes all empty interation results and delimits them with + the given separator (default: ", ") + {{#join literal_array }}{{.}}{{/join}} + {{#join object_array ", " "[" "]"}}{{#if print}}{{value}}{{/if}}{{/join}} + """ + + # handle optional params + if intro is None: + intro = "" + if outro is None: + outro = "" + if separator is None: + separator = ", " + + if context is None: + context = [] + items = [] + + for e in context: + inner = "".join(options["fn"](e)) + items.append(inner) + + # Remove empty or whitespace-only elements + items = [item for item in items if item.strip() != ""] + if len(items) == 0: + intro = outro = "" + + # Join with separator, wrap with intro + outro + return intro + separator.join(items) + outro + + def eval_compiled_handlebars_template( compiled_template, data, helpers=None, partials=None, add_self_as_partial=True ): @@ -41,12 +78,19 @@ def eval_compiled_handlebars_template( ------- the evaluated template as a string """ + + default_helpers = { + "join": helper_join, + } if helpers is None: helpers = {} + helpers = {**default_helpers, **helpers} + if partials is None: partials = {} if add_self_as_partial: partials["self"] = compiled_template + return compiled_template(data, helpers=helpers, partials=partials) diff --git a/tests/utils/templates_test.py b/tests/utils/templates_test.py index 26fa870c..7bb6228f 100644 --- a/tests/utils/templates_test.py +++ b/tests/utils/templates_test.py @@ -271,3 +271,59 @@ def test_metamodel_template(): output = json.loads(eval_handlebars_template(template, data)) assert output == json.loads(expected) + + +def test_helper_join(): + template = """{ + "@context": [{ + {{#join properties}} + {{#if rdf_property}}"{{name}}": "{{rdf_property}}"{{/if}} + {{/join}} + }] + }""" + + data = { + "properties": [ + { + "name": "test_property", + "rdf_property": "Property:TestPropertyWithSchema", + }, + { + "name": "test_property2", + "rdf_property": "Property:TestPropertyWithSchema", + }, + ] + } + + expected = { + "@context": [ + { + "test_property": "Property:TestPropertyWithSchema", + "test_property2": "Property:TestPropertyWithSchema", + } + ] + } + + output = json.loads(eval_handlebars_template(template, data)) + + assert output == expected + + template = """ + {{#join object_array ", " "[" "]"}}{{#if print}}{{value}}{{/if}}{{/join}} + """ + + data = { + "object_array": [ + {"value": 1, "print": True}, + {"value": 2}, + {"value": 3, "print": True}, + ] + } + + expected = [1, 3] + + output = json.loads(eval_handlebars_template(template, data)) + assert output == expected + + 
+test_helper_join() From 8f7789fbcaa4da18f90f1ec6fe5852f5debbc2cf Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Tue, 10 Dec 2024 10:06:49 +0100 Subject: [PATCH 6/9] feat: apply schema templates recursive --- src/osw/core.py | 107 ++++++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 44 deletions(-) diff --git a/src/osw/core.py b/src/osw/core.py index 0e492c58..d16da935 100644 --- a/src/osw/core.py +++ b/src/osw/core.py @@ -22,6 +22,11 @@ import osw.model.entity as model from osw.model.static import OswBaseModel +from osw.utils.oold import ( + AggregateGeneratedSchemasParam, + AggregateGeneratedSchemasParamMode, + aggregate_generated_schemas, +) from osw.utils.templates import ( compile_handlebars_template, eval_compiled_handlebars_template, @@ -825,8 +830,6 @@ class _ApplyOverwriteParam(OswBaseModel): entity: OswBaseModel # actually model.Entity but this causes the "type" error policy: Union[OSW.OverwriteClassParam, OVERWRITE_CLASS_OPTIONS] namespace: Optional[str] - meta_category_title: Optional[str] - meta_category_template_str: Optional[str] remove_empty: Optional[bool] = True inplace: Optional[bool] = False debug: Optional[bool] = False @@ -1035,7 +1038,7 @@ class StoreEntityParam(OswBaseModel): """ID to document the change. Entities within the same store_entity() call will share the same change_id. This parameter can also be used to link multiple store_entity() calls.""" - meta_category_title: Optional[str] = "Category:Category" + meta_category_title: Optional[Union[str, List[str]]] = "Category:Category" debug: Optional[bool] = False _overwrite_per_class: Dict[str, Dict[str, OSW.OverwriteClassParam]] = ( PrivateAttr() @@ -1108,25 +1111,35 @@ def store_entity( max_index = len(param.entities) - meta_category = self.site.get_page( - WtSite.GetPageParam(titles=[param.meta_category_title]) - ).pages[0] - # ToDo: we have to do this iteratively to support meta categories inheritance - meta_category_template_str = meta_category.get_slot_content("schema_template") - meta_category_template = None + meta_category_templates = {} if param.namespace == "Category": - if param.meta_category_title: - meta_category = self.site.get_page( - WtSite.GetPageParam(titles=[param.meta_category_title]) - ).pages[0] - meta_category_template_str = meta_category.get_slot_content( - "schema_template" - ) - if meta_category_template_str: - meta_category_template = compile_handlebars_template( - meta_category_template_str + meta_category_titles = param.meta_category_title + if not isinstance(meta_category_titles, list): + meta_category_titles = [meta_category_titles] + meta_category_template_strs = {} + # We have to do this iteratively to support meta categories inheritance + while meta_category_titles is not None and len(meta_category_titles) > 0: + meta_categories = self.site.get_page( + WtSite.GetPageParam(titles=meta_category_titles) + ).pages + for meta_category in meta_categories: + meta_category_template_strs[meta_category.title] = ( + meta_category.get_slot_content("schema_template") + ) + + meta_category_titles = meta_category.get_slot_content("jsondata").get( + "subclass_of" ) + for title in meta_category_template_strs.keys(): + meta_category_template_str = meta_category_template_strs[title] + if meta_category_template_str: + meta_category_templates[title] = compile_handlebars_template( + meta_category_template_str + ) + # inverse order to have the most generic template first + meta_category_templates = dict(reversed(meta_category_templates.items())) + def store_entity_( 
entity_: model.Entity, namespace_: str = None, @@ -1148,41 +1161,47 @@ def store_entity_( entity=entity_, namespace=namespace_, policy=overwrite_class_param, - meta_category_template_str=meta_category_template_str, remove_empty=param.remove_empty, debug=param.debug, ) ) - if meta_category_template: + if len(meta_category_templates.keys()) > 0: + generated_schemas = {} try: jsondata = page.get_slot_content("jsondata") if param.remove_empty: remove_empty(jsondata) - schema_str = eval_compiled_handlebars_template( - meta_category_template, - jsondata, - { - "_page_title": entity_title, # Legacy - "_current_subject_": entity_title, - }, - ) - schema = json.loads(schema_str) - # Put generated schema in definitions section, - # currently only enabled for Characteristics - if hasattr(model, "CharacteristicType") and isinstance( - entity_, model.CharacteristicType - ): - new_schema = { - "$defs": {"generated": schema}, - "allOf": [{"$ref": "#/$defs/generated"}], - "@context": schema.pop("@context", None), - "title": schema.pop("title", ""), - } - schema["title"] = "Generated" + new_schema["title"] - schema = new_schema - page.set_slot_content("jsonschema", new_schema) + + for key in meta_category_templates: + meta_category_template = meta_category_templates[key] + schema_str = eval_compiled_handlebars_template( + meta_category_template, + jsondata, + { + "_page_title": entity_title, # Legacy + "_current_subject_": entity_title, + }, + ) + generated_schemas[key] = json.loads(schema_str) except Exception as e: print(f"Schema generation from template failed for {entity_}: {e}") + + mode = AggregateGeneratedSchemasParamMode.ROOT_LEVEL + # Put generated schema in definitions section, + # currently only enabled for Characteristics + if hasattr(model, "CharacteristicType") and isinstance( + entity_, model.CharacteristicType + ): + mode = AggregateGeneratedSchemasParamMode.DEFINITIONS_SECTION + + new_schema = aggregate_generated_schemas( + AggregateGeneratedSchemasParam( + schema=page.get_slot_content("jsonschema"), + generated_schemas=generated_schemas, + mode=mode, + ) + ).aggregated_schema + page.set_slot_content("jsonschema", new_schema) page.edit() # will set page.changed if the content of the page has changed if page.changed: if index is None: From cb5bc2fb4dc3c69a9334b085893532d7b2616b69 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Thu, 12 Dec 2024 05:58:26 +0100 Subject: [PATCH 7/9] fix: replace StrEnum with str, Enum StrEnum is only available in python >= 3.11 --- src/osw/utils/oold.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/osw/utils/oold.py b/src/osw/utils/oold.py index 2cdc8149..9195e289 100644 --- a/src/osw/utils/oold.py +++ b/src/osw/utils/oold.py @@ -4,7 +4,7 @@ """ from copy import deepcopy -from enum import StrEnum +from enum import Enum from typing import Dict, Optional, TypeVar from pydantic import BaseModel @@ -246,7 +246,7 @@ def merge_jsonld_context_object_list(context: list) -> list: return result -class AggregateGeneratedSchemasParamMode(StrEnum): +class AggregateGeneratedSchemasParamMode(str, Enum): ROOT_LEVEL = "root_level" """ The generated schema is merged at the root level """ DEFINITIONS_SECTION = "definitions_section" From f9beb14cb2f735ab7290e7dbb40d45fccbc8c934 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sun, 15 Dec 2024 12:20:19 +0100 Subject: [PATCH 8/9] fix: handle double-quote and inline escape in template processing --- src/osw/core.py | 3 ++- src/osw/utils/oold.py | 32 +++++++++++++++++++++++ src/osw/utils/templates.py | 9 
+++++++ tests/utils/oold.py | 6 +++++ tests/utils/templates_test.py | 48 +++++++++++++++++++++++++++++++---- 5 files changed, 92 insertions(+), 6 deletions(-) diff --git a/src/osw/core.py b/src/osw/core.py index d16da935..11180100 100644 --- a/src/osw/core.py +++ b/src/osw/core.py @@ -26,6 +26,7 @@ AggregateGeneratedSchemasParam, AggregateGeneratedSchemasParamMode, aggregate_generated_schemas, + escape_double_quotes, ) from osw.utils.templates import ( compile_handlebars_template, @@ -1176,7 +1177,7 @@ def store_entity_( meta_category_template = meta_category_templates[key] schema_str = eval_compiled_handlebars_template( meta_category_template, - jsondata, + escape_double_quotes(jsondata), { "_page_title": entity_title, # Legacy "_current_subject_": entity_title, diff --git a/src/osw/utils/oold.py b/src/osw/utils/oold.py index 9195e289..38d5a5b6 100644 --- a/src/osw/utils/oold.py +++ b/src/osw/utils/oold.py @@ -396,3 +396,35 @@ def aggregate_generated_schemas( schema["description"] = generated_schema["description"] return AggregateGeneratedSchemasResult(aggregated_schema=schema) + + +def escape_double_quotes(obj: JsonType) -> JsonType: + """replace double quotes `"` with escaped double quotes `\"` in strings. + If the object is a string, the escaped string is returned. + If the object is a list, the function is called recursively for each element. + If the object is a dictionary, the function is called recursively for each value. + Else the object is returned as is. + + Parameters + ---------- + obj + the object to handle + + Returns + ------- + returns the object with double quotes escaped if applicable + """ + if isinstance(obj, str): + # Escape double quotes in string + return obj.replace('"', '\\"') + elif isinstance(obj, list): + # Iterate over array elements + return [escape_double_quotes(item) for item in obj] + elif isinstance(obj, dict): + # Iterate over object properties + escaped_obj = {} + for key, value in obj.items(): + escaped_obj[key] = escape_double_quotes(value) + return escaped_obj + # Return the value as is for non-string, non-object types + return obj diff --git a/src/osw/utils/templates.py b/src/osw/utils/templates.py index fcbfe26c..a201c501 100644 --- a/src/osw/utils/templates.py +++ b/src/osw/utils/templates.py @@ -1,3 +1,5 @@ +import re + from pybars import Compiler @@ -15,6 +17,13 @@ def compile_handlebars_template(template): the compiled template """ compiler = Compiler() + # pybars does not support inline escaping, so we have to wrap the raw block + # e.g. \{{escaped}} => {{{{raw}}}}{{escaped}}{{{{/raw}}}} + # this workaround does not support expressions withing the escaped block, + # e.g. 
\{{escaped {{some_var}} }} will not work + # see https://handlebarsjs.com/guide/expressions.html#escaping-handlebars-expressions + # see https://github.com/wbond/pybars3/pull/47 + template = re.sub(r"\\\{\{([^}]+)\}\}", r"{{{{raw}}}}{{\1}}{{{{/raw}}}}", template) compiled_template = compiler.compile(template) return compiled_template diff --git a/tests/utils/oold.py b/tests/utils/oold.py index 84b5069f..cc9acd8d 100644 --- a/tests/utils/oold.py +++ b/tests/utils/oold.py @@ -3,6 +3,7 @@ AggregateGeneratedSchemasParamMode, aggregate_generated_schemas, deep_equal, + escape_double_quotes, merge_deep, merge_jsonld_context_object_list, unique_array, @@ -133,3 +134,8 @@ def test_aggregate_generated_schemas(): }, "$ref": "#/$defs/generated", } + + +def test_escape_double_quotes(): + data = 'test "string"' + assert escape_double_quotes(data) == r"test \"string\"" diff --git a/tests/utils/templates_test.py b/tests/utils/templates_test.py index 7bb6228f..cea463d7 100644 --- a/tests/utils/templates_test.py +++ b/tests/utils/templates_test.py @@ -1,6 +1,7 @@ # flake8: noqa: E501 import json +from osw.utils.oold import escape_double_quotes from osw.utils.templates import eval_handlebars_template @@ -49,7 +50,7 @@ def test_category_template(): "metaclass": ["Category:OSW725a3cf5458f4daea86615fcbd0029f8"], "description": [ { - "text": "Represents the set of all individuals. In the DL literature this is often called the top concept.", + "text": 'Represents the set of all "individuals". In the DL literature this is often called the top concept.', "lang": "en", } ], @@ -72,9 +73,9 @@ def test_category_template(): "title*": { "en": "OwlThing" }, - "description": "Represents the set of all individuals. In the DL literature this is often called the top concept.", + "description": "Represents the set of all \\"individuals\\". In the DL literature this is often called the top concept.", "description*": { - "en": "Represents the set of all individuals. In the DL literature this is often called the top concept." + "en": "Represents the set of all \\"individuals\\". In the DL literature this is often called the top concept." 
}, "required": [ "type" @@ -92,7 +93,7 @@ def test_category_template(): output = json.loads( eval_handlebars_template( template, - data, + escape_double_quotes(data), {"_page_title": "Category:OSW379d5a1589c74c82bc0de47938264d00"}, ) ) @@ -326,4 +327,41 @@ def test_helper_join(): assert output == expected -test_helper_join() +def test_raw_block(): + template = """ +{ + "unit": { + "format": "autocomplete", + "options": { + "autocomplete": { + "query": "[[-HasUnit::{{{quantity}}}]][[HasSymbol::like:*\{{_user_input}}*]]OR[[-HasPrefixUnit.-HasUnit::{{{quantity}}}]][[HasSymbol::like:*\{{_user_input}}*]]|?HasSymbol=label" + } + } + } +}""" + + data = { + "quantity": "Item:OSW1bd92826da6f5c53982ed6ea45bc1b9b", + } + + expected = """ +{ + "unit": { + "format": "autocomplete", + "options": { + "autocomplete": { + "query": "[[-HasUnit::Item:OSW1bd92826da6f5c53982ed6ea45bc1b9b]][[HasSymbol::like:*{{_user_input}}*]]OR[[-HasPrefixUnit.-HasUnit::Item:OSW1bd92826da6f5c53982ed6ea45bc1b9b]][[HasSymbol::like:*{{_user_input}}*]]|?HasSymbol=label" + } + } + } +} +""" + res = eval_handlebars_template( + template, + data, + {"_page_title": "Category:OSW379d5a1589c74c82bc0de47938264d00"}, + ) + + output = json.loads(res) + + assert output == json.loads(expected) From 034ec7613c5df88813b1d68218b8a75f8a535e3c Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sun, 15 Dec 2024 15:45:01 +0100 Subject: [PATCH 9/9] fix: escape also non-standard escape sequences --- src/osw/core.py | 4 ++-- src/osw/utils/oold.py | 13 ++++++++----- tests/utils/oold.py | 28 ++++++++++++++++++++++++---- tests/utils/templates_test.py | 4 ++-- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/osw/core.py b/src/osw/core.py index 11180100..7101436f 100644 --- a/src/osw/core.py +++ b/src/osw/core.py @@ -26,7 +26,7 @@ AggregateGeneratedSchemasParam, AggregateGeneratedSchemasParamMode, aggregate_generated_schemas, - escape_double_quotes, + escape_json_strings, ) from osw.utils.templates import ( compile_handlebars_template, @@ -1177,7 +1177,7 @@ def store_entity_( meta_category_template = meta_category_templates[key] schema_str = eval_compiled_handlebars_template( meta_category_template, - escape_double_quotes(jsondata), + escape_json_strings(jsondata), { "_page_title": entity_title, # Legacy "_current_subject_": entity_title, diff --git a/src/osw/utils/oold.py b/src/osw/utils/oold.py index 38d5a5b6..a664de2f 100644 --- a/src/osw/utils/oold.py +++ b/src/osw/utils/oold.py @@ -3,6 +3,7 @@ https://github.com/OpenSemanticLab/mediawiki-extensions-MwJson/blob/main/modules/ext.MwJson.util/MwJson_util.js """ +import json from copy import deepcopy from enum import Enum from typing import Dict, Optional, TypeVar @@ -398,8 +399,9 @@ def aggregate_generated_schemas( return AggregateGeneratedSchemasResult(aggregated_schema=schema) -def escape_double_quotes(obj: JsonType) -> JsonType: - """replace double quotes `"` with escaped double quotes `\"` in strings. +def escape_json_strings(obj: JsonType) -> JsonType: + """replace double quotes `"` with escaped double quotes `\"` in + and non-standard escape-squences in strings. If the object is a string, the escaped string is returned. If the object is a list, the function is called recursively for each element. If the object is a dictionary, the function is called recursively for each value. 
@@ -416,15 +418,16 @@ def escape_double_quotes(obj: JsonType) -> JsonType: """ if isinstance(obj, str): # Escape double quotes in string - return obj.replace('"', '\\"') + # Replace invalid backslashes outside of math environments + return json.dumps(obj)[1:-1] elif isinstance(obj, list): # Iterate over array elements - return [escape_double_quotes(item) for item in obj] + return [escape_json_strings(item) for item in obj] elif isinstance(obj, dict): # Iterate over object properties escaped_obj = {} for key, value in obj.items(): - escaped_obj[key] = escape_double_quotes(value) + escaped_obj[key] = escape_json_strings(value) return escaped_obj # Return the value as is for non-string, non-object types return obj diff --git a/tests/utils/oold.py b/tests/utils/oold.py index cc9acd8d..1b081b45 100644 --- a/tests/utils/oold.py +++ b/tests/utils/oold.py @@ -1,9 +1,11 @@ +import json + from osw.utils.oold import ( AggregateGeneratedSchemasParam, AggregateGeneratedSchemasParamMode, aggregate_generated_schemas, deep_equal, - escape_double_quotes, + escape_json_strings, merge_deep, merge_jsonld_context_object_list, unique_array, @@ -136,6 +138,24 @@ def test_aggregate_generated_schemas(): } -def test_escape_double_quotes(): - data = 'test "string"' - assert escape_double_quotes(data) == r"test \"string\"" +def test_escape_json_strings(): + org_data = { + "test": "normal string", + "test2": 'string with "double quotes"', + "test3": r"latex command $\textit{impedance}$", + "test4": r"math $$M \cdot L^2 \cdot T^{-2} \cdot N^{-1}$$", + } + data = escape_json_strings(org_data) + + data_string = "{" + # serialize manually to json as a handlebars template would do + for key, value in data.items(): + data_string += f'\n "{key}": "{value}"' + if key != list(data.keys())[-1]: + data_string += ", " + data_string += "\n}" + data = data_string + + result = json.loads(data) + + assert result == org_data diff --git a/tests/utils/templates_test.py b/tests/utils/templates_test.py index cea463d7..59561a82 100644 --- a/tests/utils/templates_test.py +++ b/tests/utils/templates_test.py @@ -1,7 +1,7 @@ # flake8: noqa: E501 import json -from osw.utils.oold import escape_double_quotes +from osw.utils.oold import escape_json_strings from osw.utils.templates import eval_handlebars_template @@ -93,7 +93,7 @@ def test_category_template(): output = json.loads( eval_handlebars_template( template, - escape_double_quotes(data), + escape_json_strings(data), {"_page_title": "Category:OSW379d5a1589c74c82bc0de47938264d00"}, ) )
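
Usage sketch (illustrative only, not part of the patch series), assuming the module state after patch 9/9: escape_json_strings round-trips quotes and backslashes through JSON, and aggregate_generated_schemas folds a generated partial schema into a target schema under #/$defs/generated. The "Category:Example" key and the toy schemas are hypothetical placeholders.

    import json

    from osw.utils.oold import (
        AggregateGeneratedSchemasParam,
        AggregateGeneratedSchemasParamMode,
        aggregate_generated_schemas,
        escape_json_strings,
    )

    # Escape values before they are pasted into a handlebars-generated JSON string
    jsondata = {"description": 'All "individuals"', "unit": r"$M \cdot L^2 \cdot T^{-2}$"}
    escaped = escape_json_strings(jsondata)
    assert json.loads('"' + escaped["description"] + '"') == jsondata["description"]

    # Fold a generated partial schema into a target schema (definitions-section mode)
    result = aggregate_generated_schemas(
        AggregateGeneratedSchemasParam(
            target_schema={"title": "Example", "type": "object"},
            generated_schemas={
                # hypothetical source page id -> generated partial schema
                "Category:Example": {"properties": {"name": {"type": "string"}}}
            },
            mode=AggregateGeneratedSchemasParamMode.DEFINITIONS_SECTION,
        )
    )
    # aggregated_schema now holds the partial schema under $defs/generated and
    # references it via allOf: [{"$ref": "#/$defs/generated"}]
    print(json.dumps(result.aggregated_schema, indent=2))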
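
A second sketch (also illustrative, with made-up template and data) of the join block helper that eval_compiled_handlebars_template now registers by default; the call pattern mirrors the new test_helper_join test. The helper drops empty iteration results, joins the rest with the given separator, and wraps the output in the optional intro/outro strings.

    from osw.utils.templates import (
        compile_handlebars_template,
        eval_compiled_handlebars_template,
    )

    # "join" drops empty iteration results, joins the rest with ", ", wraps in [ ]
    template = compile_handlebars_template(
        '{{#join items ", " "[" "]"}}{{#if print}}{{value}}{{/if}}{{/join}}'
    )
    data = {
        "items": [
            {"value": 1, "print": True},
            {"value": 2},  # no "print" flag -> empty block result, dropped
            {"value": 3, "print": True},
        ]
    }
    # the compiled template may return a pybars strlist, hence the join
    rendered = "".join(eval_compiled_handlebars_template(template, data))
    assert rendered == "[1, 3]"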