From 8944a3248ecff25f21223b11d327d19504912ea3 Mon Sep 17 00:00:00 2001 From: Jamie McCusker Date: Tue, 23 Dec 2025 15:58:44 -0500 Subject: [PATCH 1/9] Added initial support for neptune --- setup.py | 3 +- whyis/config/default.py | 2 +- whyis/default_vocab.ttl | 16 +-- .../__init__.py | 0 .../plugin.py | 14 +- .../fuseki}/templates/search.json | 0 whyis/plugins/fuseki/vocab.ttl | 3 + whyis/plugins/neptune/__init__.py | 1 + whyis/plugins/neptune/plugin.py | 122 ++++++++++++++++++ whyis/plugins/neptune/templates/search.json | 22 ++++ whyis/plugins/neptune/vocab.ttl | 3 + 11 files changed, 165 insertions(+), 21 deletions(-) rename whyis/plugins/{sparql_entity_resolver => fuseki}/__init__.py (100%) rename whyis/plugins/{sparql_entity_resolver => fuseki}/plugin.py (87%) rename whyis/{ => plugins/fuseki}/templates/search.json (100%) create mode 100644 whyis/plugins/fuseki/vocab.ttl create mode 100644 whyis/plugins/neptune/__init__.py create mode 100644 whyis/plugins/neptune/plugin.py create mode 100644 whyis/plugins/neptune/templates/search.json create mode 100644 whyis/plugins/neptune/vocab.ttl diff --git a/setup.py b/setup.py index 95c8837ee..15ce105b8 100644 --- a/setup.py +++ b/setup.py @@ -231,7 +231,8 @@ def run(self): 'text/turtle = rdflib.plugins.sparql.results.graph:GraphResultParser' ], 'whyis': [ - 'whyis_sparql_entity_resolver = whyis.plugins.sparql_entity_resolver:SPARQLEntityResolverPlugin', + 'whyis_fuseki = whyis.plugins.fuseki:FusekiSearchPlugin', + 'whyis_neptune = whyis.plugins.neptune:NeptuneSearchPlugin', 'whyis_knowledge_explorer = whyis.plugins.knowledge_explorer:KnowledgeExplorerPlugin' ] }, diff --git a/whyis/config/default.py b/whyis/config/default.py index 285c582c5..1f4b52577 100644 --- a/whyis/config/default.py +++ b/whyis/config/default.py @@ -87,7 +87,7 @@ class Config: MULTIUSER = True PLUGINENGINE_NAMESPACE = "whyis" - PLUGINENGINE_PLUGINS = ['whyis_sparql_entity_resolver'] + PLUGINENGINE_PLUGINS = ['whyis_fuseki'] SECURITY_EMAIL_SENDER = "Name " SECURITY_FLASH_MESSAGES = True diff --git a/whyis/default_vocab.ttl b/whyis/default_vocab.ttl index 67687ca9e..5386a3d9a 100644 --- a/whyis/default_vocab.ttl +++ b/whyis/default_vocab.ttl @@ -481,22 +481,8 @@ np:Nanopublication a owl:Class; whyis:hasDescribe "nanopub_describe.json"; whyis:hasView "nanopublication_view.html". -# a whyis:searchView. -# whyis:searchView whyis:hasView "search.html". - -# a whyis:searchView. - -# whyis:searchView whyis:hasView "search-view.html". - - a whyis:searchApi . - -whyis:searchApi whyis:hasView "search-api.json". - - a whyis:search . - -whyis:HomePage whyis:searchView "search.html"; - whyis:searchData "search.json". +whyis:HomePage whyis:searchView "search.html". whyis:searchView rdfs:subPropertyOf whyis:hasView; dc:identifier "search". 
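A note on the plugin wiring in the hunks above: the `setup.py` change registers both search plugins under the `whyis` entry-point group, and `whyis/config/default.py` now selects `whyis_fuseki` by default. An external package could hook into the same group; the sketch below is illustrative only, and `my_search_plugin`, `my_package`, and `MySearchPlugin` are placeholder names, not part of this patch.

```python
# setup.py of a hypothetical external package (all names are placeholders)
from setuptools import setup, find_packages

setup(
    name='my-package',
    packages=find_packages(),
    entry_points={
        # Same entry-point group used by whyis_fuseki and whyis_neptune above.
        'whyis': [
            'my_search_plugin = my_package.plugin:MySearchPlugin',
        ],
    },
)
```

A deployment would then opt in with `PLUGINENGINE_PLUGINS = ['my_search_plugin']` in its `whyis.conf`, mirroring the default-config change above.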
diff --git a/whyis/plugins/sparql_entity_resolver/__init__.py b/whyis/plugins/fuseki/__init__.py similarity index 100% rename from whyis/plugins/sparql_entity_resolver/__init__.py rename to whyis/plugins/fuseki/__init__.py diff --git a/whyis/plugins/sparql_entity_resolver/plugin.py b/whyis/plugins/fuseki/plugin.py similarity index 87% rename from whyis/plugins/sparql_entity_resolver/plugin.py rename to whyis/plugins/fuseki/plugin.py index e2659a3a0..1bedd779a 100644 --- a/whyis/plugins/sparql_entity_resolver/plugin.py +++ b/whyis/plugins/fuseki/plugin.py @@ -1,6 +1,7 @@ from whyis.plugin import Plugin, EntityResolverListener import rdflib from flask import current_app +from flask_pluginengine import PluginBlueprint, current_plugin prefixes = dict( @@ -14,7 +15,7 @@ dc = rdflib.URIRef("http://purl.org/dc/terms/") ) -class SPARQLEntityResolver(EntityResolverListener): +class FusekiEntityResolver(EntityResolverListener): context_query=""" optional { @@ -69,6 +70,7 @@ def __init__(self, database="knowledge"): self.database = database def on_resolve(self, term, type=None, context=None, label=True): + print(f'Searching {self.database} for {term}') graph = current_app.databases[self.database] context_query = '' if context is not None: @@ -93,14 +95,18 @@ def on_resolve(self, term, type=None, context=None, label=True): results.append(result) return results +plugin_blueprint = PluginBlueprint('fuseki', __name__) -class SPARQLEntityResolverPlugin(Plugin): +class FusekiSearchPlugin(Plugin): resolvers = { - "sparql" : SPARQLEntityResolver, - "fuseki" : SPARQLEntityResolver + "sparql" : FusekiEntityResolver, + "fuseki" : FusekiEntityResolver } + def create_blueprint(self): + return plugin_blueprint + def init(self): resolver_type = self.app.config.get('RESOLVER_TYPE', 'fuseki') resolver_db = self.app.config.get('RESOLVER_DB', "knowledge") diff --git a/whyis/templates/search.json b/whyis/plugins/fuseki/templates/search.json similarity index 100% rename from whyis/templates/search.json rename to whyis/plugins/fuseki/templates/search.json diff --git a/whyis/plugins/fuseki/vocab.ttl b/whyis/plugins/fuseki/vocab.ttl new file mode 100644 index 000000000..b02684bf5 --- /dev/null +++ b/whyis/plugins/fuseki/vocab.ttl @@ -0,0 +1,3 @@ +@prefix whyis: . + +whyis:HomePage whyis:searchData "whyis_fuseki:search.json". diff --git a/whyis/plugins/neptune/__init__.py b/whyis/plugins/neptune/__init__.py new file mode 100644 index 000000000..48aad58ec --- /dev/null +++ b/whyis/plugins/neptune/__init__.py @@ -0,0 +1 @@ +from .plugin import * diff --git a/whyis/plugins/neptune/plugin.py b/whyis/plugins/neptune/plugin.py new file mode 100644 index 000000000..2b5edbb6d --- /dev/null +++ b/whyis/plugins/neptune/plugin.py @@ -0,0 +1,122 @@ +from whyis.plugin import Plugin, EntityResolverListener +import rdflib +from flask import current_app +from flask_pluginengine import PluginBlueprint, current_plugin + + +prefixes = dict( + skos = rdflib.URIRef("http://www.w3.org/2004/02/skos/core#"), + foaf = rdflib.URIRef("http://xmlns.com/foaf/0.1/"), + text = rdflib.URIRef("http://jena.apache.org/fulltext#"), + schema = rdflib.URIRef("http://schema.org/"), + owl = rdflib.OWL, + rdfs = rdflib.RDFS, + rdf = rdflib.RDF, + dc = rdflib.URIRef("http://purl.org/dc/terms/"), + fts = rdflib.URIRef('http://aws.amazon.com/neptune/vocab/v01/services/fts#') +) + +class NeptuneEntityResolver(EntityResolverListener): + + context_query=""" + optional { + (?context ?cr) text:search ('''%s''' 100 0.4). + ?node ?p ?context. 
+ } +""" + type_query = """ +?node rdf:type <%s> . +""" + + query = """ +select distinct +?node +?label +(group_concat(distinct ?type; separator="||") as ?types) +(0.9 as ?score) +where { + SERVICE fts:search { + fts:config neptune-fts:query '''%s''' . + fts:config neptune-fts:endpoint '%s' . + fts:config neptune-fts:queryType 'match' . + fts:config neptune-fts:field dc:title . + fts:config neptune-fts:field rdfs:label . + fts:config neptune-fts:field skos:prefLabel . + fts:config neptune-fts:field skos:altLabel . + fts:config neptune-fts:field foaf:name . + fts:config neptune-fts:field dc:identifier . + fts:config neptune-fts:field schema:name . + fts:config neptune-fts:field skos:notation . + fts:config neptune-fts:return ?node . + } + + optional { + ?node rdf:type ?type. + } + + %s + + filter not exists { + ?node a + } + filter not exists { + ?node a + } + filter not exists { + ?node a + } + filter not exists { + ?node a + } + filter not exists { + ?node a + } +} group by ?node ?label limit 10""" + + def __init__(self, database="knowledge"): + self.database = database + + def on_resolve(self, term, type=None, context=None, label=True): + print(f'Searching {self.database} for {term}') + graph = current_app.databases[self.database] + fts_endpoint = current_app.config['neptune_fts_endpoint'] + #context_query = '' + #if context is not None: + # context_query = self.context_query % context + + type_query = '' + if type is not None: + type_query = self.type_query% type + + query = self.query % (term, fts_endpoint, type_query) + #print(query) + results = [] + for hit in graph.query(query, initNs=prefixes): + result = hit.asdict() + result['types'] = [{'uri':x} for x in result.get('types','').split('||')] + if label: + current_app.labelize(result,'node','preflabel') + result['types'] = [ + current_app.labelize(x,'uri','label') + for x in result['types'] + ] + results.append(result) + return results + +plugin_blueprint = PluginBlueprint('neptune', __name__) + +class NeptuneSearchPlugin(Plugin): + + resolvers = { + "neptune" : NeptuneEntityResolver + } + + def create_blueprint(self): + return plugin_blueprint + + def init(self): + NS.fts = rdflib.Namespace('http://aws.amazon.com/neptune/vocab/v01/services/fts#') + resolver_type = self.app.config.get('RESOLVER_TYPE', 'neptune') + resolver_db = self.app.config.get('RESOLVER_DB', "knowledge") + resolver = self.resolvers[resolver_type](resolver_db) + self.app.add_listener(resolver) diff --git a/whyis/plugins/neptune/templates/search.json b/whyis/plugins/neptune/templates/search.json new file mode 100644 index 000000000..6cc3413f9 --- /dev/null +++ b/whyis/plugins/neptune/templates/search.json @@ -0,0 +1,22 @@ +{{""" + SELECT ?identifier (sample(?d) as ?description) (0.9 as ?score) + WHERE { + + SERVICE fts:search { + fts:config neptune-fts:query ?query . + fts:config neptune-fts:endpoint ?endpoint . + fts:config neptune-fts:queryType 'match' . + fts:config neptune-fts:field '*' . + fts:config neptune-fts:return ?identifier . + } + + (?o ?s) text:search ?query . + filter(lang(?d) = "" || langMatches(lang(?o), "en")) + ?identifier ?p ?o . + filter(!isBlank(?identifier)) + OPTIONAL { + ?identifier dc:description|skos:definition|rdfs:comment|sioc:content|dc:abstract|dc:summary|rdfs:comment|dcelements:description||prov:value|sio:hasValue| ?d. 
+ filter(lang(?d) = "" || langMatches(lang(?d), "en")) + } + } group by ?identifier + LIMIT 1000""" | query(values={"query":rdflib.Literal(args['query'])}) | iter_labelize("identifier","label") | tojson }} diff --git a/whyis/plugins/neptune/vocab.ttl b/whyis/plugins/neptune/vocab.ttl new file mode 100644 index 000000000..13664b8b1 --- /dev/null +++ b/whyis/plugins/neptune/vocab.ttl @@ -0,0 +1,3 @@ +@prefix whyis: . + +whyis:HomePage whyis:searchData "whyis_neptune:search.json". From 4f357e16c50d8cfcc541e81491731c00aeaac325 Mon Sep 17 00:00:00 2001 From: Jamie McCusker Date: Fri, 2 Jan 2026 09:31:19 -0500 Subject: [PATCH 2/9] Initial neptune implementation --- script/build | 2 +- setup.py | 3 ++- whyis/_version.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/script/build b/script/build index c8dfbd981..8fd42f526 100755 --- a/script/build +++ b/script/build @@ -9,4 +9,4 @@ echo ${VERSION} python setup.py build python setup.py sdist -docker build . --build-arg __version__=${VERSION} -t tetherlessworld/whyis:latest -t tetherlessworld/whyis:${VERSION} +docker build . --build-arg __version__=${VERSION} -t tetherlessworld/whyis:${VERSION} # -t tetherlessworld/whyis:latest diff --git a/setup.py b/setup.py index 15ce105b8..45ea47f3c 100644 --- a/setup.py +++ b/setup.py @@ -237,7 +237,8 @@ def run(self): ] }, classifiers=[ - "Development Status :: 5 - Production/Stable", +# "Development Status :: 5 - Production/Stable", + "Development Status :: 4 - Beta", "Framework :: Flask", "Environment :: Web Environment", "Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware", diff --git a/whyis/_version.py b/whyis/_version.py index dfc4ef9d0..8d6a26db9 100644 --- a/whyis/_version.py +++ b/whyis/_version.py @@ -1,4 +1,4 @@ -__version__='2.3.20' +__version__='2.4.0b1' if __name__ == '__main__': print(__version__) From 2e3927debd0e29fb4907d56c97b7095295761ed3 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sun, 4 Jan 2026 14:55:18 -0500 Subject: [PATCH 3/9] Add Neptune plugin database driver with AWS IAM authentication and CloudFormation template (#344) * Initial plan * Add Neptune plugin IAM authentication support with database driver Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Address code review comments: add logging, imports, and documentation Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Add Neptune IAM authentication documentation to ReadTheDocs Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Expand Neptune how-to guide with detailed endpoint configuration Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Fix Neptune entity resolver to correctly use FTS service with proper escaping Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Add CloudFormation template for Neptune serverless setup with FTS Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Refactor Neptune driver to use custom session instead of subclassing Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Upgrade to rdflib 7.1.1 and simplify Neptune driver with custom session Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Use aws_requests_auth and custom_requests parameter for simpler Neptune driver Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Address code review: fix SPARQL injection, reuse session, improve efficiency Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Add test for Neptune driver registration in 
plugin init Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Remove redundant conditional check in driver registration test Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> --- docs/howto/index.rst | 1 + docs/howto/neptune-iam-auth.rst | 388 ++++++++++++++ setup.py | 5 +- tests/unit/test_neptune_plugin.py | 230 ++++++++ .../CLOUDFORMATION.md | 349 ++++++++++++ .../cloudformation-neptune.json | 505 ++++++++++++++++++ whyis/database/database_utils.py | 22 +- whyis/plugins/neptune/README.md | 200 +++++++ whyis/plugins/neptune/plugin.py | 238 ++++++++- 9 files changed, 1909 insertions(+), 29 deletions(-) create mode 100644 docs/howto/neptune-iam-auth.rst create mode 100644 tests/unit/test_neptune_plugin.py create mode 100644 whyis/config-template/{{cookiecutter.project_slug}}/CLOUDFORMATION.md create mode 100644 whyis/config-template/{{cookiecutter.project_slug}}/cloudformation-neptune.json create mode 100644 whyis/plugins/neptune/README.md diff --git a/docs/howto/index.rst b/docs/howto/index.rst index 3b66e28a9..32c2a5476 100644 --- a/docs/howto/index.rst +++ b/docs/howto/index.rst @@ -9,5 +9,6 @@ This section provides step-by-step guides for specific tasks. :maxdepth: 2 :caption: How-to Guides + neptune-iam-auth sdds \ No newline at end of file diff --git a/docs/howto/neptune-iam-auth.rst b/docs/howto/neptune-iam-auth.rst new file mode 100644 index 000000000..04e6346ba --- /dev/null +++ b/docs/howto/neptune-iam-auth.rst @@ -0,0 +1,388 @@ +.. _neptune-iam-auth: + +Using Neptune with AWS IAM Authentication +========================================== + +This guide explains how to configure your Whyis knowledge graph application to use Amazon Neptune with AWS IAM authentication. + +Overview +-------- + +The Neptune plugin extends Whyis to support AWS IAM authentication for Amazon Neptune databases. It uses AWS SigV4 request signing for all SPARQL operations, including: + +- SPARQL queries (SELECT, ASK, CONSTRUCT, DESCRIBE) +- SPARQL updates (INSERT, DELETE, MODIFY) +- Graph Store Protocol operations (PUT, POST, DELETE) +- Full-text search queries via Neptune FTS + +Prerequisites +------------- + +- A Whyis knowledge graph application (created with ``whyis createapp``) +- Access to an Amazon Neptune database cluster (or see Quick Start below to create one) +- AWS credentials with Neptune access permissions + +Quick Start: Automated Neptune Setup +------------------------------------- + +If you don't have a Neptune cluster yet, your Whyis application includes a CloudFormation template that automatically provisions a complete Neptune environment with Full-Text Search. + +The CloudFormation Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Your application's directory contains ``cloudformation-neptune.json``, which creates: + +- **Neptune Serverless Cluster** with IAM authentication enabled +- **OpenSearch Domain** for full-text search capabilities +- **Security Groups** for secure network access +- **IAM Role** with necessary permissions +- **Proper VPC Configuration** for production use + +Using the CloudFormation Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. **Prepare parameters** (edit values for your environment): + + .. 
code-block:: bash + + aws cloudformation create-stack \ + --stack-name my-kgapp-neptune \ + --template-body file://cloudformation-neptune.json \ + --parameters \ + ParameterKey=VPCId,ParameterValue=vpc-xxxxxxxx \ + ParameterKey=PrivateSubnetIds,ParameterValue="subnet-xxx,subnet-yyy" \ + ParameterKey=AllowedCIDR,ParameterValue=10.0.0.0/16 \ + ParameterKey=IAMRoleName,ParameterValue=my-kgapp-neptune-access \ + --capabilities CAPABILITY_NAMED_IAM \ + --region us-east-1 + +2. **Wait for completion** (typically 20-30 minutes): + + .. code-block:: bash + + aws cloudformation wait stack-create-complete \ + --stack-name my-kgapp-neptune \ + --region us-east-1 + +3. **Get configuration values**: + + .. code-block:: bash + + aws cloudformation describe-stacks \ + --stack-name my-kgapp-neptune \ + --region us-east-1 \ + --query 'Stacks[0].Outputs' + + The outputs provide all the values you need for ``whyis.conf`` (see Step 3 below). + +.. note:: + For detailed CloudFormation documentation, see ``CLOUDFORMATION.md`` in your application directory. It includes: + + - Complete parameter descriptions + - AWS Console deployment instructions + - Cost estimates and optimization tips + - Security best practices + - Troubleshooting guide + +Step 1: Enable the Neptune Plugin +---------------------------------- + +Add the Neptune plugin to your application's configuration file (``whyis.conf`` or ``system.conf``): + +.. code-block:: python + + # Enable the Neptune plugin + PLUGINENGINE_PLUGINS = ['neptune'] + + # Or if you already have other plugins enabled: + PLUGINENGINE_PLUGINS = ['neptune', 'other_plugin'] + +Step 2: Install Required Dependencies +-------------------------------------- + +The Neptune plugin requires additional Python packages that are **not** included in core Whyis. + +Add these packages to your application's ``requirements.txt``: + +.. code-block:: text + + aws_requests_auth + +Then install them in your application environment: + +.. code-block:: bash + + pip install -r requirements.txt + +.. note:: + This dependency is only needed when using Neptune with IAM authentication. It is not required for core Whyis functionality or other database backends. + +Step 3: Configure Neptune Connection +------------------------------------- + +Configuring the Knowledge Database Endpoint +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Whyis uses a "knowledge database" to store and query RDF data. To use Neptune as your knowledge database, add the following configuration to your application's ``whyis.conf`` or ``system.conf``: + +.. code-block:: python + + # Configure Neptune as the knowledge database backend + KNOWLEDGE_TYPE = 'neptune' + + # Neptune SPARQL endpoint (required) + # This is the main endpoint for SPARQL queries and updates + KNOWLEDGE_ENDPOINT = 'https://my-cluster.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/sparql' + + # AWS region where your Neptune cluster is located (required for IAM auth) + KNOWLEDGE_REGION = 'us-east-1' + +**Finding Your Neptune Endpoint:** + +1. Log into the AWS Console +2. Navigate to Amazon Neptune +3. Select your Neptune cluster +4. Copy the "Cluster endpoint" from the cluster details +5. Append the port and path: ``https://:8182/sparql`` + +Example: If your cluster endpoint is ``my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com``, your ``KNOWLEDGE_ENDPOINT`` would be: + +.. 
code-block:: python + + KNOWLEDGE_ENDPOINT = 'https://my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com:8182/sparql' + +Configuring Full-Text Search +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Neptune supports full-text search through Amazon OpenSearch Service (formerly Elasticsearch). To enable full-text search queries in your knowledge graph: + +.. code-block:: python + + # Neptune Full-Text Search endpoint (required for FTS queries) + # This is your OpenSearch Service domain endpoint + neptune_fts_endpoint = 'https://search-my-domain.us-east-1.es.amazonaws.com' + +**Finding Your OpenSearch Endpoint:** + +1. Log into the AWS Console +2. Navigate to Amazon OpenSearch Service +3. Select your domain that's integrated with Neptune +4. Copy the "Domain endpoint" from the domain overview +5. Use the HTTPS URL directly (no additional path needed) + +**How Full-Text Search Works:** + +When you execute SPARQL queries with Neptune FTS SERVICE blocks like this: + +.. code-block:: sparql + + PREFIX fts: + + SELECT ?resource ?label WHERE { + SERVICE fts:search { + fts:config neptune-fts:query "search term" . + fts:config neptune-fts:endpoint "https://search-my-domain.us-east-1.es.amazonaws.com" . + fts:config neptune-fts:field rdfs:label . + fts:config neptune-fts:return ?resource . + } + ?resource rdfs:label ?label . + } + +The Neptune plugin automatically passes AWS IAM authentication to both the Neptune SPARQL endpoint and the OpenSearch endpoint, enabling secure full-text search across your knowledge graph. + +Optional Configuration Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Additional optional parameters for advanced configurations: + +.. code-block:: python + + # Optional: Custom AWS service name for SigV4 signing (defaults to 'neptune-db') + KNOWLEDGE_SERVICE_NAME = 'neptune-db' + + # Optional: Separate Graph Store Protocol endpoint for graph operations + # If not specified, uses KNOWLEDGE_ENDPOINT + KNOWLEDGE_GSP_ENDPOINT = 'https://my-cluster.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/data' + + # Optional: Default graph URI for RDF data + KNOWLEDGE_DEFAULT_GRAPH = 'http://example.org/default-graph' + +Complete Configuration Example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here's a complete configuration example for your ``whyis.conf`` or ``system.conf``: + +.. code-block:: python + + # Enable Neptune plugin + PLUGINENGINE_PLUGINS = ['neptune'] + + # Neptune as knowledge database + KNOWLEDGE_TYPE = 'neptune' + KNOWLEDGE_ENDPOINT = 'https://my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com:8182/sparql' + KNOWLEDGE_REGION = 'us-east-1' + + # Full-text search endpoint + neptune_fts_endpoint = 'https://search-my-domain.us-east-1.es.amazonaws.com' + + # Optional: Graph Store Protocol endpoint + KNOWLEDGE_GSP_ENDPOINT = 'https://my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com:8182/data' + +.. important:: + Replace all endpoint URLs and region names with your actual Neptune cluster and OpenSearch domain endpoints. + +Step 4: Configure AWS Credentials +---------------------------------- + +The Neptune driver uses ``boto3`` for AWS credential management. Credentials can be provided in several ways: + +Environment Variables +~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: bash + + export AWS_ACCESS_KEY_ID=your_access_key + export AWS_SECRET_ACCESS_KEY=your_secret_key + export AWS_SESSION_TOKEN=your_session_token # Optional, for temporary credentials + +IAM Roles (Recommended for EC2/ECS) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your Whyis application runs on EC2 or ECS, the driver will automatically use the instance or task IAM role. This is the recommended approach as it avoids managing credentials directly. + +AWS Credentials File +~~~~~~~~~~~~~~~~~~~~ + +Create or edit ``~/.aws/credentials``: + +.. code-block:: ini + + [default] + aws_access_key_id = your_access_key + aws_secret_access_key = your_secret_key + +And ``~/.aws/config``: + +.. code-block:: ini + + [default] + region = us-east-1 + +Step 5: Configure IAM Permissions +---------------------------------- + +Ensure your AWS credentials or IAM role have the necessary Neptune permissions. Example IAM policy: + +.. code-block:: json + + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "neptune-db:connect", + "neptune-db:ReadDataViaQuery", + "neptune-db:WriteDataViaQuery" + ], + "Resource": "arn:aws:neptune-db:us-east-1:123456789012:cluster-XXXXX/*" + } + ] + } + +Step 6: Verify the Configuration +--------------------------------- + +Start your Whyis application and verify the Neptune connection: + +.. code-block:: bash + + cd /apps/your-app + ./run + +Check the application logs for successful Neptune driver registration and database connection. + +How It Works +------------ + +Request Signing +~~~~~~~~~~~~~~~ + +All HTTP requests to Neptune are automatically signed with AWS SigV4: + +- The Neptune connector creates a ``requests.Session`` with ``AWS4Auth`` +- AWS credentials are fetched via ``boto3.Session().get_credentials()`` +- Each request includes signed headers for authentication +- Credentials are automatically refreshed when using IAM roles + +Full-Text Search Authentication +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Full-text search queries work seamlessly with authentication: + +.. code-block:: sparql + + PREFIX fts: + PREFIX dc: + + SELECT ?node ?label WHERE { + SERVICE fts:search { + fts:config neptune-fts:query "search term" . + fts:config neptune-fts:endpoint "https://your-fts-endpoint" . + fts:config neptune-fts:field dc:title . + fts:config neptune-fts:return ?node . + } + ?node dc:title ?label . + } + +The Neptune driver ensures AWS credentials are attached to full-text search requests. + +Troubleshooting +--------------- + +Authentication Errors +~~~~~~~~~~~~~~~~~~~~~ + +If you encounter authentication errors: + +1. Verify AWS credentials are properly configured +2. Check IAM policy grants Neptune access (see Step 5) +3. Ensure the region matches your Neptune cluster +4. Verify the Neptune endpoint URL is correct + +Connection Errors +~~~~~~~~~~~~~~~~~ + +If you cannot connect to Neptune: + +1. Check VPC security groups allow access from your application +2. Verify network connectivity to Neptune endpoint +3. Ensure the endpoint URL includes the port (typically 8182) +4. Verify your Neptune cluster is available + +Import Errors +~~~~~~~~~~~~~ + +If you see ``ModuleNotFoundError: No module named 'boto3'`` or similar: + +1. Ensure ``boto3`` and ``requests-aws4auth`` are in your application's ``requirements.txt`` +2. Run ``pip install -r requirements.txt`` in your application environment +3. 
Restart your application + +Security Considerations +----------------------- + +- **Never commit AWS credentials to source control** +- Use IAM roles when running on AWS infrastructure (EC2, ECS, Lambda) +- Use temporary credentials (STS) when possible +- Always use HTTPS endpoints for Neptune connections +- Restrict IAM policies to minimum required permissions +- Consider using VPC endpoints for Neptune access within AWS + +Additional Resources +-------------------- + +- `AWS Neptune IAM Authentication `_ +- `AWS Neptune Full-Text Search `_ +- `AWS SigV4 Signing `_ +- `boto3 Credentials `_ diff --git a/setup.py b/setup.py index 45ea47f3c..d6bf51015 100644 --- a/setup.py +++ b/setup.py @@ -184,7 +184,7 @@ def run(self): #'mod-wsgi==4.9.0', 'nltk==3.6.5', 'numpy', - 'oxrdflib==0.3.1', + 'oxrdflib==0.3.7', 'pandas', 'PyJWT', 'pyparsing', @@ -192,8 +192,7 @@ def run(self): 'python-dateutil', 'puremagic==1.14', 'python-slugify', - 'rdflib==6.3.2', - 'rdflib-jsonld==0.6.2', + 'rdflib==7.1.1', 'redislite>=6', 'requests[security]', 'sadi', diff --git a/tests/unit/test_neptune_plugin.py b/tests/unit/test_neptune_plugin.py new file mode 100644 index 000000000..59d5184bc --- /dev/null +++ b/tests/unit/test_neptune_plugin.py @@ -0,0 +1,230 @@ +""" +Unit tests for Neptune plugin with IAM authentication. + +Tests the Neptune driver that supports AWS IAM authentication for Amazon Neptune. +""" + +import pytest +from unittest.mock import Mock, patch, MagicMock +from io import BytesIO + +# Skip all tests if dependencies not available +pytest.importorskip("flask_security") +pytest.importorskip("aws_requests_auth") + +from rdflib import URIRef, Namespace, Literal +from rdflib.graph import ConjunctiveGraph +from whyis.database.database_utils import drivers, node_to_sparql + + +class TestNeptuneDriver: + """Test the Neptune driver registration and functionality.""" + + def test_neptune_driver_function_exists(self): + """Test that neptune driver function exists and is callable.""" + from whyis.plugins.neptune.plugin import neptune_driver + + # Verify the function exists and is callable + assert callable(neptune_driver) + + def test_neptune_driver_registered_via_plugin_init(self): + """Test that neptune driver gets registered in drivers dict during plugin init.""" + from whyis.plugins.neptune.plugin import neptune_driver + from whyis.database.database_utils import drivers + + # Store original state + had_neptune = 'neptune' in drivers + original_neptune = drivers.get('neptune') + + # Clear neptune from drivers if it exists + if 'neptune' in drivers: + del drivers['neptune'] + + # Verify neptune driver is not registered + assert 'neptune' not in drivers + + # Simulate what plugin.init() does - directly register the driver + # This is what happens in NeptuneSearchPlugin.init() + drivers['neptune'] = neptune_driver + + # Verify neptune driver is now registered + assert 'neptune' in drivers + assert callable(drivers['neptune']) + assert drivers['neptune'] is neptune_driver + + # Restore original state + if had_neptune: + drivers['neptune'] = original_neptune + elif 'neptune' in drivers: + del drivers['neptune'] + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + def test_neptune_driver_requires_region(self): + """Test that neptune driver requires region configuration.""" + from whyis.plugins.neptune.plugin import neptune_driver + + config = { + '_endpoint': 'https://neptune.example.com/sparql' + } + + with pytest.raises(ValueError, 
match="requires '_region'"): + neptune_driver(config) + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + def test_neptune_driver_returns_graph(self): + """Test that neptune driver returns a ConjunctiveGraph.""" + from whyis.plugins.neptune.plugin import neptune_driver + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_region': 'us-east-1' + } + + graph = neptune_driver(config) + + assert isinstance(graph, ConjunctiveGraph) + # Store should have gsp_endpoint set + assert hasattr(graph.store, 'gsp_endpoint') + assert graph.store.gsp_endpoint == 'https://neptune.example.com/sparql' + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + def test_neptune_driver_with_custom_service_name(self): + """Test that neptune driver accepts custom service name.""" + from whyis.plugins.neptune.plugin import neptune_driver + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_region': 'us-west-2', + '_service_name': 'custom-service' + } + + graph = neptune_driver(config) + + # Graph should be created successfully + assert isinstance(graph, ConjunctiveGraph) + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + def test_neptune_driver_with_gsp_endpoint(self): + """Test that neptune driver uses separate GSP endpoint if provided.""" + from whyis.plugins.neptune.plugin import neptune_driver + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_gsp_endpoint': 'https://neptune.example.com/data', + '_region': 'us-east-1' + } + + graph = neptune_driver(config) + + assert graph.store.gsp_endpoint == 'https://neptune.example.com/data' + + +class TestNeptuneGSPOperations: + """Test Neptune Graph Store Protocol operations with AWS auth.""" + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + @patch('whyis.plugins.neptune.plugin.requests.Session') + def test_gsp_operations_use_aws_auth(self, mock_requests_session): + """Test that GSP operations (publish, put, post, delete) use AWS auth.""" + from whyis.plugins.neptune.plugin import neptune_driver + + # Mock requests session + mock_session_instance = Mock() + mock_response = Mock() + mock_response.ok = True + mock_session_instance.post.return_value = mock_response + mock_session_instance.put.return_value = mock_response + mock_session_instance.delete.return_value = mock_response + mock_requests_session.return_value = mock_session_instance + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_region': 'us-east-1' + } + + graph = neptune_driver(config) + + # Test that publish method exists and has auth + assert hasattr(graph.store, 'publish') + assert hasattr(graph.store, 'put') + assert hasattr(graph.store, 'post') + assert hasattr(graph.store, 'delete') + + # Call publish to verify it works + graph.store.publish(b'test data') + + # Verify a session was created + assert mock_requests_session.called + + +class TestNeptuneEntityResolver: + """Test the NeptuneEntityResolver class.""" + + def test_escape_sparql_string(self): + """Test that SPARQL string escaping works correctly.""" + from whyis.plugins.neptune.plugin import NeptuneEntityResolver + + resolver = NeptuneEntityResolver() + + # Test basic string + assert resolver._escape_sparql_string("test") == "test" + + # Test string with quotes + 
assert resolver._escape_sparql_string('test "quoted"') == 'test \\"quoted\\"' + + # Test string with backslashes + assert resolver._escape_sparql_string('test\\path') == 'test\\\\path' + + # Test string with newlines + assert resolver._escape_sparql_string('test\nline') == 'test\\nline' + + # Test string with carriage returns + assert resolver._escape_sparql_string('test\rline') == 'test\\rline' + + # Test complex string with multiple special characters + assert resolver._escape_sparql_string('test "quote" and\\path\nline') == 'test \\"quote\\" and\\\\path\\nline' + + # Test None + assert resolver._escape_sparql_string(None) == "" + + def test_fts_query_format(self): + """Test that the FTS query is correctly formatted.""" + from whyis.plugins.neptune.plugin import NeptuneEntityResolver + + resolver = NeptuneEntityResolver() + + # Check that the query uses full URIs for Neptune FTS + assert '' in resolver.query + assert '' in resolver.query + assert '' in resolver.query + assert '' in resolver.query + + # Check that query uses string substitution for search term (not variable binding) + assert '"%s"' in resolver.query # Search term should be inserted as quoted string + + def test_on_resolve_escapes_search_term(self): + """Test that on_resolve properly escapes the search term and type.""" + from whyis.plugins.neptune.plugin import NeptuneEntityResolver + + resolver = NeptuneEntityResolver() + + # Test that the query will safely escape special characters in search term + term_with_quotes = 'test "injection" attempt' + escaped = resolver._escape_sparql_string(term_with_quotes) + + # Verify the quotes were escaped + assert escaped == 'test \\"injection\\" attempt' + + # Verify that when formatted into the query, it's safe + test_query = 'SELECT * WHERE { ?s ?p "%s" }' % escaped + + # The query should contain the escaped version + assert 'test \\"injection\\" attempt' in test_query + + # And should not contain the unescaped quotes that could break out + assert 'test "injection" attempt' not in test_query + + # Test escaping type parameter as well + type_with_special_chars = 'http://example.org/Test"Type' + escaped_type = resolver._escape_sparql_string(type_with_special_chars) + assert escaped_type == 'http://example.org/Test\\"Type' diff --git a/whyis/config-template/{{cookiecutter.project_slug}}/CLOUDFORMATION.md b/whyis/config-template/{{cookiecutter.project_slug}}/CLOUDFORMATION.md new file mode 100644 index 000000000..f515cabae --- /dev/null +++ b/whyis/config-template/{{cookiecutter.project_slug}}/CLOUDFORMATION.md @@ -0,0 +1,349 @@ +# Setting Up AWS Neptune with CloudFormation + +This directory contains a CloudFormation template (`cloudformation-neptune.json`) that automates the deployment of AWS Neptune Serverless with Full-Text Search capabilities for your Whyis knowledge graph application. + +## What This Template Creates + +The CloudFormation template provisions: + +1. **Neptune Serverless Cluster**: A scalable Neptune database cluster with IAM authentication enabled +2. **OpenSearch Domain**: For full-text search capabilities integrated with Neptune +3. **Security Groups**: Proper network security for both Neptune and OpenSearch +4. **IAM Role**: With necessary permissions to access both Neptune and OpenSearch +5. **VPC Configuration**: Subnet groups for secure deployment + +## Prerequisites + +Before deploying this template, you need: + +1. 
**AWS Account** with appropriate permissions to create: + - Neptune clusters + - OpenSearch domains + - IAM roles and policies + - EC2 security groups + - VPC subnet groups + +2. **Existing VPC** with: + - At least 2 private subnets in different Availability Zones + - Proper routing configuration + - NAT Gateway (if your application needs internet access) + +3. **AWS CLI** installed and configured (or use AWS Console) + +## Deployment Steps + +### Option 1: Using AWS CLI + +1. **Prepare your parameters** by creating a `parameters.json` file: + +```json +[ + { + "ParameterKey": "DBClusterIdentifier", + "ParameterValue": "my-kgapp-neptune" + }, + { + "ParameterKey": "VPCId", + "ParameterValue": "vpc-xxxxxxxxx" + }, + { + "ParameterKey": "PrivateSubnetIds", + "ParameterValue": "subnet-xxxxxxxx,subnet-yyyyyyyy" + }, + { + "ParameterKey": "AllowedCIDR", + "ParameterValue": "10.0.0.0/16" + }, + { + "ParameterKey": "IAMRoleName", + "ParameterValue": "my-kgapp-neptune-access" + }, + { + "ParameterKey": "MinNCUs", + "ParameterValue": "2.5" + }, + { + "ParameterKey": "MaxNCUs", + "ParameterValue": "128" + }, + { + "ParameterKey": "OpenSearchInstanceType", + "ParameterValue": "t3.small.search" + }, + { + "ParameterKey": "OpenSearchInstanceCount", + "ParameterValue": "1" + } +] +``` + +2. **Deploy the stack**: + +```bash +aws cloudformation create-stack \ + --stack-name my-kgapp-neptune-stack \ + --template-body file://cloudformation-neptune.json \ + --parameters file://parameters.json \ + --capabilities CAPABILITY_NAMED_IAM \ + --region us-east-1 +``` + +3. **Monitor the deployment**: + +```bash +aws cloudformation describe-stacks \ + --stack-name my-kgapp-neptune-stack \ + --region us-east-1 \ + --query 'Stacks[0].StackStatus' +``` + +The deployment typically takes 20-30 minutes to complete. + +4. **Get the outputs**: + +```bash +aws cloudformation describe-stacks \ + --stack-name my-kgapp-neptune-stack \ + --region us-east-1 \ + --query 'Stacks[0].Outputs' +``` + +### Option 2: Using AWS Console + +1. Log into the AWS Console +2. Navigate to CloudFormation service +3. Click "Create Stack" → "With new resources" +4. Select "Upload a template file" +5. Upload the `cloudformation-neptune.json` file +6. Fill in the required parameters: + - **DBClusterIdentifier**: Unique name for your Neptune cluster + - **VPCId**: Select your VPC + - **PrivateSubnetIds**: Select at least 2 private subnets in different AZs + - **AllowedCIDR**: IP range that can access Neptune and OpenSearch + - **IAMRoleName**: Name for the IAM role (must be unique) + - **MinNCUs/MaxNCUs**: Capacity settings for Neptune Serverless + - **OpenSearchInstanceType**: Instance type for OpenSearch + - **OpenSearchInstanceCount**: Number of OpenSearch nodes +7. Acknowledge IAM resource creation +8. Click "Create Stack" + +## Configuring Your Whyis Application + +After the CloudFormation stack completes, configure your Whyis application: + +### 1. Get Configuration Values from Stack Outputs + +The CloudFormation outputs provide all the values you need. Key outputs: + +- `NeptuneSPARQLEndpoint`: Neptune SPARQL endpoint URL +- `OpenSearchFTSEndpoint`: OpenSearch full-text search endpoint +- `Region`: AWS region +- `NeptuneAccessRoleArn`: IAM role ARN for accessing Neptune +- `WhyisConfigSummary`: Quick reference of all configuration values + +### 2. 
Update whyis.conf + +Add these lines to your `whyis.conf`: + +```python +# Enable Neptune plugin +PLUGINENGINE_PLUGINS = ['neptune'] + +# Neptune configuration +KNOWLEDGE_TYPE = 'neptune' +KNOWLEDGE_ENDPOINT = 'https://:8182/sparql' # From NeptuneSPARQLEndpoint output +KNOWLEDGE_REGION = 'us-east-1' # From Region output + +# Full-text search configuration +neptune_fts_endpoint = 'https://' # From OpenSearchFTSEndpoint output +``` + +### 3. Add Dependencies to requirements.txt + +``` +aws_requests_auth +``` + +Install dependencies: + +```bash +pip install -r requirements.txt +``` + +### 4. Configure AWS Credentials + +Your application needs AWS credentials to access Neptune. Choose one option: + +#### Option A: Using IAM Role (Recommended for EC2/ECS) + +If running on EC2, attach the instance profile to your instance: + +```bash +# Get the instance profile ARN from CloudFormation outputs +aws ec2 associate-iam-instance-profile \ + --instance-id i-xxxxxxxxx \ + --iam-instance-profile Arn= +``` + +#### Option B: Using Environment Variables (For local development) + +Create an IAM user with permissions to assume the Neptune access role, then: + +```bash +export AWS_ACCESS_KEY_ID=your_access_key +export AWS_SECRET_ACCESS_KEY=your_secret_key +export AWS_REGION=us-east-1 +``` + +#### Option C: Using AWS CLI Profile + +```bash +aws configure --profile neptune +# Enter your credentials +export AWS_PROFILE=neptune +``` + +### 5. Verify the Configuration + +Start your Whyis application and verify Neptune connection: + +```bash +./run +``` + +Check the logs for successful Neptune plugin initialization and database connection. + +## Configuration Parameters Explained + +### Required Parameters + +- **DBClusterIdentifier**: Unique identifier for your Neptune cluster (3-63 characters, alphanumeric and hyphens) +- **VPCId**: The VPC where Neptune and OpenSearch will be deployed +- **PrivateSubnetIds**: At least 2 private subnets in different Availability Zones for high availability +- **AllowedCIDR**: CIDR block that can access Neptune and OpenSearch (e.g., your VPC CIDR) +- **IAMRoleName**: Name for the IAM role that grants access to Neptune and OpenSearch + +### Optional Parameters (with defaults) + +- **MinNCUs**: Minimum Neptune Capacity Units (default: 2.5) - Lowest cost option +- **MaxNCUs**: Maximum Neptune Capacity Units (default: 128) - Allows scaling to high workloads +- **OpenSearchInstanceType**: Instance type for OpenSearch (default: t3.small.search) - Good for development +- **OpenSearchInstanceCount**: Number of OpenSearch instances (default: 1) - Use 2+ for production + +## Cost Considerations + +### Neptune Serverless Costs + +- **NCU-hours**: Charged per NCU-hour when cluster is active +- **Storage**: Charged per GB-month +- **I/O**: Charged per million requests +- **Backups**: Automated backups included, additional snapshots charged + +Estimated monthly cost (with 2.5 NCUs average, 10GB data): +- ~$150-300/month depending on usage patterns + +### OpenSearch Costs + +- **Instance hours**: Based on instance type (t3.small.search ~$35/month) +- **Storage**: Charged per GB (20GB included in template) + +### Cost Optimization Tips + +1. **Development**: Use MinNCUs=1, t3.small.search, single instance +2. **Production**: Use MinNCUs=2.5, larger instance types, multiple instances for HA +3. **Stop when not in use**: Neptune Serverless automatically scales to zero after inactivity +4. **Monitor usage**: Use AWS Cost Explorer to track actual costs + +## Security Best Practices + +1. 
**Network Security**: + - Deploy in private subnets only + - Use restrictive security groups + - Set AllowedCIDR to minimum required range + +2. **IAM Authentication**: + - Always use IAM authentication (enabled by default in template) + - Rotate credentials regularly + - Use IAM roles instead of long-term credentials when possible + +3. **Encryption**: + - Encryption at rest enabled by default + - TLS/HTTPS enforced for all connections + - Node-to-node encryption enabled for OpenSearch + +4. **Least Privilege**: + - Use the provided IAM role with minimal permissions + - Create separate roles for different access patterns if needed + +## Troubleshooting + +### Stack Creation Failed + +1. **Check CloudFormation Events**: + ```bash + aws cloudformation describe-stack-events \ + --stack-name my-kgapp-neptune-stack \ + --region us-east-1 + ``` + +2. **Common Issues**: + - Insufficient IAM permissions + - VPC/Subnet configuration issues + - Resource naming conflicts + - Service limits exceeded + +### Connection Issues + +1. **Verify Security Groups**: Ensure your application's security group can reach Neptune (port 8182) and OpenSearch (port 443) + +2. **Check IAM Permissions**: Verify the IAM role has neptune-db:* and es:* permissions + +3. **Test Connectivity**: + ```bash + # From an instance in the same VPC + curl -k https://:8182/sparql + ``` + +### OpenSearch Access Issues + +1. **Fine-grained Access Control**: Ensure the IAM role ARN is configured as master user +2. **VPC Configuration**: Verify OpenSearch is in the correct subnets +3. **Domain Policy**: Check the access policy allows your CIDR range + +## Updating the Stack + +To update configuration (e.g., increase capacity): + +```bash +aws cloudformation update-stack \ + --stack-name my-kgapp-neptune-stack \ + --template-body file://cloudformation-neptune.json \ + --parameters file://updated-parameters.json \ + --capabilities CAPABILITY_NAMED_IAM \ + --region us-east-1 +``` + +## Deleting the Stack + +To remove all resources: + +```bash +aws cloudformation delete-stack \ + --stack-name my-kgapp-neptune-stack \ + --region us-east-1 +``` + +**Warning**: This will permanently delete: +- All data in Neptune +- All data in OpenSearch +- Security groups and IAM roles + +Create a backup before deletion if you need to preserve data. 
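For the backup mentioned above, one way to snapshot the cluster before deleting the stack is sketched below. This is a minimal illustration, assuming `boto3` is installed and credentialed (the IAM-auth how-to already relies on it for credential handling); the identifiers are placeholders matching the `DBClusterIdentifier` parameter used earlier.

```python
# Snapshot the Neptune cluster before deleting the CloudFormation stack.
# Assumes boto3 is installed and AWS credentials/region are configured.
import boto3

neptune = boto3.client('neptune', region_name='us-east-1')  # adjust region as needed

response = neptune.create_db_cluster_snapshot(
    DBClusterIdentifier='my-kgapp-neptune',                # your DBClusterIdentifier parameter
    DBClusterSnapshotIdentifier='my-kgapp-neptune-final',  # any unique snapshot name
)
print(response['DBClusterSnapshot']['Status'])  # 'creating' until the snapshot completes
```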
+ +## Additional Resources + +- [AWS Neptune Documentation](https://docs.aws.amazon.com/neptune/latest/userguide/) +- [Neptune IAM Authentication](https://docs.aws.amazon.com/neptune/latest/userguide/iam-auth.html) +- [Neptune Full-Text Search](https://docs.aws.amazon.com/neptune/latest/userguide/full-text-search.html) +- [OpenSearch Documentation](https://docs.aws.amazon.com/opensearch-service/) +- [CloudFormation Best Practices](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/best-practices.html) diff --git a/whyis/config-template/{{cookiecutter.project_slug}}/cloudformation-neptune.json b/whyis/config-template/{{cookiecutter.project_slug}}/cloudformation-neptune.json new file mode 100644 index 000000000..0d6d5b20b --- /dev/null +++ b/whyis/config-template/{{cookiecutter.project_slug}}/cloudformation-neptune.json @@ -0,0 +1,505 @@ +{ + "AWSTemplateFormatVersion": "2010-09-09", + "Description": "CloudFormation template for AWS Neptune Serverless cluster with Full-Text Search (OpenSearch) for Whyis Knowledge Graph Application", + "Parameters": { + "DBClusterIdentifier": { + "Type": "String", + "Default": "{{cookiecutter.project_slug}}-neptune", + "Description": "Neptune DB cluster identifier", + "MinLength": 1, + "MaxLength": 63, + "AllowedPattern": "^[a-zA-Z][a-zA-Z0-9-]*$", + "ConstraintDescription": "Must begin with a letter and contain only alphanumeric characters and hyphens" + }, + "MinNCUs": { + "Type": "Number", + "Default": 2.5, + "Description": "Minimum Neptune Capacity Units (NCUs) for serverless cluster", + "AllowedValues": [1, 2.5] + }, + "MaxNCUs": { + "Type": "Number", + "Default": 128, + "Description": "Maximum Neptune Capacity Units (NCUs) for serverless cluster", + "AllowedValues": [2.5, 128] + }, + "OpenSearchInstanceType": { + "Type": "String", + "Default": "t3.small.search", + "Description": "OpenSearch instance type for Full-Text Search", + "AllowedValues": [ + "t3.small.search", + "t3.medium.search", + "r6g.large.search", + "r6g.xlarge.search" + ] + }, + "OpenSearchInstanceCount": { + "Type": "Number", + "Default": 1, + "Description": "Number of OpenSearch instances", + "MinValue": 1, + "MaxValue": 10 + }, + "VPCId": { + "Type": "AWS::EC2::VPC::Id", + "Description": "VPC ID where Neptune and OpenSearch will be deployed" + }, + "PrivateSubnetIds": { + "Type": "List", + "Description": "List of private subnet IDs for Neptune and OpenSearch (at least 2 in different AZs)" + }, + "AllowedCIDR": { + "Type": "String", + "Default": "10.0.0.0/8", + "Description": "CIDR block allowed to access Neptune and OpenSearch", + "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})", + "ConstraintDescription": "Must be a valid CIDR range" + }, + "IAMRoleName": { + "Type": "String", + "Default": "{{cookiecutter.project_slug}}-neptune-access-role", + "Description": "Name for the IAM role that will access Neptune", + "MinLength": 1, + "MaxLength": 64 + } + }, + "Resources": { + "NeptuneSecurityGroup": { + "Type": "AWS::EC2::SecurityGroup", + "Properties": { + "GroupDescription": "Security group for Neptune cluster", + "VpcId": { + "Ref": "VPCId" + }, + "SecurityGroupIngress": [ + { + "IpProtocol": "tcp", + "FromPort": 8182, + "ToPort": 8182, + "CidrIp": { + "Ref": "AllowedCIDR" + }, + "Description": "Allow Neptune access from specified CIDR" + } + ], + "Tags": [ + { + "Key": "Name", + "Value": { + "Fn::Sub": "${DBClusterIdentifier}-sg" + } + } + ] + } + }, + "OpenSearchSecurityGroup": { + "Type": "AWS::EC2::SecurityGroup", + "Properties": { + 
"GroupDescription": "Security group for OpenSearch domain", + "VpcId": { + "Ref": "VPCId" + }, + "SecurityGroupIngress": [ + { + "IpProtocol": "tcp", + "FromPort": 443, + "ToPort": 443, + "SourceSecurityGroupId": { + "Ref": "NeptuneSecurityGroup" + }, + "Description": "Allow HTTPS from Neptune security group" + }, + { + "IpProtocol": "tcp", + "FromPort": 443, + "ToPort": 443, + "CidrIp": { + "Ref": "AllowedCIDR" + }, + "Description": "Allow HTTPS from specified CIDR" + } + ], + "Tags": [ + { + "Key": "Name", + "Value": { + "Fn::Sub": "${DBClusterIdentifier}-opensearch-sg" + } + } + ] + } + }, + "NeptuneDBSubnetGroup": { + "Type": "AWS::Neptune::DBSubnetGroup", + "Properties": { + "DBSubnetGroupName": { + "Fn::Sub": "${DBClusterIdentifier}-subnet-group" + }, + "DBSubnetGroupDescription": "Subnet group for Neptune cluster", + "SubnetIds": { + "Ref": "PrivateSubnetIds" + }, + "Tags": [ + { + "Key": "Name", + "Value": { + "Fn::Sub": "${DBClusterIdentifier}-subnet-group" + } + } + ] + } + }, + "NeptuneDBCluster": { + "Type": "AWS::Neptune::DBCluster", + "Properties": { + "DBClusterIdentifier": { + "Ref": "DBClusterIdentifier" + }, + "Engine": "neptune", + "EngineVersion": "1.3.2.0", + "ServerlessScalingConfiguration": { + "MinCapacity": { + "Ref": "MinNCUs" + }, + "MaxCapacity": { + "Ref": "MaxNCUs" + } + }, + "DBSubnetGroupName": { + "Ref": "NeptuneDBSubnetGroup" + }, + "VpcSecurityGroupIds": [ + { + "Ref": "NeptuneSecurityGroup" + } + ], + "IamAuthEnabled": true, + "BackupRetentionPeriod": 7, + "PreferredBackupWindow": "03:00-04:00", + "PreferredMaintenanceWindow": "mon:04:00-mon:05:00", + "Tags": [ + { + "Key": "Name", + "Value": { + "Ref": "DBClusterIdentifier" + } + } + ] + } + }, + "OpenSearchDomain": { + "Type": "AWS::OpenSearchService::Domain", + "Properties": { + "DomainName": { + "Fn::Sub": "${DBClusterIdentifier}-fts" + }, + "EngineVersion": "OpenSearch_2.11", + "ClusterConfig": { + "InstanceType": { + "Ref": "OpenSearchInstanceType" + }, + "InstanceCount": { + "Ref": "OpenSearchInstanceCount" + }, + "DedicatedMasterEnabled": false, + "ZoneAwarenessEnabled": { + "Fn::If": [ + "MultipleInstances", + true, + false + ] + } + }, + "EBSOptions": { + "EBSEnabled": true, + "VolumeType": "gp3", + "VolumeSize": 20 + }, + "VPCOptions": { + "SubnetIds": [ + { + "Fn::Select": [ + 0, + { + "Ref": "PrivateSubnetIds" + } + ] + } + ], + "SecurityGroupIds": [ + { + "Ref": "OpenSearchSecurityGroup" + } + ] + }, + "AccessPolicies": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "es:*", + "Resource": { + "Fn::Sub": "arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/${DBClusterIdentifier}-fts/*" + }, + "Condition": { + "IpAddress": { + "aws:SourceIp": { + "Ref": "AllowedCIDR" + } + } + } + } + ] + }, + "AdvancedSecurityOptions": { + "Enabled": true, + "InternalUserDatabaseEnabled": false, + "MasterUserOptions": { + "MasterUserARN": { + "Fn::GetAtt": [ + "NeptuneAccessRole", + "Arn" + ] + } + } + }, + "NodeToNodeEncryptionOptions": { + "Enabled": true + }, + "EncryptionAtRestOptions": { + "Enabled": true + }, + "DomainEndpointOptions": { + "EnforceHTTPS": true, + "TLSSecurityPolicy": "Policy-Min-TLS-1-2-2019-07" + }, + "Tags": [ + { + "Key": "Name", + "Value": { + "Fn::Sub": "${DBClusterIdentifier}-fts" + } + } + ] + } + }, + "NeptuneAccessRole": { + "Type": "AWS::IAM::Role", + "Properties": { + "RoleName": { + "Ref": "IAMRoleName" + }, + "AssumeRolePolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": 
"Allow", + "Principal": { + "Service": [ + "ec2.amazonaws.com", + "ecs-tasks.amazonaws.com", + "lambda.amazonaws.com" + ] + }, + "Action": "sts:AssumeRole" + } + ] + }, + "ManagedPolicyArns": [ + "arn:aws:iam::aws:policy/NeptuneReadOnlyAccess" + ], + "Policies": [ + { + "PolicyName": "NeptuneIAMAccess", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "neptune-db:connect", + "neptune-db:ReadDataViaQuery", + "neptune-db:WriteDataViaQuery", + "neptune-db:DeleteDataViaQuery" + ], + "Resource": { + "Fn::Sub": "arn:aws:neptune-db:${AWS::Region}:${AWS::AccountId}:${NeptuneDBCluster}/*" + } + } + ] + } + }, + { + "PolicyName": "OpenSearchAccess", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "es:ESHttpGet", + "es:ESHttpPost", + "es:ESHttpPut", + "es:ESHttpDelete", + "es:ESHttpHead" + ], + "Resource": { + "Fn::Sub": "arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/${DBClusterIdentifier}-fts/*" + } + } + ] + } + } + ], + "Tags": [ + { + "Key": "Name", + "Value": { + "Ref": "IAMRoleName" + } + } + ] + } + }, + "NeptuneAccessInstanceProfile": { + "Type": "AWS::IAM::InstanceProfile", + "Properties": { + "InstanceProfileName": { + "Fn::Sub": "${IAMRoleName}-instance-profile" + }, + "Roles": [ + { + "Ref": "NeptuneAccessRole" + } + ] + } + } + }, + "Conditions": { + "MultipleInstances": { + "Fn::Not": [ + { + "Fn::Equals": [ + { + "Ref": "OpenSearchInstanceCount" + }, + 1 + ] + } + ] + } + }, + "Outputs": { + "NeptuneClusterEndpoint": { + "Description": "Neptune cluster endpoint", + "Value": { + "Fn::GetAtt": [ + "NeptuneDBCluster", + "Endpoint" + ] + }, + "Export": { + "Name": { + "Fn::Sub": "${AWS::StackName}-NeptuneEndpoint" + } + } + }, + "NeptuneClusterPort": { + "Description": "Neptune cluster port", + "Value": { + "Fn::GetAtt": [ + "NeptuneDBCluster", + "Port" + ] + }, + "Export": { + "Name": { + "Fn::Sub": "${AWS::StackName}-NeptunePort" + } + } + }, + "NeptuneSPARQLEndpoint": { + "Description": "Neptune SPARQL endpoint URL for Whyis configuration", + "Value": { + "Fn::Sub": "https://${NeptuneDBCluster.Endpoint}:${NeptuneDBCluster.Port}/sparql" + } + }, + "OpenSearchDomainEndpoint": { + "Description": "OpenSearch domain endpoint", + "Value": { + "Fn::GetAtt": [ + "OpenSearchDomain", + "DomainEndpoint" + ] + }, + "Export": { + "Name": { + "Fn::Sub": "${AWS::StackName}-OpenSearchEndpoint" + } + } + }, + "OpenSearchFTSEndpoint": { + "Description": "OpenSearch FTS endpoint URL for Whyis configuration", + "Value": { + "Fn::Sub": "https://${OpenSearchDomain.DomainEndpoint}" + } + }, + "NeptuneAccessRoleArn": { + "Description": "ARN of the IAM role for accessing Neptune and OpenSearch", + "Value": { + "Fn::GetAtt": [ + "NeptuneAccessRole", + "Arn" + ] + }, + "Export": { + "Name": { + "Fn::Sub": "${AWS::StackName}-AccessRoleArn" + } + } + }, + "NeptuneAccessInstanceProfileArn": { + "Description": "ARN of the instance profile for EC2 instances", + "Value": { + "Fn::GetAtt": [ + "NeptuneAccessInstanceProfile", + "Arn" + ] + }, + "Export": { + "Name": { + "Fn::Sub": "${AWS::StackName}-InstanceProfileArn" + } + } + }, + "Region": { + "Description": "AWS Region where resources are deployed", + "Value": { + "Ref": "AWS::Region" + } + }, + "WhyisConfigSummary": { + "Description": "Configuration values for whyis.conf", + "Value": { + "Fn::Sub": [ + "KNOWLEDGE_TYPE=neptune | KNOWLEDGE_ENDPOINT=${Endpoint} | KNOWLEDGE_REGION=${Region} | neptune_fts_endpoint=${FTSEndpoint}", + { + "Endpoint": { 
+ "Fn::Sub": "https://${NeptuneDBCluster.Endpoint}:${NeptuneDBCluster.Port}/sparql" + }, + "Region": { + "Ref": "AWS::Region" + }, + "FTSEndpoint": { + "Fn::Sub": "https://${OpenSearchDomain.DomainEndpoint}" + } + } + ] + } + } + } +} diff --git a/whyis/database/database_utils.py b/whyis/database/database_utils.py index a36573558..22c8fc9a6 100644 --- a/whyis/database/database_utils.py +++ b/whyis/database/database_utils.py @@ -209,11 +209,23 @@ def sparql_driver(config): return graph def create_query_store(store): - new_store = WhyisSPARQLStore(endpoint=store.query_endpoint, - query_endpoint=store.query_endpoint, -# method="POST", -# returnFormat='json', - node_to_sparql=node_to_sparql) + """ + Create a read-only query store from an existing store. + + This function creates a query-only store that can be used for read operations + without update capabilities. + + Args: + store: The source store object + + Returns: + A new store configured for queries only + """ + new_store = WhyisSPARQLStore( + endpoint=store.query_endpoint, + query_endpoint=store.query_endpoint, + node_to_sparql=node_to_sparql + ) return new_store # memory_graphs = collections.defaultdict(ConjunctiveGraph) diff --git a/whyis/plugins/neptune/README.md b/whyis/plugins/neptune/README.md new file mode 100644 index 000000000..d2e07e435 --- /dev/null +++ b/whyis/plugins/neptune/README.md @@ -0,0 +1,200 @@ +# Neptune Plugin - AWS IAM Authentication Support + +## Overview + +This plugin extends the Neptune full-text search capabilities to include AWS IAM authentication support for Amazon Neptune databases. It registers a "neptune" database driver that uses AWS SigV4 request signing for all SPARQL queries, updates, and Graph Store Protocol operations. + +## Features + +- **AWS IAM Authentication**: Uses AWS SigV4 request signing for secure access to Neptune databases +- **Automatic Credential Management**: Leverages boto3 for AWS credential discovery (environment variables, IAM roles, etc.) +- **Full Text Search Support**: Passes authentication through to Neptune's full-text search queries +- **Graph Store Protocol**: Supports authenticated PUT, POST, DELETE, and publish operations +- **Configuration-Based**: Easy setup via Flask configuration + +## Installation and Setup + +### 1. Enable the Neptune Plugin + +To enable the Neptune plugin in your Whyis knowledge graph application, add it to your application's configuration file (typically `whyis.conf` or `system.conf`): + +```python +# Enable the Neptune plugin +PLUGINENGINE_PLUGINS = ['neptune'] + +# Or if you already have other plugins enabled: +PLUGINENGINE_PLUGINS = ['neptune', 'other_plugin'] +``` + +### 2. Install Required Dependencies + +The Neptune plugin with IAM authentication requires additional Python packages that are not included in the core Whyis dependencies. Add these to your knowledge graph application's `requirements.txt`: + +``` +aws_requests_auth +``` + +Then install them in your application environment: + +```bash +pip install -r requirements.txt +``` + +**Note**: This dependency is only needed if you're using Neptune with IAM authentication. It is not required for core Whyis functionality. 
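If you want to confirm the optional dependency is importable before enabling the plugin, a quick check like the following can help (a minimal sketch; the import path is the one `plugin.py` itself uses):

```python
# Sanity check: the neptune plugin imports AWSRequestsAuth at module load time,
# so if this import fails, the plugin will fail to initialize as well.
try:
    from aws_requests_auth.aws_auth import AWSRequestsAuth  # noqa: F401
    print("aws_requests_auth is installed")
except ImportError:
    print("Missing dependency: run 'pip install aws_requests_auth'")
```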
+ +## Configuration + +After enabling the plugin and installing dependencies, configure your Whyis application to use Neptune with IAM authentication: + +### System Configuration (system.conf) + +```python +# Neptune SPARQL endpoint +KNOWLEDGE_TYPE = 'neptune' +KNOWLEDGE_ENDPOINT = 'https://my-cluster.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/sparql' + +# AWS region (required for Neptune driver) +KNOWLEDGE_REGION = 'us-east-1' + +# Optional: Custom service name (defaults to 'neptune-db') +KNOWLEDGE_SERVICE_NAME = 'neptune-db' + +# Optional: Separate Graph Store Protocol endpoint +KNOWLEDGE_GSP_ENDPOINT = 'https://my-cluster.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/data' + +# Optional: Default graph URI +KNOWLEDGE_DEFAULT_GRAPH = 'http://example.org/default-graph' + +# Neptune Full-Text Search endpoint +neptune_fts_endpoint = 'https://search-my-domain.us-east-1.es.amazonaws.com' +``` + +### AWS Credentials + +The Neptune driver uses environment variables for AWS credential management. Credentials can be provided via: + +1. **Environment Variables** (required): + ```bash + export AWS_ACCESS_KEY_ID=your_access_key + export AWS_SECRET_ACCESS_KEY=your_secret_key + export AWS_SESSION_TOKEN=your_session_token # Optional, for temporary credentials + ``` + +2. **IAM Roles**: If running on EC2 or ECS with an IAM role, set the environment variables from the role's credentials + +3. **AWS Credentials File** (`~/.aws/credentials`): + ```ini + [default] + aws_access_key_id = your_access_key + aws_secret_access_key = your_secret_key + ``` + Then export them: + ```bash + export AWS_ACCESS_KEY_ID=$(aws configure get aws_access_key_id) + export AWS_SECRET_ACCESS_KEY=$(aws configure get aws_secret_access_key) + ``` + +## How It Works + +### Driver Registration + +The Neptune plugin automatically registers a "neptune" database driver when initialized. This driver: + +1. Creates Neptune SPARQL stores with AWS IAM authentication +2. Signs all HTTP requests with AWS SigV4 signatures +3. Passes authentication to full-text search queries +4. Provides authenticated Graph Store Protocol operations + +### Request Signing + +All requests to Neptune are automatically signed with AWS SigV4: + +- **SPARQL Queries**: SELECT, ASK, CONSTRUCT, DESCRIBE queries +- **SPARQL Updates**: INSERT, DELETE, MODIFY operations +- **Graph Store Protocol**: GET, PUT, POST, DELETE on named graphs +- **Full-Text Search**: Neptune FTS queries via SERVICE blocks + +### Usage in SPARQL Queries + +Full-text search queries work seamlessly with authentication: + +```sparql +PREFIX fts: +PREFIX dc: + +SELECT ?node ?label WHERE { + SERVICE fts:search { + fts:config neptune-fts:query "search term" . + fts:config neptune-fts:endpoint "https://your-fts-endpoint" . + fts:config neptune-fts:field dc:title . + fts:config neptune-fts:return ?node . + } + ?node dc:title ?label . +} +``` + +The Neptune driver ensures that AWS credentials are attached to the full-text search requests. 
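If you need to verify credentials and connectivity independently of Whyis, the same signing setup can be exercised directly with `requests` and `aws_requests_auth` (a minimal sketch; the endpoint, region, and query are placeholders):

```python
# Issue a single SigV4-signed SPARQL query against Neptune, outside of Whyis.
import os
import requests
from urllib.parse import urlparse
from aws_requests_auth.aws_auth import AWSRequestsAuth

endpoint = "https://my-cluster.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/sparql"
auth = AWSRequestsAuth(
    aws_access_key=os.environ["AWS_ACCESS_KEY_ID"],
    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    aws_token=os.environ.get("AWS_SESSION_TOKEN"),
    aws_host=urlparse(endpoint).netloc,
    aws_region="us-east-1",
    aws_service="neptune-db",
)

# Standard SPARQL 1.1 Protocol POST; the auth object signs the request.
response = requests.post(
    endpoint,
    data={"query": "SELECT (COUNT(*) AS ?n) WHERE { ?s ?p ?o }"},
    headers={"Accept": "application/sparql-results+json"},
    auth=auth,
)
print(response.status_code)
print(response.json())
```

If this request succeeds but Whyis queries fail, the problem is more likely in the plugin configuration than in IAM permissions or networking.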
+ +## API + +### Neptune Driver Function + +```python +from whyis.plugins.neptune.plugin import neptune_driver + +config = { + '_endpoint': 'https://neptune.amazonaws.com:8182/sparql', + '_region': 'us-east-1', + '_service_name': 'neptune-db', # Optional + '_gsp_endpoint': 'https://neptune.amazonaws.com:8182/data', # Optional + '_default_graph': 'http://example.org/graph' # Optional +} + +graph = neptune_driver(config) +``` + +## Security Considerations + +- **Credentials**: Never commit AWS credentials to source control +- **IAM Policies**: Ensure Neptune IAM policies grant only necessary permissions +- **Temporary Credentials**: Use STS temporary credentials or IAM roles when possible +- **HTTPS**: Always use HTTPS endpoints for Neptune +- **VPC**: Consider using VPC endpoints for Neptune access within AWS + +## Troubleshooting + +### Authentication Errors + +If you see authentication errors: + +1. Verify AWS credentials are properly configured +2. Check that the IAM policy grants Neptune access: + ```json + { + "Effect": "Allow", + "Action": [ + "neptune-db:connect", + "neptune-db:ReadDataViaQuery", + "neptune-db:WriteDataViaQuery" + ], + "Resource": "arn:aws:neptune-db:region:account:cluster-id/*" + } + ``` +3. Ensure the region is correctly specified +4. Verify the Neptune endpoint URL is correct + +### Connection Errors + +If you cannot connect to Neptune: + +1. Check VPC security groups allow access +2. Verify network connectivity to Neptune endpoint +3. Ensure the endpoint URL includes the port (typically 8182) +4. Check that Neptune cluster is available + +## References + +- [AWS Neptune IAM Authentication](https://docs.aws.amazon.com/neptune/latest/userguide/iam-auth.html) +- [AWS Neptune Full-Text Search](https://docs.aws.amazon.com/neptune/latest/userguide/full-text-search.html) +- [AWS SigV4 Signing](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) +- [boto3 Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) diff --git a/whyis/plugins/neptune/plugin.py b/whyis/plugins/neptune/plugin.py index 2b5edbb6d..46247cda8 100644 --- a/whyis/plugins/neptune/plugin.py +++ b/whyis/plugins/neptune/plugin.py @@ -1,7 +1,16 @@ from whyis.plugin import Plugin, EntityResolverListener +from whyis.namespace import NS import rdflib from flask import current_app from flask_pluginengine import PluginBlueprint, current_plugin +from rdflib import URIRef +from rdflib.graph import ConjunctiveGraph +import requests +import logging +import os +from aws_requests_auth.aws_auth import AWSRequestsAuth + +logger = logging.getLogger(__name__) prefixes = dict( @@ -35,19 +44,19 @@ class NeptuneEntityResolver(EntityResolverListener): (group_concat(distinct ?type; separator="||") as ?types) (0.9 as ?score) where { - SERVICE fts:search { - fts:config neptune-fts:query '''%s''' . - fts:config neptune-fts:endpoint '%s' . - fts:config neptune-fts:queryType 'match' . - fts:config neptune-fts:field dc:title . - fts:config neptune-fts:field rdfs:label . - fts:config neptune-fts:field skos:prefLabel . - fts:config neptune-fts:field skos:altLabel . - fts:config neptune-fts:field foaf:name . - fts:config neptune-fts:field dc:identifier . - fts:config neptune-fts:field schema:name . - fts:config neptune-fts:field skos:notation . - fts:config neptune-fts:return ?node . + SERVICE { + "%s" . + "%s" . + "match" . + dc:title . + rdfs:label . + skos:prefLabel . + skos:altLabel . + foaf:name . + dc:identifier . + schema:name . + skos:notation . + ?node . 
} optional { @@ -75,21 +84,39 @@ class NeptuneEntityResolver(EntityResolverListener): def __init__(self, database="knowledge"): self.database = database + + def _escape_sparql_string(self, s): + """ + Escape a string for safe inclusion in a SPARQL query. + + This prevents SPARQL injection by escaping special characters. + """ + if s is None: + return "" + # Escape backslashes first, then quotes, then newlines/returns + s = str(s).replace('\\', '\\\\') + s = s.replace('"', '\\"') + s = s.replace('\n', '\\n') + s = s.replace('\r', '\\r') + return s def on_resolve(self, term, type=None, context=None, label=True): - print(f'Searching {self.database} for {term}') + logger.info(f'Searching {self.database} for {term}') graph = current_app.databases[self.database] fts_endpoint = current_app.config['neptune_fts_endpoint'] - #context_query = '' - #if context is not None: - # context_query = self.context_query % context - + + # Safely escape the search term for inclusion in SPARQL query + escaped_term = self._escape_sparql_string(term) + escaped_endpoint = self._escape_sparql_string(fts_endpoint) + type_query = '' if type is not None: - type_query = self.type_query% type + # Escape the type URI to prevent SPARQL injection + escaped_type = self._escape_sparql_string(type) + type_query = self.type_query % escaped_type - query = self.query % (term, fts_endpoint, type_query) - #print(query) + query = self.query % (escaped_term, escaped_endpoint, type_query) + results = [] for hit in graph.query(query, initNs=prefixes): result = hit.asdict() @@ -105,6 +132,160 @@ def on_resolve(self, term, type=None, context=None, label=True): plugin_blueprint = PluginBlueprint('neptune', __name__) + +def neptune_driver(config): + """ + Create an AWS Neptune SPARQL-based RDF graph store with IAM authentication. + + Uses WhyisSPARQLUpdateStore with a custom requests session for AWS SigV4 auth. + + Configuration options (via Flask config with prefix like KNOWLEDGE_ or ADMIN_): + - _endpoint: Neptune SPARQL query/update endpoint (required) + - _gsp_endpoint: Graph Store Protocol endpoint (optional, defaults to _endpoint) + - _region: AWS region where Neptune instance is located (required) + - _service_name: AWS service name for signing (optional, default: 'neptune-db') + - _default_graph: Default graph URI (optional) + + Example configuration in system.conf: + KNOWLEDGE_ENDPOINT = 'https://my-neptune.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/sparql' + KNOWLEDGE_REGION = 'us-east-1' + KNOWLEDGE_GSP_ENDPOINT = 'https://my-neptune.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/data' + + Authentication: + Uses AWS credentials from the environment (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + or IAM roles. All requests are signed with SigV4, including full text search queries. 
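+
+    Usage sketch (illustrative; the endpoint and region values are placeholders):
+        config = {
+            '_endpoint': 'https://my-neptune.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/sparql',
+            '_region': 'us-east-1',
+        }
+        graph = neptune_driver(config)
+        for row in graph.query('SELECT (COUNT(*) AS ?n) WHERE { ?s ?p ?o }'):
+            print(row.n)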
+ """ + from whyis.database.database_utils import node_to_sparql, WhyisSPARQLUpdateStore + from urllib.parse import urlparse + + defaultgraph = None + if "_default_graph" in config: + defaultgraph = URIRef(config["_default_graph"]) + + # Get AWS region (required for Neptune) + region_name = config.get("_region") + if not region_name: + raise ValueError("Neptune driver requires '_region' configuration parameter") + + service_name = config.get("_service_name", "neptune-db") + endpoint_url = config["_endpoint"] + + # Extract host from endpoint URL for AWS auth + parsed_url = urlparse(endpoint_url) + aws_host = parsed_url.netloc + + # Create AWS authentication using environment credentials + # Credentials will be automatically picked up from environment variables or ~/.aws/credentials + aws_access_key = os.environ.get('AWS_ACCESS_KEY_ID') + aws_secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') + aws_session_token = os.environ.get('AWS_SESSION_TOKEN') + + if not aws_access_key or not aws_secret_key: + raise ValueError("Neptune driver requires AWS credentials (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables)") + + auth = AWSRequestsAuth( + aws_access_key=aws_access_key, + aws_secret_access_key=aws_secret_key, + aws_host=aws_host, + aws_region=region_name, + aws_service=service_name, + aws_token=aws_session_token + ) + + # Create custom requests session with AWS auth + session = requests.Session() + session.auth = auth + + # Create store with standard WhyisSPARQLUpdateStore, passing custom session + store = WhyisSPARQLUpdateStore( + query_endpoint=endpoint_url, + update_endpoint=endpoint_url, + method="POST", + returnFormat='json', + node_to_sparql=node_to_sparql, + custom_requests=session # Pass custom session directly + ) + + store.query_endpoint = endpoint_url + store.gsp_endpoint = config.get("_gsp_endpoint", endpoint_url) + store.auth = None # Neptune uses AWS SigV4, not basic auth + + # Add GSP protocol methods with AWS authentication + store = _remote_sparql_store_protocol_with_aws(store, auth) + + graph = ConjunctiveGraph(store, defaultgraph) + return graph + +def _remote_sparql_store_protocol_with_aws(store, aws_auth): + """ + Add Graph Store Protocol (GSP) operations with AWS authentication. + + This is similar to _remote_sparql_store_protocol but uses AWS SigV4 auth + instead of basic auth. 
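+
+    Attached-method sketch (illustrative; trig_bytes and the graph URI are placeholders):
+        store = _remote_sparql_store_protocol_with_aws(store, auth)
+        store.publish(trig_bytes)                        # signed POST of a TriG payload
+        store.delete('http://example.org/graph/stale')   # signed DELETE of a named graph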
+ + Args: + store: A SPARQL store object with gsp_endpoint attribute + aws_auth: AWSRequestsAuth object for request signing + + Returns: + The store object with GSP methods attached + """ + # Create a reusable session with AWS auth for all GSP operations + session = requests.Session() + session.auth = aws_auth + session.keep_alive = False + + def publish(data, format='text/trig;charset=utf-8'): + kwargs = dict( + headers={'Content-Type': format}, + ) + r = session.post(store.gsp_endpoint, data=data, **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} publish returned status {r.status_code}:\n{r.text}") + + def put(graph): + g = ConjunctiveGraph(store=graph.store) + data = g.serialize(format='turtle') + + kwargs = dict( + headers={'Content-Type': 'text/turtle;charset=utf-8'}, + ) + r = session.put(store.gsp_endpoint, + params=dict(graph=graph.identifier), + data=data, + **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} PUT returned status {r.status_code}:\n{r.text}") + else: + logger.debug(f"{r.text} {r.status_code}") + + def post(graph): + g = ConjunctiveGraph(store=graph.store) + data = g.serialize(format='trig') + + kwargs = dict( + headers={'Content-Type': 'text/trig;charset=utf-8'}, + ) + r = session.post(store.gsp_endpoint, data=data, **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} POST returned status {r.status_code}:\n{r.text}") + + def delete(c): + kwargs = dict() + r = session.delete(store.gsp_endpoint, + params=dict(graph=c), + **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} DELETE returned status {r.status_code}:\n{r.text}") + + store.publish = publish + store.put = put + store.post = post + store.delete = delete + + return store + + class NeptuneSearchPlugin(Plugin): resolvers = { @@ -115,7 +296,22 @@ def create_blueprint(self): return plugin_blueprint def init(self): + """ + Initialize the Neptune plugin. + + This registers the Neptune database driver and entity resolver. 
+ """ + # Import and register the Neptune driver + from whyis.database.database_utils import driver, drivers + + # Register the Neptune driver + if 'neptune' not in drivers: + drivers['neptune'] = neptune_driver + + # Set up namespace NS.fts = rdflib.Namespace('http://aws.amazon.com/neptune/vocab/v01/services/fts#') + + # Set up entity resolver resolver_type = self.app.config.get('RESOLVER_TYPE', 'neptune') resolver_db = self.app.config.get('RESOLVER_DB', "knowledge") resolver = self.resolvers[resolver_type](resolver_db) From f3ce7e5f030e76c06ef1dbfe38fa1f8fa28aeeec Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sun, 4 Jan 2026 15:38:50 -0500 Subject: [PATCH 4/9] Neptune GSP: Use UUID graph URIs instead of explicit default graph (#345) * Initial plan * Add temporary UUID graph functionality for Neptune GSP operations Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> * Complete Neptune GSP temporary UUID graph implementation with tests Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jpmccu <602385+jpmccu@users.noreply.github.com> --- tests/unit/test_neptune_plugin.py | 117 ++++++++++++++++++++++++++++++ whyis/plugins/neptune/README.md | 18 +++++ whyis/plugins/neptune/plugin.py | 112 ++++++++++++++++++++++++---- 3 files changed, 232 insertions(+), 15 deletions(-) diff --git a/tests/unit/test_neptune_plugin.py b/tests/unit/test_neptune_plugin.py index 59d5184bc..28f0cce9e 100644 --- a/tests/unit/test_neptune_plugin.py +++ b/tests/unit/test_neptune_plugin.py @@ -155,6 +155,123 @@ def test_gsp_operations_use_aws_auth(self, mock_requests_session): # Verify a session was created assert mock_requests_session.called + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + @patch('whyis.plugins.neptune.plugin.requests.Session') + @patch('whyis.plugins.neptune.plugin.uuid.uuid4') + def test_publish_uses_temp_graph_by_default(self, mock_uuid, mock_requests_session): + """Test that publish uses temporary UUID graph by default.""" + from whyis.plugins.neptune.plugin import neptune_driver + + # Mock UUID generation + test_uuid = 'test-uuid-1234' + mock_uuid.return_value = test_uuid + + # Mock requests session + mock_session_instance = Mock() + mock_response = Mock() + mock_response.ok = True + mock_session_instance.post.return_value = mock_response + mock_session_instance.delete.return_value = mock_response + mock_requests_session.return_value = mock_session_instance + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_region': 'us-east-1' + } + + graph = neptune_driver(config) + + # Call publish + test_data = b' .' 
+ graph.store.publish(test_data) + + # Verify POST was called with temporary graph parameter + assert mock_session_instance.post.called + post_call_args = mock_session_instance.post.call_args + assert post_call_args[1]['params']['graph'] == f'urn:uuid:{test_uuid}' + + # Verify DELETE was called to clean up temporary graph + assert mock_session_instance.delete.called + delete_call_args = mock_session_instance.delete.call_args + assert delete_call_args[1]['params']['graph'] == f'urn:uuid:{test_uuid}' + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + @patch('whyis.plugins.neptune.plugin.requests.Session') + def test_publish_without_temp_graph(self, mock_requests_session): + """Test that publish uses default graph when use_temp_graph=False.""" + from whyis.plugins.neptune.plugin import neptune_driver + + # Mock requests session + mock_session_instance = Mock() + mock_response = Mock() + mock_response.ok = True + mock_session_instance.post.return_value = mock_response + mock_session_instance.delete.return_value = mock_response + mock_requests_session.return_value = mock_session_instance + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_region': 'us-east-1', + '_use_temp_graph': False + } + + graph = neptune_driver(config) + + # Call publish + test_data = b' .' + graph.store.publish(test_data) + + # Verify POST was called WITHOUT graph parameter + assert mock_session_instance.post.called + post_call_args = mock_session_instance.post.call_args + assert 'params' not in post_call_args[1] or post_call_args[1].get('params') is None + + # Verify DELETE was NOT called + assert not mock_session_instance.delete.called + + + @patch('whyis.plugins.neptune.plugin.os.environ', {'AWS_ACCESS_KEY_ID': 'test_key', 'AWS_SECRET_ACCESS_KEY': 'test_secret'}) + @patch('whyis.plugins.neptune.plugin.requests.Session') + @patch('whyis.plugins.neptune.plugin.uuid.uuid4') + def test_temp_graph_cleanup_on_error(self, mock_uuid, mock_requests_session): + """Test that temporary graph is still deleted even if POST fails.""" + from whyis.plugins.neptune.plugin import neptune_driver + + # Mock UUID generation + test_uuid = 'test-uuid-error' + mock_uuid.return_value = test_uuid + + # Mock requests session - POST fails but DELETE succeeds + mock_session_instance = Mock() + mock_post_response = Mock() + mock_post_response.ok = False + mock_post_response.status_code = 500 + mock_post_response.text = 'Internal Server Error' + mock_delete_response = Mock() + mock_delete_response.ok = True + mock_session_instance.post.return_value = mock_post_response + mock_session_instance.delete.return_value = mock_delete_response + mock_requests_session.return_value = mock_session_instance + + config = { + '_endpoint': 'https://neptune.example.com/sparql', + '_region': 'us-east-1' + } + + graph = neptune_driver(config) + + # Call publish (should fail but still clean up) + test_data = b' .' 
+ graph.store.publish(test_data) + + # Verify POST was called + assert mock_session_instance.post.called + + # Verify DELETE was still called for cleanup despite POST failure + assert mock_session_instance.delete.called + delete_call_args = mock_session_instance.delete.call_args + assert delete_call_args[1]['params']['graph'] == f'urn:uuid:{test_uuid}' class TestNeptuneEntityResolver: diff --git a/whyis/plugins/neptune/README.md b/whyis/plugins/neptune/README.md index d2e07e435..b40818717 100644 --- a/whyis/plugins/neptune/README.md +++ b/whyis/plugins/neptune/README.md @@ -65,10 +65,15 @@ KNOWLEDGE_GSP_ENDPOINT = 'https://my-cluster.cluster-xxx.us-east-1.neptune.amazo # Optional: Default graph URI KNOWLEDGE_DEFAULT_GRAPH = 'http://example.org/default-graph' +# Optional: Use temporary UUID graphs for GSP operations (defaults to True) +# When True, ensures graph-aware semantics for RDF data with named graphs +KNOWLEDGE_USE_TEMP_GRAPH = True + # Neptune Full-Text Search endpoint neptune_fts_endpoint = 'https://search-my-domain.us-east-1.es.amazonaws.com' ``` + ### AWS Credentials The Neptune driver uses environment variables for AWS credential management. Credentials can be provided via: @@ -105,6 +110,19 @@ The Neptune plugin automatically registers a "neptune" database driver when init 3. Passes authentication to full-text search queries 4. Provides authenticated Graph Store Protocol operations +### Graph-Aware Semantics with Temporary UUID Graphs + +By default (when `KNOWLEDGE_USE_TEMP_GRAPH = True`), the Neptune driver ensures graph-aware semantics for all Graph Store Protocol (GSP) operations: + +- **Problem**: Without this feature, Neptune's GSP implementation inserts triples into an explicit default graph (using `?default` parameter), causing all RDF data to lose its graph structure even when using graph-aware formats like TriG. + +- **Solution**: The driver generates a temporary UUID-based graph URI (e.g., `urn:uuid:...`) for each GSP operation, posts/puts data to that temporary graph, and then deletes it. This ensures that: + - Named graphs from TriG data are preserved correctly + - Graph-aware RDF data maintains its structure + - Union semantics are properly applied instead of explicit default graph semantics + +- **Configuration**: Set `KNOWLEDGE_USE_TEMP_GRAPH = False` to disable this behavior and use legacy default graph semantics. + ### Request Signing All requests to Neptune are automatically signed with AWS SigV4: diff --git a/whyis/plugins/neptune/plugin.py b/whyis/plugins/neptune/plugin.py index 46247cda8..0f91c0b7a 100644 --- a/whyis/plugins/neptune/plugin.py +++ b/whyis/plugins/neptune/plugin.py @@ -8,6 +8,7 @@ import requests import logging import os +import uuid from aws_requests_auth.aws_auth import AWSRequestsAuth logger = logging.getLogger(__name__) @@ -145,11 +146,15 @@ def neptune_driver(config): - _region: AWS region where Neptune instance is located (required) - _service_name: AWS service name for signing (optional, default: 'neptune-db') - _default_graph: Default graph URI (optional) + - _use_temp_graph: Use temporary UUID graphs for GSP operations (optional, default: True) + When True, publish/put/post operations use a temporary UUID-based graph URI + to ensure graph-aware semantics instead of using the default graph. 
Example configuration in system.conf: KNOWLEDGE_ENDPOINT = 'https://my-neptune.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/sparql' KNOWLEDGE_REGION = 'us-east-1' KNOWLEDGE_GSP_ENDPOINT = 'https://my-neptune.cluster-xxx.us-east-1.neptune.amazonaws.com:8182/data' + KNOWLEDGE_USE_TEMP_GRAPH = True # Default, ensures graph-aware semantics Authentication: Uses AWS credentials from the environment (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) @@ -170,6 +175,9 @@ def neptune_driver(config): service_name = config.get("_service_name", "neptune-db") endpoint_url = config["_endpoint"] + # Get temporary graph usage configuration (default: True) + use_temp_graph = config.get("_use_temp_graph", True) + # Extract host from endpoint URL for AWS auth parsed_url = urlparse(endpoint_url) aws_host = parsed_url.netloc @@ -211,21 +219,27 @@ def neptune_driver(config): store.auth = None # Neptune uses AWS SigV4, not basic auth # Add GSP protocol methods with AWS authentication - store = _remote_sparql_store_protocol_with_aws(store, auth) + store = _remote_sparql_store_protocol_with_aws(store, auth, use_temp_graph=use_temp_graph) graph = ConjunctiveGraph(store, defaultgraph) return graph -def _remote_sparql_store_protocol_with_aws(store, aws_auth): +def _remote_sparql_store_protocol_with_aws(store, aws_auth, use_temp_graph=True): """ Add Graph Store Protocol (GSP) operations with AWS authentication. This is similar to _remote_sparql_store_protocol but uses AWS SigV4 auth instead of basic auth. + When use_temp_graph is True (default), publish/put/post operations use a + temporary UUID-based graph URI to ensure graph-aware semantics. This prevents + triples from being inserted into an explicit default graph and instead maintains + the graph structure from the RDF data (e.g., TriG format). 
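+
+    Temporary-graph flow, roughly what publish/put/post do below (illustrative):
+        temp_graph_uri = f"urn:uuid:{uuid.uuid4()}"
+        session.post(store.gsp_endpoint, params=dict(graph=temp_graph_uri), data=data, **kwargs)
+        session.delete(store.gsp_endpoint, params=dict(graph=temp_graph_uri))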
+ Args: store: A SPARQL store object with gsp_endpoint attribute aws_auth: AWSRequestsAuth object for request signing + use_temp_graph: If True, use temporary UUID graphs for GSP operations (default: True) Returns: The store object with GSP methods attached @@ -239,9 +253,31 @@ def publish(data, format='text/trig;charset=utf-8'): kwargs = dict( headers={'Content-Type': format}, ) - r = session.post(store.gsp_endpoint, data=data, **kwargs) - if not r.ok: - logger.error(f"Error: {store.gsp_endpoint} publish returned status {r.status_code}:\n{r.text}") + + if use_temp_graph: + # Generate a temporary UUID-based graph URI + temp_graph_uri = f"urn:uuid:{uuid.uuid4()}" + + # POST to the temporary graph + r = session.post(store.gsp_endpoint, + params=dict(graph=temp_graph_uri), + data=data, + **kwargs) + + # Always delete the temporary graph to clean up, even if POST failed + delete_r = session.delete(store.gsp_endpoint, + params=dict(graph=temp_graph_uri)) + if not delete_r.ok: + logger.warning(f"Warning: Failed to delete temporary graph {temp_graph_uri}: {delete_r.status_code}:\n{delete_r.text}") + + # Log error if POST failed + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} publish returned status {r.status_code}:\n{r.text}") + else: + # Legacy behavior: POST without graph parameter + r = session.post(store.gsp_endpoint, data=data, **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} publish returned status {r.status_code}:\n{r.text}") def put(graph): g = ConjunctiveGraph(store=graph.store) @@ -250,14 +286,38 @@ def put(graph): kwargs = dict( headers={'Content-Type': 'text/turtle;charset=utf-8'}, ) - r = session.put(store.gsp_endpoint, - params=dict(graph=graph.identifier), - data=data, - **kwargs) - if not r.ok: - logger.error(f"Error: {store.gsp_endpoint} PUT returned status {r.status_code}:\n{r.text}") + + if use_temp_graph: + # Generate a temporary UUID-based graph URI + temp_graph_uri = f"urn:uuid:{uuid.uuid4()}" + + # PUT to the temporary graph + r = session.put(store.gsp_endpoint, + params=dict(graph=temp_graph_uri), + data=data, + **kwargs) + + # Always delete the temporary graph to clean up, even if PUT failed + delete_r = session.delete(store.gsp_endpoint, + params=dict(graph=temp_graph_uri)) + if not delete_r.ok: + logger.warning(f"Warning: Failed to delete temporary graph {temp_graph_uri}: {delete_r.status_code}:\n{delete_r.text}") + + # Log result + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} PUT returned status {r.status_code}:\n{r.text}") + else: + logger.debug(f"{r.text} {r.status_code}") else: - logger.debug(f"{r.text} {r.status_code}") + # Legacy behavior: PUT with specified graph identifier + r = session.put(store.gsp_endpoint, + params=dict(graph=graph.identifier), + data=data, + **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} PUT returned status {r.status_code}:\n{r.text}") + else: + logger.debug(f"{r.text} {r.status_code}") def post(graph): g = ConjunctiveGraph(store=graph.store) @@ -266,9 +326,31 @@ def post(graph): kwargs = dict( headers={'Content-Type': 'text/trig;charset=utf-8'}, ) - r = session.post(store.gsp_endpoint, data=data, **kwargs) - if not r.ok: - logger.error(f"Error: {store.gsp_endpoint} POST returned status {r.status_code}:\n{r.text}") + + if use_temp_graph: + # Generate a temporary UUID-based graph URI + temp_graph_uri = f"urn:uuid:{uuid.uuid4()}" + + # POST to the temporary graph + r = session.post(store.gsp_endpoint, + params=dict(graph=temp_graph_uri), + data=data, + **kwargs) + + # Always 
delete the temporary graph to clean up, even if POST failed + delete_r = session.delete(store.gsp_endpoint, + params=dict(graph=temp_graph_uri)) + if not delete_r.ok: + logger.warning(f"Warning: Failed to delete temporary graph {temp_graph_uri}: {delete_r.status_code}:\n{delete_r.text}") + + # Log error if POST failed + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} POST returned status {r.status_code}:\n{r.text}") + else: + # Legacy behavior: POST without graph parameter + r = session.post(store.gsp_endpoint, data=data, **kwargs) + if not r.ok: + logger.error(f"Error: {store.gsp_endpoint} POST returned status {r.status_code}:\n{r.text}") def delete(c): kwargs = dict() From 3e406b5221781c923ed63a434973a81c36238e65 Mon Sep 17 00:00:00 2001 From: Jamie McCusker Date: Sun, 4 Jan 2026 16:21:08 -0500 Subject: [PATCH 5/9] update UI and search --- setup.py | 3 ++- whyis/_version.py | 2 +- whyis/database/database_utils.py | 13 ++++++++----- whyis/plugins/neptune/plugin.py | 3 ++- whyis/plugins/neptune/templates/search.json | 18 ++++++++---------- whyis/static/js/whyis_vue/components/album.vue | 6 +++--- .../static/js/whyis_vue/components/kgcard.vue | 8 +++++--- 7 files changed, 29 insertions(+), 24 deletions(-) diff --git a/setup.py b/setup.py index 45ea47f3c..d147a31eb 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os from distutils.core import setup +from setuptools import find_packages import distutils.command.build import distutils.command.sdist import subprocess @@ -135,7 +136,7 @@ def run(self): license = "Apache License 2.0", keywords = "rdf semantic knowledge graph", url = "http://tetherless-world.github.io/whyis", - packages=['whyis'], + packages=find_packages(), long_description='''Whyis is a nano-scale knowledge graph publishing, management, and analysis framework. Whyis aims to support domain-aware management and curation of knowledge from many different sources. 
Its primary goal is to enable diff --git a/whyis/_version.py b/whyis/_version.py index 8d6a26db9..c556f84b4 100644 --- a/whyis/_version.py +++ b/whyis/_version.py @@ -1,4 +1,4 @@ -__version__='2.4.0b1' +__version__='2.4.0b8' if __name__ == '__main__': print(__version__) diff --git a/whyis/database/database_utils.py b/whyis/database/database_utils.py index a36573558..0d326deca 100644 --- a/whyis/database/database_utils.py +++ b/whyis/database/database_utils.py @@ -93,7 +93,7 @@ def _remote_sparql_store_protocol(store): Returns: The store object with GSP methods attached """ - def publish(data, format='text/trig;charset=utf-8'): + def publish(data, format='application/trig'): s = requests.session() s.keep_alive = False @@ -102,7 +102,10 @@ def publish(data, format='text/trig;charset=utf-8'): ) if store.auth is not None: kwargs['auth'] = store.auth - r = s.post(store.gsp_endpoint, data=data, **kwargs) + r = s.post(store.gsp_endpoint, + params=dict(default='true'), + data=data, + **kwargs) if not r.ok: print(f"Error: {store.gsp_endpoint} publish returned status {r.status_code}:\n{r.text}") @@ -114,7 +117,7 @@ def put(graph): s.keep_alive = False kwargs = dict( - headers={'Content-Type':'text/turtle;charset=utf-8'}, + headers={'Content-Type':'text/turtle'}, ) if store.auth is not None: kwargs['auth'] = store.auth @@ -134,11 +137,11 @@ def post(graph): s.keep_alive = False kwargs = dict( - headers={'Content-Type':'text/trig;charset=utf-8'}, + headers={'Content-Type':'application/trig'}, ) if store.auth is not None: kwargs['auth'] = store.auth - r = s.post(store.gsp_endpoint, data=data, **kwargs) + r = s.post(store.gsp_endpoint, params=dict(default="true"), data=data, **kwargs) if not r.ok: print(f"Error: {store.gsp_endpoint} POST returned status {r.status_code}:\n{r.text}") diff --git a/whyis/plugins/neptune/plugin.py b/whyis/plugins/neptune/plugin.py index 2b5edbb6d..fd8ff5428 100644 --- a/whyis/plugins/neptune/plugin.py +++ b/whyis/plugins/neptune/plugin.py @@ -1,4 +1,5 @@ from whyis.plugin import Plugin, EntityResolverListener +from whyis.namespace import NS import rdflib from flask import current_app from flask_pluginengine import PluginBlueprint, current_plugin @@ -79,7 +80,7 @@ def __init__(self, database="knowledge"): def on_resolve(self, term, type=None, context=None, label=True): print(f'Searching {self.database} for {term}') graph = current_app.databases[self.database] - fts_endpoint = current_app.config['neptune_fts_endpoint'] + fts_endpoint = current_app.config['NEPTUNE_FTS_ENDPOINT'] #context_query = '' #if context is not None: # context_query = self.context_query % context diff --git a/whyis/plugins/neptune/templates/search.json b/whyis/plugins/neptune/templates/search.json index 6cc3413f9..6164410f5 100644 --- a/whyis/plugins/neptune/templates/search.json +++ b/whyis/plugins/neptune/templates/search.json @@ -1,22 +1,20 @@ -{{""" +{{" SELECT ?identifier (sample(?d) as ?description) (0.9 as ?score) WHERE { SERVICE fts:search { - fts:config neptune-fts:query ?query . - fts:config neptune-fts:endpoint ?endpoint . - fts:config neptune-fts:queryType 'match' . - fts:config neptune-fts:field '*' . - fts:config neptune-fts:return ?identifier . + fts:config fts:query '''"+args['query']+"''' . + fts:config fts:endpoint '"+app.config.get('NEPTUNE_FTS_ENDPOINT')+"' . + fts:config fts:queryType 'match' . + fts:config fts:field '*' . + fts:config fts:return ?identifier . } - (?o ?s) text:search ?query . - filter(lang(?d) = "" || langMatches(lang(?o), "en")) ?identifier ?p ?o . 
filter(!isBlank(?identifier)) OPTIONAL { ?identifier dc:description|skos:definition|rdfs:comment|sioc:content|dc:abstract|dc:summary|rdfs:comment|dcelements:description||prov:value|sio:hasValue| ?d. - filter(lang(?d) = "" || langMatches(lang(?d), "en")) + filter(lang(?d) = '' || langMatches(lang(?d), 'en')) } } group by ?identifier - LIMIT 1000""" | query(values={"query":rdflib.Literal(args['query'])}) | iter_labelize("identifier","label") | tojson }} + LIMIT 1000" | query | iter_labelize("identifier","label") | tojson }} diff --git a/whyis/static/js/whyis_vue/components/album.vue b/whyis/static/js/whyis_vue/components/album.vue index 556c22ce0..8272c63cb 100644 --- a/whyis/static/js/whyis_vue/components/album.vue +++ b/whyis/static/js/whyis_vue/components/album.vue @@ -1,9 +1,9 @@