diff --git a/bin/owlapi-wrapper-1.4.1.jar b/bin/owlapi-wrapper-1.4.1.jar
new file mode 100644
index 00000000..a834d38b
Binary files /dev/null and b/bin/owlapi-wrapper-1.4.1.jar differ
diff --git a/lib/ontologies_linked_data.rb b/lib/ontologies_linked_data.rb
index 678e0e89..6c3b2c9b 100644
--- a/lib/ontologies_linked_data.rb
+++ b/lib/ontologies_linked_data.rb
@@ -7,6 +7,13 @@
 # Setup Goo (repo connection and namespaces)
 require "ontologies_linked_data/config/config"
 
+project_root = File.dirname(File.absolute_path(__FILE__))
+
+concerns = Dir.glob("#{project_root}/ontologies_linked_data/concerns/**/*.rb").sort
+concerns.each do |c|
+  require c
+end
+
 # Include other dependent code
 require "ontologies_linked_data/security/authorization"
 require "ontologies_linked_data/security/access_control"
@@ -33,7 +40,6 @@
 require "ontologies_linked_data/models/base"
 
 # Require all models
-project_root = File.dirname(File.absolute_path(__FILE__))
 
 # We need to require deterministic - that is why we have the sort.
 models = Dir.glob(project_root + '/ontologies_linked_data/models/**/*.rb').sort
diff --git a/lib/ontologies_linked_data/concerns/mappings/mapping_bulk_load.rb b/lib/ontologies_linked_data/concerns/mappings/mapping_bulk_load.rb
new file mode 100644
index 00000000..373cc7d9
--- /dev/null
+++ b/lib/ontologies_linked_data/concerns/mappings/mapping_bulk_load.rb
@@ -0,0 +1,27 @@
+module LinkedData
+  module Concerns
+    module Mappings
+      module BulkLoad
+        # Helpers to add new mappings without going through ontologies_api.
+        # Each mapping hash contains classes, relation, creator and comment.
+
+        def bulk_load_mappings(mappings_hash, user_creator, check_exist: true)
+          errors = {}
+          loaded = []
+          mappings_hash&.each_with_index do |mapping, index|
+            loaded << load_mapping(mapping, user_creator, check_exist: check_exist)
+          rescue ArgumentError => e
+            errors[index] = e.message
+          end
+          [loaded, errors]
+        end
+
+        def load_mapping(mapping_hash, user_creator, check_exist: true)
+          LinkedData::Mappings.create_mapping(mapping_hash: mapping_hash, user_creator: user_creator,
+                                              check_exist: check_exist)
+        end
+
+      end
+    end
+  end
+end
diff --git a/lib/ontologies_linked_data/concerns/mappings/mapping_creator.rb b/lib/ontologies_linked_data/concerns/mappings/mapping_creator.rb
new file mode 100644
index 00000000..a1f46f07
--- /dev/null
+++ b/lib/ontologies_linked_data/concerns/mappings/mapping_creator.rb
@@ -0,0 +1,171 @@
+module LinkedData
+  module Concerns
+    module Mappings
+      module Creator
+
+        def create_mapping(mapping_hash:, user_creator:, check_exist: false)
+          object_class, object_submission,
+            subject_class, subject_submission = get_mapping_classes(subject_id: mapping_hash[:subject_source_id],
+                                                                    object_id: mapping_hash[:object_source_id],
+                                                                    classes: mapping_hash[:classes])
+
+          process = create_mapping_process(mapping_hash, subject_submission&.uri, object_submission&.uri, user_creator)
+          classes = [subject_class, object_class]
+
+          if check_exist && LinkedData::Mappings.check_mapping_exist(classes, process.relation)
+            raise ArgumentError, 'Mapping already exists'
+          end
+
+          save_process(process)
+          save_rest_mapping(classes, process)
+        end
+
+        def create_rest_mapping(classes, process)
+          begin
+            backup_mapping = LinkedData::Models::RestBackupMapping.new
+            backup_mapping.uuid = UUID.new.generate
+            backup_mapping.process = process
+            class_urns = generate_class_urns(classes)
+            backup_mapping.class_urns = class_urns
+            # First, validate the backup mapping and insert it into the triple store
+
+            raise StandardError, backup_mapping.errors unless 
backup_mapping.valid?
+
+            backup_mapping.save
+
+          rescue StandardError => e
+            raise IOError, "Saving backup mapping has failed. Message: #{e.message}"
+          end
+
+          # Second, add the mapping id to the current submission graphs
+          rest_predicate = mapping_predicates()['REST'][0]
+          begin
+            classes.each do |c|
+              sub = c.submission
+              unless sub.id.to_s['latest'].nil?
+                # the submission in the class might point to latest
+                sub = LinkedData::Models::Ontology.find(c.submission.ontology.id).first.latest_submission
+              end
+              c_id = c.id
+              graph_id = sub.id
+              graph_insert = RDF::Graph.new
+              graph_insert << [c_id, RDF::URI.new(rest_predicate), backup_mapping.id]
+              Goo.sparql_update_client.insert_data(graph_insert, graph: graph_id)
+            end
+          rescue StandardError => e
+            # Remove the created backup if the following steps of the mapping fail
+            backup_mapping.delete
+            raise StandardError, "Inserting the mapping ID in the submission graphs has failed. Message: #{e.message}"
+          end
+
+          LinkedData::Models::Mapping.new(classes, 'REST', process, backup_mapping.id)
+        end
+
+        def create_mapping_process(mapping_process_hash, source_uri, object_uri, user)
+          process = LinkedData::Models::MappingProcess.new
+          relations_array = Array(mapping_process_hash[:relation]).map { |r| RDF::URI.new(r) }
+          process.relation = relations_array.first
+          process.creator = user
+          process.subject_source_id = RDF::URI.new(source_uri || mapping_process_hash[:subject_source_id])
+          process.object_source_id = RDF::URI.new(object_uri || mapping_process_hash[:object_source_id])
+          process.date = mapping_process_hash[:date] ? DateTime.parse(mapping_process_hash[:date]) : DateTime.now
+          process_fields = %i[source source_name comment name source_contact_info]
+          process_fields.each do |att|
+            process.send("#{att}=", mapping_process_hash[att]) if mapping_process_hash[att]
+          end
+          process
+        end
+
+        private
+
+        def save_rest_mapping(classes, process)
+          LinkedData::Mappings.create_rest_mapping(classes, process)
+        rescue StandardError => e
+          # Remove the created process if the following steps of the mapping fail
+          process.delete
+          raise ArgumentError, "Loading mapping has failed. Message: #{e.message}"
+        end
+
+        def save_process(process)
+          process.save
+        rescue StandardError => e
+          raise ArgumentError, "Loading mapping has failed. Message: #{e.message} : #{process.errors}"
+        end
+
+        def get_mapping_classes(classes:, subject_id:, object_id:)
+          subject_submission = find_submission_by_ontology_id(subject_id)
+          subject_class, subject_submission = find_class(classes.first, subject_submission)
+
+          object_submission = find_submission_by_ontology_id(object_id)
+          object_class, object_submission = find_class(classes.last, object_submission)
+
+          [object_class, object_submission, subject_class, subject_submission]
+        end
+
+        # Generate URNs for class mapping (urn:ONT_ACRO:CLASS_URI)
+        def generate_class_urns(classes)
+          class_urns = []
+          classes.each do |c|
+            next if c.nil?
+
+            if c.instance_of? LinkedData::Models::Class
+              acronym = c.submission.id.to_s.split('/')[-3]
+              class_urns << RDF::URI.new(LinkedData::Models::Class.urn_id(acronym, c.id.to_s))
+            else
+              class_urns << RDF::URI.new(c.urn_id)
+            end
+          end
+          class_urns
+        end
+
+        def find_submission_by_ontology_id(ontology_id)
+          return nil if ontology_id.nil?
+
+          o = LinkedData::Models::Ontology.where(submissions: { uri: ontology_id })
+              .include(submissions: %i[submissionId submissionStatus uri])
+              .first
+          o.nil? ? 
nil : o.latest_submission + end + + def find_ontology_by_class(class_instance) + class_instance.submission.bring :ontology + class_instance.submission.ontology + end + + def find_submission_by_class_id(class_id) + params = { + require_exact_match: true, + defType: 'edismax', + qf: 'resource_id' + } + query = class_id + search_response = LinkedData::Models::Class.search(query, params) + search_response = search_response['response']['docs'] + search_response.each do |resp| + submission_id = resp['ontologyId'] + class_instance = LinkedData::Models::OntologySubmission.find(RDF::URI.new(submission_id)).include(:uri).first + return class_instance unless class_instance.nil? + end + nil + end + + def find_class(class_id, submission) + submission = find_submission_by_class_id(class_id) if submission.nil? + c = nil + unless submission.nil? + c = LinkedData::Models::Class.find(RDF::URI.new(class_id)) + .in(submission) + .first + if c + c.submission.bring :ontology if c.submission.bring?(:ontology) + c.submission.ontology.bring :acronym if c.submission.ontology.bring?(:acronym) + end + + end + [c, submission] + end + end + end + end +end + diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb new file mode 100644 index 00000000..d8bf8019 --- /dev/null +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -0,0 +1,39 @@ +module LinkedData + module Concerns + module OntologySubmission + module MetadataExtractor + + def extract_metadata + version_info = extract_version + ontology_iri = extract_ontology_iri + + self.version = version_info if version_info + self.uri = ontology_iri if ontology_iri + + end + + def extract_version + + query = Goo.sparql_query_client.select(:versionInfo).distinct + .from(self.id) + .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'), + RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'), + :versionInfo]) + + sol = query.each_solution.first || {} + sol[:versionInfo]&.to_s + end + + def extract_ontology_iri + query = Goo.sparql_query_client.select(:uri).distinct + .from(self.id) + .where([:uri, + RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + RDF::URI.new('http://www.w3.org/2002/07/owl#Ontology')]) + sol = query.each_solution.first || {} + sol[:uri]&.to_s + end + end + end + end +end diff --git a/lib/ontologies_linked_data/mappings/mappings.rb b/lib/ontologies_linked_data/mappings/mappings.rb index 33272848..34535aae 100644 --- a/lib/ontologies_linked_data/mappings/mappings.rb +++ b/lib/ontologies_linked_data/mappings/mappings.rb @@ -2,73 +2,78 @@ require 'tmpdir' module LinkedData -module Mappings - OUTSTANDING_LIMIT = 30 - - def self.mapping_predicates() - predicates = {} - predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"] - predicates["SAME_URI"] = - ["http://data.bioontology.org/metadata/def/mappingSameURI"] - predicates["LOOM"] = - ["http://data.bioontology.org/metadata/def/mappingLoom"] - predicates["REST"] = - ["http://data.bioontology.org/metadata/def/mappingRest"] - return predicates - end - - def self.handle_triple_store_downtime(logger=nil) - epr = Goo.sparql_query_client(:main) - status = epr.status + module Mappings + OUTSTANDING_LIMIT = 30 + + extend LinkedData::Concerns::Mappings::Creator + extend LinkedData::Concerns::Mappings::BulkLoad + + def self.mapping_predicates() + predicates = {} + 
predicates['CUI'] = ['http://bioportal.bioontology.org/ontologies/umls/cui'] + predicates['SAME_URI'] = + ['http://data.bioontology.org/metadata/def/mappingSameURI'] + predicates['LOOM'] = + ['http://data.bioontology.org/metadata/def/mappingLoom'] + predicates['REST'] = + ['http://data.bioontology.org/metadata/def/mappingRest'] + return predicates + end + + def self.handle_triple_store_downtime(logger = nil) + epr = Goo.sparql_query_client(:main) + status = epr.status + + if status[:exception] + logger.info(status[:exception]) if logger + exit(1) + end - if status[:exception] - logger.info(status[:exception]) if logger - exit(1) + if status[:outstanding] > OUTSTANDING_LIMIT + if logger + logger.info("The triple store number of outstanding queries exceeded #{OUTSTANDING_LIMIT}. Exiting...") + end + exit(1) + end end - if status[:outstanding] > OUTSTANDING_LIMIT - logger.info("The triple store number of outstanding queries exceeded #{OUTSTANDING_LIMIT}. Exiting...") if logger - exit(1) - end - end + def self.mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr_acronyms = []) + logger = nil unless enable_debug + t = Time.now + latest = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms }) + counts = {} + i = 0 + epr = Goo.sparql_query_client(:main) + + latest.each do |acro, sub| + self.handle_triple_store_downtime(logger) if LinkedData.settings.goo_backend_name === '4store' + t0 = Time.now + s_counts = self.mapping_ontologies_count(sub, nil, reload_cache = reload_cache) + s_total = 0 + + s_counts.each do |k, v| + s_total += v + end + counts[acro] = s_total + i += 1 - def self.mapping_counts(enable_debug=false, logger=nil, reload_cache=false, arr_acronyms=[]) - logger = nil unless enable_debug - t = Time.now - latest = self.retrieve_latest_submissions(options={acronyms:arr_acronyms}) - counts = {} - i = 0 - epr = Goo.sparql_query_client(:main) - - latest.each do |acro, sub| - self.handle_triple_store_downtime(logger) if LinkedData.settings.goo_backend_name === '4store' - t0 = Time.now - s_counts = self.mapping_ontologies_count(sub, nil, reload_cache=reload_cache) - s_total = 0 - - s_counts.each do |k,v| - s_total += v + if enable_debug + logger.info("#{i}/#{latest.count} " + + "Retrieved #{s_total} records for #{acro} in #{Time.now - t0} seconds.") + logger.flush + end + sleep(5) end - counts[acro] = s_total - i += 1 if enable_debug - logger.info("#{i}/#{latest.count} " + - "Retrieved #{s_total} records for #{acro} in #{Time.now - t0} seconds.") + logger.info("Total time #{Time.now - t} sec.") logger.flush end - sleep(5) + return counts end - if enable_debug - logger.info("Total time #{Time.now - t} sec.") - logger.flush - end - return counts - end - - def self.mapping_ontologies_count(sub1, sub2, reload_cache=false) - template = <<-eos + def self.mapping_ontologies_count(sub1, sub2, reload_cache = false) + template = <<-eos { GRAPH <#{sub1.id.to_s}> { ?s1 ?o . @@ -77,76 +82,73 @@ def self.mapping_ontologies_count(sub1, sub2, reload_cache=false) ?s2 ?o . } } -eos - group_count = sub2.nil? ? {} : nil - count = 0 - latest_sub_ids = self.retrieve_latest_submission_ids - epr = Goo.sparql_query_client(:main) - - mapping_predicates().each do |_source, mapping_predicate| - block = template.gsub("predicate", mapping_predicate[0]) - query_template = <<-eos + eos + group_count = sub2.nil? ? 
{} : nil + count = 0 + latest_sub_ids = self.retrieve_latest_submission_ids + epr = Goo.sparql_query_client(:main) + + mapping_predicates().each do |_source, mapping_predicate| + block = template.gsub('predicate', mapping_predicate[0]) + query_template = <<-eos SELECT variables WHERE { block filter } group - eos - query = query_template.sub("block", block) - filter = _source == "SAME_URI" ? '' : 'FILTER (?s1 != ?s2)' - - if sub2.nil? - ont_id = sub1.id.to_s.split("/")[0..-3].join("/") - #STRSTARTS is used to not count older graphs - filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" - query = query.sub("graph","?g") - query = query.sub("filter",filter) - query = query.sub("variables","?g (count(?s1) as ?c)") - query = query.sub("group","GROUP BY ?g") - else - query = query.sub("graph","<#{sub2.id.to_s}>") - query = query.sub("filter",filter) - query = query.sub("variables","(count(?s1) as ?c)") - query = query.sub("group","") - end - graphs = [sub1.id, LinkedData::Models::MappingProcess.type_uri] - graphs << sub2.id unless sub2.nil? + eos + query = query_template.sub('block', block) + filter = _source == 'SAME_URI' ? '' : 'FILTER (?s1 != ?s2)' + + if sub2.nil? + ont_id = sub1.id.to_s.split('/')[0..-3].join('/') + #STRSTARTS is used to not count older graphs + filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" + query = query.sub('graph', '?g') + query = query.sub('filter', filter) + query = query.sub('variables', '?g (count(?s1) as ?c)') + query = query.sub('group', 'GROUP BY ?g') + else + query = query.sub('graph', "<#{sub2.id.to_s}>") + query = query.sub('filter', filter) + query = query.sub('variables', '(count(?s1) as ?c)') + query = query.sub('group', '') + end + graphs = [sub1.id, LinkedData::Models::MappingProcess.type_uri] + graphs << sub2.id unless sub2.nil? - if sub2.nil? - solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) + if sub2.nil? + solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) - solutions.each do |sol| - acr = sol[:g].to_s.split("/")[-3] - next unless latest_sub_ids[acr] == sol[:g].to_s + solutions.each do |sol| + acr = sol[:g].to_s.split('/')[-3] + next unless latest_sub_ids[acr] == sol[:g].to_s - if group_count[acr].nil? - group_count[acr] = 0 + group_count[acr] = 0 if group_count[acr].nil? + group_count[acr] += sol[:c].object + end + else + solutions = epr.query(query, + graphs: graphs) + solutions.each do |sol| + count += sol[:c].object end - group_count[acr] += sol[:c].object - end - else - solutions = epr.query(query, - graphs: graphs ) - solutions.each do |sol| - count += sol[:c].object end - end - end #per predicate query + end #per predicate query + + return group_count if sub2.nil? - if sub2.nil? - return group_count + return count end - return count - end - def self.empty_page(page,size) - p = Goo::Base::Page.new(page,size,nil,[]) + def self.empty_page(page, size) + p = Goo::Base::Page.new(page, size, nil, []) p.aggregate = 0 return p - end + end - def self.mappings_ontologies(sub1,sub2,page,size,classId=nil,reload_cache=false) - union_template = <<-eos + def self.mappings_ontologies(sub1, sub2, page, size, classId = nil, reload_cache = false) + union_template = <<-eos { GRAPH <#{sub1.id.to_s}> { classId ?o . @@ -156,220 +158,213 @@ def self.mappings_ontologies(sub1,sub2,page,size,classId=nil,reload_cache=false) } bind } -eos - blocks = [] - mappings = [] - persistent_count = 0 - acr1 = sub1.id.to_s.split("/")[-3] - - if classId.nil? - acr2 = nil - acr2 = sub2.id.to_s.split("/")[-3] unless sub2.nil? 
- pcount = LinkedData::Models::MappingCount.where(ontologies: acr1) - pcount = pcount.and(ontologies: acr2) unless acr2.nil? - f = Goo::Filter.new(:pair_count) == (not acr2.nil?) - pcount = pcount.filter(f) - pcount = pcount.include(:count) - pcount_arr = pcount.all - persistent_count = pcount_arr.length == 0 ? 0 : pcount_arr.first.count - - return LinkedData::Mappings.empty_page(page,size) if persistent_count == 0 - end - - if classId.nil? - union_template = union_template.gsub("classId", "?s1") - else - union_template = union_template.gsub("classId", "<#{classId.to_s}>") - end - # latest_sub_ids = self.retrieve_latest_submission_ids + eos + blocks = [] + mappings = [] + persistent_count = 0 + acr1 = sub1.id.to_s.split('/')[-3] - mapping_predicates().each do |_source,mapping_predicate| - union_block = union_template.gsub("predicate", mapping_predicate[0]) - union_block = union_block.gsub("bind","BIND ('#{_source}' AS ?source)") + if classId.nil? + acr2 = nil + acr2 = sub2.id.to_s.split('/')[-3] unless sub2.nil? + pcount = LinkedData::Models::MappingCount.where(ontologies: acr1) + pcount = pcount.and(ontologies: acr2) unless acr2.nil? + f = Goo::Filter.new(:pair_count) == (not acr2.nil?) + pcount = pcount.filter(f) + pcount = pcount.include(:count) + pcount_arr = pcount.all + persistent_count = pcount_arr.length == 0 ? 0 : pcount_arr.first.count + + return LinkedData::Mappings.empty_page(page, size) if persistent_count == 0 + end - if sub2.nil? - union_block = union_block.gsub("graph","?g") + if classId.nil? + union_template = union_template.gsub('classId', '?s1') else - union_block = union_block.gsub("graph","<#{sub2.id.to_s}>") + union_template = union_template.gsub('classId', "<#{classId.to_s}>") end - blocks << union_block - end - unions = blocks.join("\nUNION\n") + # latest_sub_ids = self.retrieve_latest_submission_ids + + mapping_predicates().each do |_source, mapping_predicate| + union_block = union_template.gsub('predicate', mapping_predicate[0]) + union_block = union_block.gsub('bind', "BIND ('#{_source}' AS ?source)") - mappings_in_ontology = <<-eos + if sub2.nil? + union_block = union_block.gsub('graph', '?g') + else + union_block = union_block.gsub('graph', "<#{sub2.id.to_s}>") + end + blocks << union_block + end + unions = blocks.join("\nUNION\n") + + mappings_in_ontology = <<-eos SELECT DISTINCT variables WHERE { unions filter } page_group -eos - query = mappings_in_ontology.gsub("unions", unions) - variables = "?s2 graph ?source ?o" - variables = "?s1 " + variables if classId.nil? - query = query.gsub("variables", variables) - filter = classId.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' - - if sub2.nil? - query = query.gsub("graph","?g") - ont_id = sub1.id.to_s.split("/")[0..-3].join("/") - - # latest_sub_filter_arr = latest_sub_ids.map { |_, id| "?g = <#{id}>" } - # filter += "\nFILTER (#{latest_sub_filter_arr.join(' || ')}) " - - #STRSTARTS is used to not count older graphs - #no need since now we delete older graphs - filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" - else - query = query.gsub("graph", "") - end - query = query.gsub("filter", filter) - - if size > 0 - pagination = "OFFSET offset LIMIT limit" - query = query.gsub("page_group",pagination) - limit = size - offset = (page-1) * size - query = query.gsub("limit", "#{limit}").gsub("offset", "#{offset}") - else - query = query.gsub("page_group","") - end - epr = Goo.sparql_query_client(:main) - graphs = [sub1.id] - unless sub2.nil? 
- graphs << sub2.id - end - solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) - s1 = nil - unless classId.nil? - s1 = RDF::URI.new(classId.to_s) - end - solutions.each do |sol| - graph2 = nil + eos + query = mappings_in_ontology.gsub('unions', unions) + variables = '?s2 graph ?source ?o' + variables = '?s1 ' + variables if classId.nil? + query = query.gsub('variables', variables) + filter = classId.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' + if sub2.nil? - graph2 = sol[:g] + query = query.gsub('graph', '?g') + ont_id = sub1.id.to_s.split('/')[0..-3].join('/') + + # latest_sub_filter_arr = latest_sub_ids.map { |_, id| "?g = <#{id}>" } + # filter += "\nFILTER (#{latest_sub_filter_arr.join(' || ')}) " + + #STRSTARTS is used to not count older graphs + #no need since now we delete older graphs + filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" else - graph2 = sub2.id + query = query.gsub('graph', '') end - if classId.nil? - s1 = sol[:s1] + query = query.gsub('filter', filter) + + if size > 0 + pagination = 'OFFSET offset LIMIT limit' + query = query.gsub('page_group', pagination) + limit = size + offset = (page - 1) * size + query = query.gsub('limit', "#{limit}").gsub('offset', "#{offset}") + else + query = query.gsub('page_group', '') end - classes = [ read_only_class(s1.to_s,sub1.id.to_s), - read_only_class(sol[:s2].to_s,graph2.to_s) ] + epr = Goo.sparql_query_client(:main) + graphs = [sub1.id] + graphs << sub2.id unless sub2.nil? + solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) + s1 = nil + s1 = RDF::URI.new(classId.to_s) unless classId.nil? + solutions.each do |sol| + graph2 = nil + if sub2.nil? + graph2 = sol[:g] + else + graph2 = sub2.id + end + s1 = sol[:s1] if classId.nil? + classes = [read_only_class(s1.to_s, sub1.id.to_s), + read_only_class(sol[:s2].to_s, graph2.to_s)] + + backup_mapping = nil + mapping = nil + if sol[:source].to_s == 'REST' + backup_mapping = LinkedData::Models::RestBackupMapping + .find(sol[:o]).include(:process).first + backup_mapping.process.bring_remaining + end - backup_mapping = nil - mapping = nil - if sol[:source].to_s == "REST" - backup_mapping = LinkedData::Models::RestBackupMapping - .find(sol[:o]).include(:process).first - backup_mapping.process.bring_remaining - end - if backup_mapping.nil? - mapping = LinkedData::Models::Mapping.new( - classes,sol[:source].to_s) - else - mapping = LinkedData::Models::Mapping.new( - classes,sol[:source].to_s, - backup_mapping.process,backup_mapping.id) + mapping = if backup_mapping.nil? 
+ LinkedData::Models::Mapping.new(classes, sol[:source].to_s) + else + LinkedData::Models::Mapping.new( + classes, sol[:source].to_s, + backup_mapping.process, backup_mapping.id) + end + + mappings << mapping end - mappings << mapping - end - if size == 0 - return mappings + return mappings if size == 0 + + page = Goo::Base::Page.new(page, size, nil, mappings) + page.aggregate = persistent_count + return page end - page = Goo::Base::Page.new(page,size,nil,mappings) - page.aggregate = persistent_count - return page - end - def self.mappings_ontology(sub,page,size,classId=nil,reload_cache=false) - return self.mappings_ontologies(sub,nil,page,size,classId=classId, - reload_cache=reload_cache) - end + def self.mappings_ontology(sub, page, size, classId = nil, reload_cache = false) + return self.mappings_ontologies(sub, nil, page, size, classId = classId, + reload_cache = reload_cache) + end - def self.read_only_class(classId,submissionId) + def self.read_only_class(classId, submissionId) ontologyId = submissionId acronym = nil - unless submissionId["submissions"].nil? - ontologyId = submissionId.split("/")[0..-3] + unless submissionId['submissions'].nil? + ontologyId = submissionId.split('/')[0..-3] acronym = ontologyId.last - ontologyId = ontologyId.join("/") + ontologyId = ontologyId.join('/') else - acronym = ontologyId.split("/")[-1] + acronym = ontologyId.split('/')[-1] end ontology = LinkedData::Models::Ontology - .read_only( - id: RDF::IRI.new(ontologyId), - acronym: acronym) + .read_only( + id: RDF::IRI.new(ontologyId), + acronym: acronym) submission = LinkedData::Models::OntologySubmission - .read_only( - id: RDF::IRI.new(ontologyId+"/submissions/latest"), - # id: RDF::IRI.new(submissionId), - ontology: ontology) + .read_only( + id: RDF::IRI.new(ontologyId + '/submissions/latest'), + # id: RDF::IRI.new(submissionId), + ontology: ontology) mappedClass = LinkedData::Models::Class - .read_only( - id: RDF::IRI.new(classId), - submission: submission, - urn_id: LinkedData::Models::Class.urn_id(acronym,classId) ) + .read_only( + id: RDF::IRI.new(classId), + submission: submission, + urn_id: LinkedData::Models::Class.urn_id(acronym, classId)) return mappedClass - end - - def self.migrate_rest_mappings(acronym) - mappings = LinkedData::Models::RestBackupMapping - .where.include(:uuid, :class_urns, :process).all - if mappings.length == 0 - return [] end - triples = [] - - rest_predicate = mapping_predicates()["REST"][0] - mappings.each do |m| - m.class_urns.each do |u| - u = u.to_s - if u.start_with?("urn:#{acronym}") - class_id = u.split(":")[2..-1].join(":") - triples << - " <#{class_id}> <#{rest_predicate}> <#{m.id}> . " + + def self.migrate_rest_mappings(acronym) + mappings = LinkedData::Models::RestBackupMapping + .where.include(:uuid, :class_urns, :process).all + return [] if mappings.length == 0 + + triples = [] + + rest_predicate = mapping_predicates()['REST'][0] + mappings.each do |m| + m.class_urns.each do |u| + u = u.to_s + if u.start_with?("urn:#{acronym}") + class_id = u.split(':')[2..-1].join(':') + triples << + " <#{class_id}> <#{rest_predicate}> <#{m.id}> . " + end end end - end - return triples - end - - def self.delete_rest_mapping(mapping_id) - mapping = get_rest_mapping(mapping_id) - if mapping.nil? - return nil - end - rest_predicate = mapping_predicates()["REST"][0] - classes = mapping.classes - classes.each do |c| - sub = c.submission - unless sub.id.to_s["latest"].nil? 
- #the submission in the class might point to latest - sub = LinkedData::Models::Ontology.find(c.submission.ontology.id) - .first - .latest_submission + return triples + end + + def self.delete_rest_mapping(mapping_id) + mapping = get_rest_mapping(mapping_id) + return nil if mapping.nil? + + rest_predicate = mapping_predicates()['REST'][0] + classes = mapping.classes + classes.each do |c| + sub = c.submission + unless sub.id.to_s['latest'].nil? + #the submission in the class might point to latest + sub = LinkedData::Models::Ontology.find(c.submission.ontology.id) + .first + .latest_submission + end + graph_delete = RDF::Graph.new + graph_delete << [c.id, RDF::URI.new(rest_predicate), mapping.id] + Goo.sparql_update_client.delete_data(graph_delete, graph: sub.id) end - graph_delete = RDF::Graph.new - graph_delete << [c.id, RDF::URI.new(rest_predicate), mapping.id] - Goo.sparql_update_client.delete_data(graph_delete, graph: sub.id) - end - mapping.process.delete - backup = LinkedData::Models::RestBackupMapping.find(mapping_id).first - unless backup.nil? - backup.delete + mapping.process.delete + backup = LinkedData::Models::RestBackupMapping.find(mapping_id).first + backup.delete unless backup.nil? + return mapping end - return mapping - end - def self.get_rest_mapping(mapping_id) - backup = LinkedData::Models::RestBackupMapping.find(mapping_id).first - if backup.nil? - return nil + def self.get_mapping_classes_instance(c1, g1, c2, g2) + [read_only_class(c1, g1), read_only_class(c2, g2)] end - rest_predicate = mapping_predicates()["REST"][0] - qmappings = <<-eos + + def self.get_rest_mapping(mapping_id) + backup = LinkedData::Models::RestBackupMapping.find(mapping_id).first + return nil if backup.nil? + + rest_predicate = mapping_predicates()['REST'][0] + qmappings = <<-eos SELECT DISTINCT ?s1 ?c1 ?s2 ?c2 ?uuid ?o WHERE { ?uuid ?o . @@ -383,71 +378,30 @@ def self.get_rest_mapping(mapping_id) FILTER(?uuid = <#{mapping_id}>) FILTER(?s1 != ?s2) } LIMIT 1 -eos - epr = Goo.sparql_query_client(:main) - graphs = [LinkedData::Models::MappingProcess.type_uri] - mapping = nil - epr.query(qmappings, - graphs: graphs).each do |sol| - classes = [ read_only_class(sol[:c1].to_s,sol[:s1].to_s), - read_only_class(sol[:c2].to_s,sol[:s2].to_s) ] - process = LinkedData::Models::MappingProcess.find(sol[:o]).first - mapping = LinkedData::Models::Mapping.new(classes,"REST", - process, - sol[:uuid]) - end - return mapping - end + eos + epr = Goo.sparql_query_client(:main) + graphs = [LinkedData::Models::MappingProcess.type_uri] + mapping = nil + epr.query(qmappings, + graphs: graphs).each do |sol| - def self.create_rest_mapping(classes,process) - unless process.instance_of? LinkedData::Models::MappingProcess - raise ArgumentError, "Process should be instance of MappingProcess" - end - if classes.length != 2 - raise ArgumentError, "Create REST is avalaible for two classes. " + - "Request contains #{classes.length} classes." 
- end - #first create back up mapping that lives across submissions - backup_mapping = LinkedData::Models::RestBackupMapping.new - backup_mapping.uuid = UUID.new.generate - backup_mapping.process = process - class_urns = [] - classes.each do |c| - if c.instance_of?LinkedData::Models::Class - acronym = c.submission.id.to_s.split("/")[-3] - class_urns << RDF::URI.new( - LinkedData::Models::Class.urn_id(acronym,c.id.to_s)) + classes = get_mapping_classes_instance(sol[:c1].to_s, sol[:s1].to_s, sol[:c2].to_s, sol[:s2].to_s) - else - class_urns << RDF::URI.new(c.urn_id()) - end - end - backup_mapping.class_urns = class_urns - backup_mapping.save - - #second add the mapping id to current submission graphs - rest_predicate = mapping_predicates()["REST"][0] - classes.each do |c| - sub = c.submission - unless sub.id.to_s["latest"].nil? - #the submission in the class might point to latest - sub = LinkedData::Models::Ontology.find(c.submission.ontology.id).first.latest_submission + process = LinkedData::Models::MappingProcess.find(sol[:o]).first + mapping = LinkedData::Models::Mapping.new(classes, 'REST', + process, + sol[:uuid]) end - graph_insert = RDF::Graph.new - graph_insert << [c.id, RDF::URI.new(rest_predicate), backup_mapping.id] - Goo.sparql_update_client.insert_data(graph_insert, graph: sub.id) + return mapping end - mapping = LinkedData::Models::Mapping.new(classes,"REST",process) - return mapping - end - def self.mappings_for_classids(class_ids,sources=["REST","CUI"]) - class_ids = class_ids.uniq - predicates = {} - sources.each do |t| - predicates[mapping_predicates()[t][0]] = t - end - qmappings = <<-eos + def self.mappings_for_classids(class_ids, sources = ['REST', 'CUI']) + class_ids = class_ids.uniq + predicates = {} + sources.each do |t| + predicates[mapping_predicates()[t][0]] = t + end + qmappings = <<-eos SELECT DISTINCT ?s1 ?c1 ?s2 ?c2 ?pred WHERE { GRAPH ?s1 { @@ -460,51 +414,50 @@ def self.mappings_for_classids(class_ids,sources=["REST","CUI"]) FILTER(filter_pred) FILTER(filter_classes) } -eos - qmappings = qmappings.gsub("filter_pred", - predicates.keys.map { |x| "?pred = <#{x}>"}.join(" || ")) - qmappings = qmappings.gsub("filter_classes", - class_ids.map { |x| "?c1 = <#{x}>" }.join(" || ")) - epr = Goo.sparql_query_client(:main) - graphs = [LinkedData::Models::MappingProcess.type_uri] - mappings = [] - epr.query(qmappings, - graphs: graphs).each do |sol| - classes = [ read_only_class(sol[:c1].to_s,sol[:s1].to_s), - read_only_class(sol[:c2].to_s,sol[:s2].to_s) ] - source = predicates[sol[:pred].to_s] - mappings << LinkedData::Models::Mapping.new(classes,source) + eos + qmappings = qmappings.gsub('filter_pred', + predicates.keys.map { |x| "?pred = <#{x}>" }.join(' || ')) + qmappings = qmappings.gsub('filter_classes', + class_ids.map { |x| "?c1 = <#{x}>" }.join(' || ')) + epr = Goo.sparql_query_client(:main) + graphs = [LinkedData::Models::MappingProcess.type_uri] + mappings = [] + epr.query(qmappings, + graphs: graphs).each do |sol| + classes = [read_only_class(sol[:c1].to_s, sol[:s1].to_s), + read_only_class(sol[:c2].to_s, sol[:s2].to_s)] + source = predicates[sol[:pred].to_s] + mappings << LinkedData::Models::Mapping.new(classes, source) + end + return mappings end - return mappings - end - def self.recent_rest_mappings(n) - graphs = [LinkedData::Models::MappingProcess.type_uri] - qdate = <<-eos + def self.recent_rest_mappings(n) + graphs = [LinkedData::Models::MappingProcess.type_uri] + qdate = <<-eos SELECT DISTINCT ?s FROM <#{LinkedData::Models::MappingProcess.type_uri}> 
WHERE { ?s ?o } ORDER BY DESC(?o) LIMIT #{n} -eos - epr = Goo.sparql_query_client(:main) - procs = [] - epr.query(qdate, graphs: graphs,query_options: {rules: :NONE}).each do |sol| - procs << sol[:s] - end - if procs.length == 0 - return [] - end - graphs = [LinkedData::Models::MappingProcess.type_uri] - proc_object = Hash.new - LinkedData::Models::MappingProcess.where - .include(LinkedData::Models::MappingProcess.attributes) - .all.each do |obj| - #highly cached query - proc_object[obj.id.to_s] = obj - end - procs = procs.map { |x| "?o = #{x.to_ntriples}" }.join " || " - rest_predicate = mapping_predicates()["REST"][0] - qmappings = <<-eos + eos + epr = Goo.sparql_query_client(:main) + procs = [] + epr.query(qdate, graphs: graphs, query_options: { rules: :NONE }).each do |sol| + procs << sol[:s] + end + return [] if procs.length == 0 + + graphs = [LinkedData::Models::MappingProcess.type_uri] + proc_object = Hash.new + LinkedData::Models::MappingProcess.where + .include(LinkedData::Models::MappingProcess.attributes) + .all.each do |obj| + #highly cached query + proc_object[obj.id.to_s] = obj + end + procs = procs.map { |x| "?o = #{x.to_ntriples}" }.join ' || ' + rest_predicate = mapping_predicates()['REST'][0] + qmappings = <<-eos SELECT DISTINCT ?ont1 ?c1 ?ont2 ?c2 ?o ?uuid WHERE { ?uuid ?o . @@ -521,25 +474,24 @@ def self.recent_rest_mappings(n) FILTER(?c1 != ?c2) FILTER (#{procs}) } -eos - epr = Goo.sparql_query_client(:main) - mappings = [] - epr.query(qmappings, - graphs: graphs,query_options: {rules: :NONE}).each do |sol| - classes = [ read_only_class(sol[:c1].to_s,sol[:ont1].to_s), - read_only_class(sol[:c2].to_s,sol[:ont2].to_s) ] - process = proc_object[sol[:o].to_s] - mapping = LinkedData::Models::Mapping.new(classes,"REST", - process, - sol[:uuid]) - mappings << mapping + eos + epr = Goo.sparql_query_client(:main) + mappings = [] + epr.query(qmappings, + graphs: graphs, query_options: { rules: :NONE }).each do |sol| + classes = get_mapping_classes_instance(sol[:c1].to_s, sol[:ont1].to_s, sol[:c2].to_s, sol[:ont2].to_s) + process = proc_object[sol[:o].to_s] + mapping = LinkedData::Models::Mapping.new(classes, 'REST', + process, + sol[:uuid]) + mappings << mapping + end + mappings.sort_by { |x| x.process.date }.reverse[0..n - 1] end - return mappings.sort_by { |x| x.process.date }.reverse[0..n-1] - end - def self.retrieve_latest_submission_ids(options = {}) - include_views = options[:include_views] || false - ids_query = <<-eos + def self.retrieve_latest_submission_ids(options = {}) + include_views = options[:include_views] || false + ids_query = <<-eos PREFIX xsd: SELECT (CONCAT(xsd:string(?ontology), "/submissions/", xsd:string(MAX(?submissionId))) as ?id) WHERE { @@ -550,8 +502,8 @@ def self.retrieve_latest_submission_ids(options = {}) include_views_filter } GROUP BY ?ontology - eos - include_views_filter = include_views ? '' : <<-eos + eos + include_views_filter = include_views ? '' : <<-eos OPTIONAL { ?id ?ontJoin . } @@ -559,219 +511,247 @@ def self.retrieve_latest_submission_ids(options = {}) ?ontJoin ?viewOf . 
} FILTER(!BOUND(?viewOf)) - eos - ids_query.gsub!("include_views_filter", include_views_filter) - epr = Goo.sparql_query_client(:main) - solutions = epr.query(ids_query) - latest_ids = {} - - solutions.each do |sol| - acr = sol[:id].to_s.split("/")[-3] - latest_ids[acr] = sol[:id].object - end - - latest_ids - end + eos + ids_query.gsub!('include_views_filter', include_views_filter) + epr = Goo.sparql_query_client(:main) + solutions = epr.query(ids_query) + latest_ids = {} + + solutions.each do |sol| + acr = sol[:id].to_s.split('/')[-3] + latest_ids[acr] = sol[:id].object + end - def self.retrieve_latest_submissions(options = {}) - acronyms = (options[:acronyms] || []) - status = (options[:status] || "RDF").to_s.upcase - include_ready = status.eql?("READY") ? true : false - status = "RDF" if status.eql?("READY") - any = status.eql?("ANY") - include_views = options[:include_views] || false - - if any - submissions_query = LinkedData::Models::OntologySubmission.where - else - submissions_query = LinkedData::Models::OntologySubmission.where(submissionStatus: [code: status]) - end - submissions_query = submissions_query.filter(Goo::Filter.new(ontology: [:viewOf]).unbound) unless include_views - submissions = submissions_query.include(:submissionStatus,:submissionId, ontology: [:acronym]).to_a - submissions.select! { |sub| acronyms.include?(sub.ontology.acronym) } unless acronyms.empty? - latest_submissions = {} - - submissions.each do |sub| - next if include_ready && !sub.ready? - latest_submissions[sub.ontology.acronym] ||= sub - latest_submissions[sub.ontology.acronym] = sub if sub.submissionId > latest_submissions[sub.ontology.acronym].submissionId + latest_ids end - return latest_submissions - end - def self.create_mapping_counts(logger, arr_acronyms=[]) - ont_msg = arr_acronyms.empty? ? "all ontologies" : "ontologies [#{arr_acronyms.join(', ')}]" + def self.retrieve_latest_submissions(options = {}) + acronyms = (options[:acronyms] || []) + status = (options[:status] || 'RDF').to_s.upcase + include_ready = status.eql?('READY') ? true : false + status = 'RDF' if status.eql?('READY') + any = status.eql?('ANY') + include_views = options[:include_views] || false - time = Benchmark.realtime do - self.create_mapping_count_totals_for_ontologies(logger, arr_acronyms) - end - logger.info("Completed rebuilding total mapping counts for #{ont_msg} in #{(time/60).round(1)} minutes.") + if any + submissions_query = LinkedData::Models::OntologySubmission.where + else + submissions_query = LinkedData::Models::OntologySubmission.where(submissionStatus: [code: status]) + end + submissions_query = submissions_query.filter(Goo::Filter.new(ontology: [:viewOf]).unbound) unless include_views + submissions = submissions_query.include(:submissionStatus, :submissionId, ontology: [:acronym]).to_a + submissions.select! { |sub| acronyms.include?(sub.ontology.acronym) } unless acronyms.empty? + latest_submissions = {} - time = Benchmark.realtime do - self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) - end - logger.info("Completed rebuilding mapping count pairs for #{ont_msg} in #{(time/60).round(1)} minutes.") - end + submissions.each do |sub| + next if include_ready && !sub.ready? 
- def self.create_mapping_count_totals_for_ontologies(logger, arr_acronyms) - new_counts = self.mapping_counts(enable_debug=true, logger=logger, reload_cache=true, arr_acronyms) - persistent_counts = {} - f = Goo::Filter.new(:pair_count) == false - LinkedData::Models::MappingCount.where.filter(f) - .include(:ontologies, :count) - .include(:all) - .all - .each do |m| - persistent_counts[m.ontologies.first] = m + latest_submissions[sub.ontology.acronym] ||= sub + if sub.submissionId > latest_submissions[sub.ontology.acronym].submissionId + latest_submissions[sub.ontology.acronym] = sub + end + end + return latest_submissions end - num_counts = new_counts.keys.length - ctr = 0 + def self.create_mapping_counts(logger, arr_acronyms = []) + ont_msg = arr_acronyms.empty? ? 'all ontologies' : "ontologies [#{arr_acronyms.join(', ')}]" - new_counts.each_key do |acr| - new_count = new_counts[acr] - ctr += 1 - - if persistent_counts.include?(acr) - inst = persistent_counts[acr] - - if new_count != inst.count - inst.bring_remaining - inst.count = new_count - - begin - if inst.valid? - inst.save - else - logger.error("Error updating mapping count for #{acr}: #{inst.id.to_s}. #{inst.errors}") - next - end - rescue Exception => e - logger.error("Exception updating mapping count for #{acr}: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") - next - end - end - else - m = LinkedData::Models::MappingCount.new - m.ontologies = [acr] - m.pair_count = false - m.count = new_count - - begin - if m.valid? - m.save - else - logger.error("Error saving new mapping count for #{acr}. #{m.errors}") - next - end - rescue Exception => e - logger.error("Exception saving new mapping count for #{acr}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") - next - end + time = Benchmark.realtime do + self.create_mapping_count_totals_for_ontologies(logger, arr_acronyms) end - remaining = num_counts - ctr - logger.info("Total mapping count saved for #{acr}: #{new_count}. " << ((remaining > 0) ? "#{remaining} counts remaining..." 
: "All done!")) - end - end + logger.info("Completed rebuilding total mapping counts for #{ont_msg} in #{(time / 60).round(1)} minutes.") - # This generates pair mapping counts for the given - # ontologies to ALL other ontologies in the system - def self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) - latest_submissions = self.retrieve_latest_submissions(options={acronyms:arr_acronyms}) - ont_total = latest_submissions.length - logger.info("There is a total of #{ont_total} ontologies to process...") - ont_ctr = 0 - # filename = 'mapping_pairs.ttl' - # temp_dir = Dir.tmpdir - # temp_file_path = File.join(temp_dir, filename) - # temp_dir = '/Users/mdorf/Downloads/test/' - # temp_file_path = File.join(File.dirname(file_path), "test.ttl") - # fsave = File.open(temp_file_path, "a") - - latest_submissions.each do |acr, sub| - self.handle_triple_store_downtime(logger) if LinkedData.settings.goo_backend_name === '4store' - new_counts = nil time = Benchmark.realtime do - new_counts = self.mapping_ontologies_count(sub, nil, reload_cache=true) + self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) end - logger.info("Retrieved new mapping pair counts for #{acr} in #{time} seconds.") - ont_ctr += 1 - persistent_counts = {} - LinkedData::Models::MappingCount.where(pair_count: true).and(ontologies: acr) - .include(:ontologies, :count).all.each do |m| - other = m.ontologies.first + logger.info("Completed rebuilding mapping count pairs for #{ont_msg} in #{(time / 60).round(1)} minutes.") + end - if other == acr - other = m.ontologies[1] - end - persistent_counts[other] = m + def self.create_mapping_count_totals_for_ontologies(logger, arr_acronyms) + new_counts = self.mapping_counts(enable_debug = true, logger = logger, reload_cache = true, arr_acronyms) + persistent_counts = {} + f = Goo::Filter.new(:pair_count) == false + LinkedData::Models::MappingCount.where.filter(f) + .include(:ontologies, :count) + .include(:all) + .all + .each do |m| + persistent_counts[m.ontologies.first] = m end num_counts = new_counts.keys.length - logger.info("Ontology: #{acr}. #{num_counts} mapping pair counts to record...") - logger.info("------------------------------------------------") ctr = 0 - new_counts.each_key do |other| - new_count = new_counts[other] + new_counts.each_key do |acr| + new_count = new_counts[acr] ctr += 1 - if persistent_counts.include?(other) - inst = persistent_counts[other] + if persistent_counts.include?(acr) + inst = persistent_counts[acr] if new_count != inst.count inst.bring_remaining - inst.pair_count = true inst.count = new_count begin if inst.valid? - inst.save() - # inst.save({ batch: fsave }) + inst.save else - logger.error("Error updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{inst.errors}") + logger.error("Error updating mapping count for #{acr}: #{inst.id.to_s}. #{inst.errors}") next end rescue Exception => e - logger.error("Exception updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") + logger.error("Exception updating mapping count for #{acr}: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") next end end else m = LinkedData::Models::MappingCount.new + m.ontologies = [acr] + m.pair_count = false m.count = new_count - m.ontologies = [acr,other] - m.pair_count = true begin if m.valid? - m.save() - # m.save({ batch: fsave }) + m.save else - logger.error("Error saving new mapping count for the pair [#{acr}, #{other}]. 
#{m.errors}") + logger.error("Error saving new mapping count for #{acr}. #{m.errors}") next end rescue Exception => e - logger.error("Exception saving new mapping count for the pair [#{acr}, #{other}]. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") + logger.error("Exception saving new mapping count for #{acr}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") next end end remaining = num_counts - ctr - logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << ((remaining > 0) ? "#{remaining} counts remaining for #{acr}..." : "All done!")) - wait_interval = 250 + logger.info("Total mapping count saved for #{acr}: #{new_count}. " << ((remaining > 0) ? "#{remaining} counts remaining..." : 'All done!')) + end + end - if ctr % wait_interval == 0 - sec_to_wait = 1 - logger.info("Waiting #{sec_to_wait} second" << ((sec_to_wait > 1) ? 's' : '') << '...') - sleep(sec_to_wait) + # This generates pair mapping counts for the given + # ontologies to ALL other ontologies in the system + def self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) + latest_submissions = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms }) + ont_total = latest_submissions.length + logger.info("There is a total of #{ont_total} ontologies to process...") + ont_ctr = 0 + # filename = 'mapping_pairs.ttl' + # temp_dir = Dir.tmpdir + # temp_file_path = File.join(temp_dir, filename) + # temp_dir = '/Users/mdorf/Downloads/test/' + # temp_file_path = File.join(File.dirname(file_path), "test.ttl") + # fsave = File.open(temp_file_path, "a") + + latest_submissions.each do |acr, sub| + self.handle_triple_store_downtime(logger) if LinkedData.settings.goo_backend_name === '4store' + new_counts = nil + time = Benchmark.realtime do + new_counts = self.mapping_ontologies_count(sub, nil, reload_cache = true) + end + logger.info("Retrieved new mapping pair counts for #{acr} in #{time} seconds.") + ont_ctr += 1 + persistent_counts = {} + LinkedData::Models::MappingCount.where(pair_count: true).and(ontologies: acr) + .include(:ontologies, :count).all.each do |m| + other = m.ontologies.first + + other = m.ontologies[1] if other == acr + persistent_counts[other] = m end + + num_counts = new_counts.keys.length + logger.info("Ontology: #{acr}. #{num_counts} mapping pair counts to record...") + logger.info('------------------------------------------------') + ctr = 0 + + new_counts.each_key do |other| + new_count = new_counts[other] + ctr += 1 + + if persistent_counts.include?(other) + inst = persistent_counts[other] + + if new_count != inst.count + inst.bring_remaining + inst.pair_count = true + inst.count = new_count + + begin + if inst.valid? + inst.save() + # inst.save({ batch: fsave }) + else + logger.error("Error updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{inst.errors}") + next + end + rescue Exception => e + logger.error("Exception updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") + next + end + end + else + m = LinkedData::Models::MappingCount.new + m.count = new_count + m.ontologies = [acr, other] + m.pair_count = true + + begin + if m.valid? + m.save() + # m.save({ batch: fsave }) + else + logger.error("Error saving new mapping count for the pair [#{acr}, #{other}]. #{m.errors}") + next + end + rescue Exception => e + logger.error("Exception saving new mapping count for the pair [#{acr}, #{other}]. 
#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
+              next
+            end
+          end
+          remaining = num_counts - ctr
+          logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << ((remaining > 0) ? "#{remaining} counts remaining for #{acr}..." : 'All done!'))
+          wait_interval = 250
+
+          if ctr % wait_interval == 0
+            sec_to_wait = 1
+            logger.info("Waiting #{sec_to_wait} second" << ((sec_to_wait > 1) ? 's' : '') << '...')
+            sleep(sec_to_wait)
+          end
+        end
+        remaining_ont = ont_total - ont_ctr
+        logger.info("Completed processing pair mapping counts for #{acr}. " << ((remaining_ont > 0) ? "#{remaining_ont} ontologies remaining..." : 'All ontologies processed!'))
+        sleep(5)
+      end
+      # fsave.close
+    end
+
+    def self.check_mapping_exist(cls, relations_array)
+      class_urns = generate_class_urns(cls)
+      mapping_exist = false
+      qmappings = <<-eos
+SELECT DISTINCT ?uuid ?urn1 ?urn2 ?p
+WHERE {
+  ?uuid <http://data.bioontology.org/metadata/class_urns> ?urn1 .
+  ?uuid <http://data.bioontology.org/metadata/class_urns> ?urn2 .
+  ?uuid <http://data.bioontology.org/metadata/process> ?p .
+FILTER(?urn1 = <#{class_urns[0]}>)
+FILTER(?urn2 = <#{class_urns[1]}>)
+} LIMIT 10
+      eos
+      epr = Goo.sparql_query_client(:main)
+      graphs = [LinkedData::Models::MappingProcess.type_uri]
+      epr.query(qmappings,
+                graphs: graphs).each do |sol|
+        process = LinkedData::Models::MappingProcess.find(sol[:p]).include(:relation).first
+        process_relations = process.relation.map { |r| r.to_s }
+        relations_array = relations_array.map { |r| r.to_s }
+        if process_relations.sort == relations_array.sort
+          mapping_exist = true
+          break
+        end
+      end
+      return mapping_exist
+    end
+  end
 end
diff --git a/lib/ontologies_linked_data/models/mappings/mapping.rb b/lib/ontologies_linked_data/models/mappings/mapping.rb
index c52dd496..afd6523d 100644
--- a/lib/ontologies_linked_data/models/mappings/mapping.rb
+++ b/lib/ontologies_linked_data/models/mappings/mapping.rb
@@ -1,31 +1,20 @@
-
 module LinkedData
   module Models
     class Mapping
       include LinkedData::Hypermedia::Resource
       embed :classes, :process
       serialize_default :id, :source, :classes, :process
+      attr_reader :id, :source, :classes, :process
 
-      def initialize(classes, source, process=nil, id=nil)
+      def initialize(classes, source, process = nil, id = nil)
        @classes = classes
        @process = process
        @source = source
        @id = id
       end
-      def classes
-        return @classes
-      end
-      def process
-        return @process
-      end
-      def source
-        return @source
-      end
-      def id
-        return @id
-      end
+
       def self.type_uri
-        LinkedData.settings.id_url_prefix+"metadata/Mapping"
+        "#{LinkedData.settings.id_url_prefix}metadata/Mapping"
       end
     end
 
@@ -33,44 +22,58 @@ class RestBackupMapping < LinkedData::Models::Base
       include LinkedData::HTTPCache::CacheableResource
       cache_timeout 3600
       model :rest_backup_mapping, name_with: :uuid
-      attribute :uuid, enforce: [:existence, :unique]
-      attribute :class_urns, enforce: [:uri, :existence, :list]
-      attribute :process, enforce: [:existence, :mapping_process]
+      attribute :uuid, enforce: %i[existence unique]
+      attribute :class_urns, enforce: [:uri, :existence, :list, ->(inst, attr) { validate_size(inst, attr) }]
+      attribute :process, enforce: %i[existence mapping_process]
+
+      def self.validate_size(inst, attr)
+        inst.bring(attr) if inst.bring?(attr)
+        value = inst.send(attr)
+
+        unless value.is_a?(Array) && value.length >= 2
+          return [:relation_value_validator, 'does not contain at least 2 terms']
+        end
+
+        
[:relation_value_validator, nil]
+      end
+    end
 
     #only manual mappings
     class MappingProcess < LinkedData::Models::Base
-      model :mapping_process,
-            :name_with => lambda { |s| process_id_generator(s) }
-      attribute :name, enforce: [:existence]
-      attribute :creator, enforce: [:existence, :user]
+      model :mapping_process,
+            name_with: ->(s) { process_id_generator(s) }
+      attribute :name, enforce: [:existence]
+      attribute :creator, enforce: %i[existence user]
 
-      attribute :source
-      attribute :relation, enforce: [:uri]
-      attribute :source_contact_info
-      attribute :source_name
-      attribute :comment
-      attribute :date, enforce: [:date_time],
-                :default => lambda {|x| DateTime.now }
+      attribute :source
+      attribute :relation, enforce: %i[uri existence]
+      attribute :source_contact_info
+      attribute :source_name
+      attribute :comment
+      attribute :date, enforce: [:date_time], default: ->(x) { DateTime.now }
+      attribute :subject_source_id, enforce: [:uri]
+      attribute :object_source_id, enforce: [:uri]
 
-      embedded true
+      embedded true
 
-      def self.process_id_generator(inst)
-        return RDF::IRI.new(
-          "#{(self.namespace)}mapping_processes/" +
-          "-#{CGI.escape(inst.creator.username)}" +
-          "-#{UUID.new.generate}")
-      end
+      def self.process_id_generator(inst)
+        RDF::IRI.new(
+          "#{(self.namespace)}mapping_processes/" \
+          "-#{CGI.escape(inst.creator.username)}" \
+          "-#{UUID.new.generate}"
+        )
+      end
     end
 
     class MappingCount < LinkedData::Models::Base
-      model :mapping_count, name_with: lambda { |x| mapping_count_id(x) }
-      attribute :ontologies, enforce: [:existence, :list]
-      attribute :count, enforce: [:existence, :integer]
-      attribute :pair_count, enforce: [:existence, :boolean]
+      model :mapping_count, name_with: ->(x) { mapping_count_id(x) }
+      attribute :ontologies, enforce: %i[existence list]
+      attribute :count, enforce: %i[existence integer]
+      attribute :pair_count, enforce: %i[existence boolean]
+
       def self.mapping_count_id(x)
-        acrs = x.ontologies.sort.join("-")
-        return RDF::URI.new(
+        acrs = x.ontologies.sort.join('-')
+        RDF::URI.new(
           "#{(Goo.id_prefix)}mappingcount/#{CGI.escape(acrs)}"
         )
       end
diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb
index bf20fc34..4615e2cb 100644
--- a/lib/ontologies_linked_data/models/ontology_submission.rb
+++ b/lib/ontologies_linked_data/models/ontology_submission.rb
@@ -12,6 +12,8 @@ module Models
 
     class OntologySubmission < LinkedData::Models::Base
 
+      include LinkedData::Concerns::OntologySubmission::MetadataExtractor
+
       FILES_TO_DELETE = ['labels.ttl', 'mappings.ttl', 'obsolete.ttl', 'owlapi.xrdf', 'errors.log']
       FLAT_ROOTS_LIMIT = 1000
 
@@ -480,28 +482,9 @@ def generate_rdf(logger, file_path, reasoning=true)
         logger.flush
       end
       delete_and_append(triples_file_path, logger, mime_type)
-      version_info = extract_version()
-
-      if version_info
-        self.version = version_info
-      end
     end
 
-    def extract_version
-      query_version_info = <<eos
-SELECT ?versionInfo
-FROM #{self.id.to_ntriples}
-WHERE {
-               <http://bioportal.bioontology.org/ontologies/versionSubject>
-                <http://www.w3.org/2002/07/owl#versionInfo> ?versionInfo .
-}
-eos
-      Goo.sparql_query_client.query(query_version_info).each_solution do |sol|
-        return sol[:versionInfo].to_s
-      end
-      return nil
-    end
 
 
     def process_callbacks(logger, callbacks, action_name, &block)
       callbacks.delete_if do |_, callback|
@@ -978,6 +961,7 @@ def process_submission(logger, options={})
           zip_dst = unzip_submission(logger)
           file_path = zip_dst ? 
zip_dst.to_s : self.uploadFilePath.to_s generate_rdf(logger, file_path, reasoning=reasoning) + extract_metadata add_submission_status(status) self.save rescue Exception => e diff --git a/lib/ontologies_linked_data/parser/owlapi.rb b/lib/ontologies_linked_data/parser/owlapi.rb index 4bd092b6..33a7dc4a 100644 --- a/lib/ontologies_linked_data/parser/owlapi.rb +++ b/lib/ontologies_linked_data/parser/owlapi.rb @@ -13,7 +13,7 @@ class RDFFileNotGeneratedException < Parser::ParserException class OWLAPICommand def initialize(input_file, output_repo, opts = {}) - @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.4.0.jar" + @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.4.1.jar" @input_file = input_file @output_repo = output_repo @master_file = opts[:master_file] diff --git a/test/mappings/test_mappings_bulk_load.rb b/test/mappings/test_mappings_bulk_load.rb new file mode 100644 index 00000000..1e1af81a --- /dev/null +++ b/test/mappings/test_mappings_bulk_load.rb @@ -0,0 +1,149 @@ +require_relative '../models/test_ontology_common' +require 'logger' + +class TestMappingBulkLoad < LinkedData::TestOntologyCommon + + ONT_ACR1 = 'MAPPING_TEST1' + ONT_ACR2 = 'MAPPING_TEST2' + + def self.before_suite + LinkedData::TestCase.backend_4s_delete + self.ontologies_parse + end + + def self.ontologies_parse + helper = LinkedData::TestOntologyCommon.new(self) + helper.submission_parse(ONT_ACR1, + 'MappingOntTest1', + './test/data/ontology_files/BRO_v3.3.owl', 11, + process_rdf: true, index_search: true, + run_metrics: false, reasoning: true) + helper.submission_parse(ONT_ACR2, + 'MappingOntTest2', + './test/data/ontology_files/CNO_05.owl', 22, + process_rdf: true, index_search: true, + run_metrics: false, reasoning: true) + end + + def test_mapping_classes_found + ontology_id = 'http://bioontology.org/ontologies/BiomedicalResources.owl' + mapping_hash = { + "classes": %w[http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#Image_Algorithm + http://purl.org/incf/ontology/Computational_Neurosciences/cno_alpha.owl#cno_0000202], + + "name": 'This is the mappings produced to test the bulk load', + "source": 'https://w3id.org/semapv/LexicalMatching', + "comment": 'mock data', + "relation": [ + 'http://www.w3.org/2002/07/owl#subClassOf' + ], + "subject_source_id": 'http://bioontology.org/ontologies/BiomedicalResources.owl', + "object_source_id": 'http://purl.org/incf/ontology/Computational_Neurosciences/cno_alpha.owl', + "source_name": 'https://w3id.org/sssom/mapping/tests/data/basic.tsv', + "source_contact_info": 'orcid:1234,orcid:5678', + "date": '2020-05-30' + + } + commun_test(mapping_hash, ontology_id) + end + + def test_mapping_classes_not_found + ontology_id = 'http://bioontology.org/ontologies/BiomedicalResources.owl' + mapping_hash = { + "classes": %w[http://bioontology.org/ontologies/test_1 + http://purl.org/incf/ontology/Computational_Neurosciences/test_2], + + "name": 'This is the mappings produced to test the bulk load', + "source": 'https://w3id.org/semapv/LexicalMatching', + "comment": 'mock data', + "relation": [ + 'http://www.w3.org/2002/07/owl#subClassOf' + ], + "subject_source_id": 'http://bioontology.org/ontologies/BiomedicalResources.owl', + "object_source_id": 'http://purl.org/incf/ontology/Computational_Neurosciences/cno_alpha.owl', + "source_name": 'https://w3id.org/sssom/mapping/tests/data/basic.tsv', + "source_contact_info": 'orcid:1234,orcid:5678', + "date": '2020-05-30' + + } + assert_raises ArgumentError do + mapping_load(mapping_hash, 
ontology_id) + end + end + + def test_mapping_ontologies_not_found + ontology_id = 'http://bioontology.org/ontologies/BiomedicalResources.owl' + mapping_hash = { + "classes": %w[http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#Image_Algorithm + http://purl.org/incf/ontology/Computational_Neurosciences/cno_alpha.owl#test2], + + "name": 'This is the mappings produced to test the bulk load', + "source": 'https://w3id.org/semapv/LexicalMatching', + "comment": 'mock data', + "relation": [ + 'http://www.w3.org/2002/07/owl#subClassOf' + ], + "source_name": 'https://w3id.org/sssom/mapping/tests/data/basic.tsv', + "source_contact_info": 'orcid:1234,orcid:5678', + "date": '2020-05-30' + + } + assert_raises ArgumentError do + commun_test(mapping_hash, ontology_id) + end + + end + + private + + def delete_rest_mappings + LinkedData::Models::RestBackupMapping.all.each do |m| + LinkedData::Mappings.delete_rest_mapping(m.id) + end + end + + def commun_test(mapping_hash, ontology_id) + mappings = mapping_load(mapping_hash, ontology_id) + selected = mappings.select do |m| + m.source == 'REST' && + m.classes.first.id.to_s['Image_Algorithm'] && + m.classes.last.id.to_s['cno_0000202'] + end + selected = selected.first + refute_nil selected + assert_equal Array(selected.process.relation), + ['http://www.w3.org/2002/07/owl#subClassOf'] + + assert_equal selected.process.subject_source_id.to_s, + 'http://bioontology.org/ontologies/BiomedicalResources.owl' + + assert_equal selected.process.object_source_id.to_s, + 'http://purl.org/incf/ontology/Computational_Neurosciences/cno_alpha.owl' + + end + + def mapping_load(mapping_hash, ontology_id) + delete_rest_mappings + user_name = 'test_mappings_user' + user = LinkedData::Models::User.where(username: user_name).include(:username).first + if user.nil? + user = LinkedData::Models::User.new(username: user_name, email: 'some@email.org') + user.passwordHash = 'some random pass hash' + user.save + end + loaded, errors = LinkedData::Mappings.bulk_load_mappings([mapping_hash], user, check_exist: true) + + raise ArgumentError, errors unless errors.empty? + + LinkedData::Mappings.create_mapping_counts(Logger.new(TestLogFile.new)) + ct = LinkedData::Models::MappingCount.where.all.length + assert ct > 2 + o = LinkedData::Models::Ontology.where(submissions: { uri: ontology_id }) + .include(submissions: %i[submissionId submissionStatus]) + .first + latest_sub = o.nil? ? nil : o.latest_submission + LinkedData::Mappings.mappings_ontology(latest_sub, 1, 1000) + + end +end +
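
Usage note: the bulk-load entry point added in lib/ontologies_linked_data/concerns/mappings/mapping_bulk_load.rb can be driven directly from a console, much as the new test does. Below is a minimal sketch; the username, ontology URIs and class URIs are placeholders, and it assumes a configured Goo backend with both source ontologies already parsed.

    require 'ontologies_linked_data'

    # The creator must be an existing user; the new test creates one the same way.
    user = LinkedData::Models::User.where(username: 'admin').include(:username).first

    # Placeholder classes and source ontologies; :relation takes a list of URIs.
    mapping = {
      classes: %w[http://example.org/ont_a#ClassA
                  http://example.org/ont_b#ClassB],
      relation: ['http://www.w3.org/2004/02/skos/core#exactMatch'],
      subject_source_id: 'http://example.org/ont_a',
      object_source_id: 'http://example.org/ont_b',
      name: 'Example mapping',
      comment: 'loaded outside ontologies_api',
      date: '2020-05-30'
    }

    # Returns the created mappings plus a hash of per-index error messages,
    # so one bad entry (unknown class, duplicate mapping, ...) does not
    # abort the rest of the batch.
    loaded, errors = LinkedData::Mappings.bulk_load_mappings([mapping], user, check_exist: true)
    errors.each { |index, message| puts "mapping #{index} failed: #{message}" }
    puts "created #{loaded.size} REST mappings"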