diff --git a/flame/documentation.py b/flame/documentation.py index 4222072..c0a69fa 100644 --- a/flame/documentation.py +++ b/flame/documentation.py @@ -401,149 +401,146 @@ def dumpYAML (self): return (yaml_out) - def dumpExcel (self,oname): - - # openpyxl should be installed in the environment - # pip install openpyxl - - from openpyxl import Workbook - from openpyxl.styles import Font,NamedStyle,Alignment - # from openpyxl.comments import Comment - - wb = Workbook() - ws = wb.active - ws.title = f"Model {self.model} documentation" - alignment_style = Alignment(vertical='top',wrapText=True) - - # Label Style - Label = NamedStyle(name="Label") - Label.font = Font(name='Calibri',size=11,bold=True) - Label.alignment = alignment_style - - ws.column_dimensions['A'].width = 25.10 - ws.column_dimensions['B'].width = 28.00 - ws.column_dimensions['C'].width = 60.00 - ws.column_dimensions['D'].width = 60.00 + def dumpExcel(self,oname): + - # sections of the document, specifying the document keys which will be listed - sections = [('General model information',['ID', 'Version', 'Model_title', 'Model_description', 'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint', - 'Endpoint_units', 'Interpretation', 'Dependent_variable', 'Species', - 'Limits_applicability', 'Experimental_protocol', 'Model_availability', - 'Data_info']), - ('Algorithm and software',['Algorithm', 'Software', 'Descriptors', 'Algorithm_settings', - 'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics', - 'Internal_validation_1', 'Internal_validation_2', 'External_validation', - 'Comments']), - ('Other information',['Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates', - 'QMRF_updates', 'References', 'QMRF_same_models', 'Mechanistic_basis', - 'Mechanistic_references', 'Supporting_information', 'Comment_on_the_endpoint', - 'Endpoint_data_quality_and_variability', 'Descriptor_selection'])] + # openpyxl should be installed in the environment + # pip install openpyxl - #Save the position and name of the label for the first and last section - position = [] - name = [sections[0][1][0],'Other Comments'] - - count = 1 - for isection in sections: + from openpyxl import Workbook + from openpyxl.styles import Font,NamedStyle,Alignment - for ik in isection[1]: - - label_k = ik.replace('_',' ') + wb = Workbook() + ws = wb.active + ws.title = f"Model {self.model} documentation" + alignment_style = Alignment(vertical='top',wrapText=True) - if label_k == 'Internal validation 2' or label_k == 'External validation': - ws[f"A{count}"] = label_k - ws[f'A{count}'].style = Label - else: - ws[f"B{count}"] = label_k - ws[f"B{count}"].style = Label - - if ik in self.fields: - # set defaults for value - ivalue= '' - #v is the selected entry in the documentation dictionary - v = self.fields[ik] + # Label Style + Label = NamedStyle(name="Label") + Label.font = Font(name='Calibri',size=11,bold=True) + Label.alignment = alignment_style + + ws.column_dimensions['A'].width = 25.10 + ws.column_dimensions['B'].width = 28.00 + ws.column_dimensions['C'].width = 60.00 + ws.column_dimensions['D'].width = 60.00 + + # sections of the document, specifying the document keys which will be listed + sections = [('General model information',['ID', 'Version', 'Model_title', 'Model_description', 'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint', + 'Endpoint_units', 'Interpretation', 'Dependent_variable', 'Species', + 'Limits_applicability', 'Experimental_protocol', 'Model_availability', + 'Data_info']), + ('Algorithm and software',['Algorithm', 'Software', 'Descriptors', 'Algorithm_settings', + 'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics', + 'Internal_validation_1', 'Internal_validation_2', 'External_validation', + 'Comments']), + ('Other information',['Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates', + 'QMRF_updates', 'References', 'QMRF_same_models', 'Mechanistic_basis', + 'Mechanistic_references', 'Supporting_information', 'Comment_on_the_endpoint', + 'Endpoint_data_quality_and_variability', 'Descriptor_selection'])] + + #Save the position and name of the label for the first and last section + position = [] + name = [sections[0][1][0],'Other Comments'] + + count = 1 + for isection in sections: + + for ik in isection[1]: + + label_k = ik.replace('_',' ') + + if label_k in ['Internal validation 2', 'External validation']: + ws[f"A{count}"] = label_k + ws[f'A{count}'].style = Label + else: + ws[f"B{count}"] = label_k + ws[f"B{count}"].style = Label + + if ik in self.fields: + # set defaults for value + ivalue= '' + #v is the selected entry in the documentation dictionary + v = self.fields[ik] ## newest parameter formats are extended and contain ## rich metainformation for each entry - if 'value' in v: - ivalue = v['value'] - - if isinstance(ivalue,dict): + if 'value' in v: + ivalue = v['value'] - ws[f"A{count}"] = label_k - ws[f"A{count}"].style = Label - - end = (count)+(len(ivalue)-1) - - for intk in ivalue: - label_ik = intk.replace('_',' ') - # label_ik = intk.replace('_f', '').replace('_', ' ') - ws[f'B{count}'] = label_ik - ws[f'B{count}'].style = Label - - + if isinstance(ivalue,dict): + + ws[f"A{count}"] = label_k + ws[f"A{count}"].style = Label + + end = (count)+(len(ivalue)-1) + + for intk in ivalue: + label_ik = intk.replace('_',' ') + # label_ik = intk.replace('_f', '').replace('_', ' ') + ws[f'B{count}'] = label_ik + ws[f'B{count}'].style = Label + + + intv = ivalue[intk] + if not isinstance(intv,dict): + + iivalue = intv + if iivalue is None: + iivalue = " " + else: intv = ivalue[intk] - if not isinstance(intv,dict): - - iivalue = intv - if iivalue is None: - iivalue = " " - else: - intv = ivalue[intk] + iivalue = '' + if 'value' in intv: + iivalue = intv["value"] + if iivalue is None: iivalue = '' - if 'value' in intv: - iivalue = intv["value"] - if iivalue is None: - iivalue = '' - - ws[f'D{count}'] = intv['description'] - ws[f'D{count}'].alignment = alignment_style - - - ws[f'C{count}'] = f'{str(iivalue)}' - ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4') - ws[f'C{count}'].alignment = alignment_style - - ws.merge_cells(f'A{count}:A{end}') - - count +=1 - - else: - ws[f'D{count}'] = v['description'] - ws[f'D{count}'].alignment = alignment_style + ws[f'D{count}'] = intv['description'] + ws[f'D{count}'].alignment = alignment_style - if label_k == 'Experimental protocol' or label_k == 'Comments': - position.append(count) - - if ivalue is None: - ivalue = '' - ws[f'C{count}'] = f'{str(ivalue)}' + ws[f'C{count}'] = f'{str(iivalue)}' ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4') ws[f'C{count}'].alignment = alignment_style - - count += 1 - - itr = 0 - for i in position: - if itr == 0: - ws[f'A{1}'] = name[itr] - ws[f"A{1}"].style = Label - ws.merge_cells(f'A{1}:A{i}') - else: - ws[f'A{i}'] = name[itr] - ws[f"A{i}"].style = Label - ws.merge_cells(f'A{i}:A{count-1}') + ws.merge_cells(f'A{count}:A{end}') - itr +=1 + count +=1 - try: - wb.save(oname) - except: - return False, f'error saving document as {oname}' - - return True, 'OK' + else: + + ws[f'D{count}'] = v['description'] + ws[f'D{count}'].alignment = alignment_style + + if label_k in ['Experimental protocol', 'Comments']: + position.append(count) + + if ivalue is None: + ivalue = '' + + ws[f'C{count}'] = f'{str(ivalue)}' + ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4') + ws[f'C{count}'].alignment = alignment_style + + + count += 1 + + for itr, i in enumerate(position): + if itr == 0: + ws['A1'] = name[itr] + ws['A1'].style = Label + ws.merge_cells(f'A1:A{i}') + else: + ws[f'A{i}'] = name[itr] + ws[f"A{i}"].style = Label + ws.merge_cells(f'A{i}:A{count-1}') + + try: + wb.save(oname) + except: + return False, f'error saving document as {oname}' + + return True, 'OK' def dumpWORD (self, oname): @@ -1069,27 +1066,31 @@ def empty_fields(self): intv = ivalue[intk] if not isinstance(intv,dict): iivalue = intv - if iivalue is None or len(str(iivalue)) is 0: + if iivalue is None or not str(iivalue): emptyfields.append(intk) else: intv = ivalue[intk] iivalue = '' - if intv["value"] is None or len(str(intv["value"])) is 0: + if intv["value"] is None or not str(intv["value"]): emptyfields.append(intk) - + else: - if ivalue is None or len(str(ivalue)) is 0: + if ivalue is None or not str(ivalue): emptyfields.append(ik) - + return emptyfields - def get_mols(self): + def get_smiles(self): + '''''' - return dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES"))) - - - + #dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES"))) + return self.conveyor.getVal("SMILES") + + def get_names(self): + '''''' + return self.conveyor.getVal("obj_nam") + def autocomplete_documentation(self): """ Auto complete fields in model documentation @@ -1101,8 +1102,11 @@ def autocomplete_documentation(self): #Date, Date of model development and Date of QMRF. today = date.today().strftime("%B %d, %Y") - self.fields['Date']['value'] = today - self.fields['Date_of_QMRF']['value'] = today + if not self.fields['Date']['value']: + self.fields['Date']['value'] = today + + if not self.fields['Date_of_QMRF']['value']: + self.fields['Date_of_QMRF']['value'] = today #format, Format used(SDF,TSV) if self.parameters.getVal('input_type') == 'data': @@ -1116,10 +1120,7 @@ def autocomplete_documentation(self): fieldsapplysoftware = ['model','descriptors','applicability_domain'] for field in fieldsapplysoftware: - if field == 'applicability_domain': - if self.parameters.getVal('conformal'): - self.fields['Software']['value'][field]['value'] = software - else: + if field == 'applicability_domain' and self.parameters.getVal('conformal') or field != 'applicability_domain': self.fields['Software']['value'][field]['value'] = software diff --git a/flame/util/utils.py b/flame/util/utils.py index 4c8d96d..deff033 100644 --- a/flame/util/utils.py +++ b/flame/util/utils.py @@ -22,7 +22,7 @@ __modules__ = None -import os +import os import sys import yaml import random @@ -33,8 +33,10 @@ import codecs import string import re - from flame.util import get_logger +#from knowledgehub.api import KnowledgeHubAPI + + LOG = get_logger(__name__) @@ -463,4 +465,4 @@ def isFingerprint (md): if md in fplist: return True - return False + return False \ No newline at end of file diff --git a/flame/util/verify.py b/flame/util/verify.py index a6292af..04666f3 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -1,148 +1,419 @@ +#! -*- coding: utf-8 -*- + +# Description Verification process +# +# Authors: Manuel Pastor (manuel.pastor@upf.edu) +# Adrian Cabrera +# +# Copyright 2018 Manuel Pastor +# +# This file is part of Flame +# +# Flame is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation version 3. +# +# Flame is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Flame. If not, see . + +import pickle from flame.documentation import Documentation +import flame.chem.sdfileutils as sdfutils +from flame.parameters import Parameters from flame.util import utils,get_logger import os from rdkit import Chem,DataStructs -import pickle +import yaml +import urllib3 +urllib3.disable_warnings() +import requests +LOG = get_logger(__name__) +try: + from decouple import config +except: + LOG.error('decouple library is not installed.') + LOG.info('pip install python-decouple') -LOG = get_logger(__name__) -def verify_documentation (endpoint, version=None): +def verify_SDFile_activity(endpoint,version=None): + ''' ''' + success, mols = getActivity(endpoint,version) + if not success: + return False,{'status':'Aborted','comments':mols} + + mols = [x for x in mols if x['Activity'] is None] + + if mols: + result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':mols} + else: + result = {'status':'Passed'} + + return True,result + +def verify_model(endpoint, version=None): + ''' ''' + api = connect_api() + invalid = [] + not_found_list = [] + + if not isinstance(api, requests.models.Response): + return False,{'status':'Aborted','comments':'Failed connection to External Service'} + + try: + doc = Documentation(endpoint, version) + except: + return False,{'status':'Aborted','comments':f'{endpoint} documentation.yaml not found.'} + + smiles_list = dict(zip(doc.get_names(),doc.get_smiles())) + for drugname,smiles in smiles_list.items(): + ext_service_smiles = getSmilesByApi(api,drugname) + if ext_service_smiles: + fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(ext_service_smiles)) + similarity = DataStructs.TanimotoSimilarity(fp1,fp2) + if similarity < 0.99: + invalid.append( + { + 'drugname': drugname, + 'input_smiles':smiles, + 'ext_service_smiles':ext_service_smiles, + 'similarity':similarity, + }) + else: + not_found_list.append(drugname) + + if invalid or not_found_list: + return True,{'status':'Failed', + 'comments':'The chemical structure of the following drugs is different from that obtained in External Service.', + 'Information':invalid, + 'Extra_Information':not_found_list} + + return True,{'status':'Passed'} + +def verify_library(endpoint, version=None): ''' - Check that the required fields are completed + Check that the current libraries are the same + as those with which the model was created. ''' + param = None + meta_path = utils.model_path(endpoint, version) + parameters_file_name = os.path.join(meta_path, 'parameters.yaml') + with open(parameters_file_name, 'r') as pfile: + param = yaml.safe_load(pfile) - blacklist = ['Species','Limits_applicability','Experimental_protocol','location','description','endpoint_positive','endpoint_negative','raw_data_url','test_set_size','training_set_url','test_set_url','bootstrap','ccp_alpha','criterion','max_depth','max_features','max_leaf_nodes','max_samples','min_impurity_decrease','min_impurity_split','min_samples_leaf','min_samples_split','min_weight_fraction_leaf','n_estimators','n_jobs','oob_score','random_state','verbose','warm_start','confidence','ACP_sampler','KNN_NN','aggregated','aggregation_function','conformal_predictors','normalizing_model','Conformal_mean_interval','Conformal_accuracy','Q2','SDEP','Comments','Other_related_models','Date_of_QMRF','Date_of_QMRF_updates','QMRF_updates','References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability','Descriptor_selection','Internal_validation_2','External_validation'] + model_pkl = os.path.join(param['model_path']['value'],'estimator.pkl') + LOG.debug(f'Loading model from pickle file, path: {model_pkl}') + try: + with open(model_pkl,"rb") as input_file: + dict_estimator = pickle.load(input_file) - if endpoint is None: - return False, 'Empty model label' - - # get de model repo path - rdir = utils.model_path(endpoint, version) - if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')): - return False, 'Info file not found' + except FileNotFoundError: + LOG.error(f'No valid model estimator found at: {model_pkl}') + return False, {'status':'Aborted','comments':f'No valid model estimator found at: {model_pkl}'} - doc = Documentation(endpoint, version) + # check if the pickle was created with a compatible version (currently, 1) + if dict_estimator['version'] is not 1: + return True, {'status':'Failed','comments':'Incompatible model version','Information':[]} + + # check if the libraries used to build this model are similar to current libraries + if 'libraries' not in dict_estimator: + return False, {'status':'Failed', + 'comments':'The libraries with which the model was built have not been found in the estimator.pkl'} + + success,results = utils.compatible_modules(dict_estimator['libraries']) + if not success: + return True,{'status':'Failed','comments':'Incompatible libraries have been found','Information':results} + else: + return True,{'status':'Passed'} + +def predict_train_series(endpoint, version=None): + ''' + Predict training_series and compare if the model quality + results are the same as the fitting. + ''' + return True,None + +def predict_benchmarking_dataset(): + ''' + Prediction of a benchmarking dataset + ''' + return True,None + +def verify_documentation (endpoint, version=None): + ''' + Check that the required fields are completed + ''' + blacklist = ['Species','Limits_applicability','Experimental_protocol','location','description','endpoint_positive','endpoint_negative','raw_data_url', + 'test_set_size','training_set_url','test_set_url','bootstrap','ccp_alpha','criterion','max_depth','max_features','max_leaf_nodes','max_samples', + 'min_impurity_decrease','min_impurity_split','min_samples_leaf','min_samples_split','min_weight_fraction_leaf','n_estimators','n_jobs','oob_score', + 'random_state','verbose','warm_start','confidence','ACP_sampler','KNN_NN','aggregated','aggregation_function','conformal_predictors','normalizing_model', + 'Conformal_mean_interval','Conformal_accuracy','Q2','SDEP','Comments','Other_related_models','Date_of_QMRF','Date_of_QMRF_updates','QMRF_updates', + 'References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability', + 'Descriptor_selection','Internal_validation_2','External_validation'] + + doc = Documentation(endpoint, version) fields = [field for field in doc.empty_fields() if field not in blacklist] if fields: - result = {'status':'Failed','comments':'fields not completed','Information':fields} + result = {'status':'Failed','comments':'Missing required information.','Information':fields} else: - result = {'status':'Passed','comments':'All fields required are completed','Information':[]} - + result = {'status':'Passed','comments':'All fields required are completed.','Information':[]} + return True,result +def verify_ExecSummary(endpoint,version=None): + ''' + Collects the fields required to generate the summary. + ''' + result = {} + doc = Documentation(endpoint,version) + param = Parameters() + meta_path = utils.model_path(endpoint, version) + param_file_name = os.path.join(meta_path, 'parameters.yaml') + + try: + with open(param_file_name, 'r') as pfile: + param.p = yaml.safe_load(pfile) + except Exception as e: + return False, {'status':'Aborted','comments':e} -# Manually verification -# TO DO -def verify_data (endpoint, version=None): - return True, {'status':'Passed','comments':'','Information':['Manually verification',]} + success, mols = getActivity(endpoint,version) + activity = [x['Activity'] for x in mols if x['Activity'] is not None] + if not success: + return False,{'status':'Aborted','comments':mols} -# TO DO -def verify_prediction (endpoint, version=None): + # get dictionaries + algorithm_dict = doc.getDict('Algorithm') + descriptors_dict = doc.getDict('Descriptors') - meta_path = utils.model_path(endpoint, version) - training_file = os.path.join(meta_path, 'training_series') - if not os.path.isfile(training_file): - return True, {'status':'Failed','comments':'','Information':[]} - - return True, {'status':'Passed','comments':'','Information':[]} - - -# def verify_model(endpoint, version= None): -# doc = Documentation(endpoint, version) -# list_of_mols = doc.get_mols() -# print("Total: ",len(list_of_mols)) -# api = utils.connect_api() -# count = 1 -# countInvalidMols = 1 - -# invalidMols = {} -# for mol in list_of_mols: -# apiSmile = utils.getSmilesByAPI(api,mol) -# aux_smile = apiSmile -# if apiSmile: -# localSmile,apiSmile = Chem.MolFromSmiles(list_of_mols[mol]),Chem.MolFromSmiles(apiSmile) -# fp1,fp2 = Chem.RDKFingerprint(localSmile),Chem.RDKFingerprint(apiSmile) - -# if DataStructs.TanimotoSimilarity(fp1,fp2) < 0.99: -# invalidMols[mol] = [list_of_mols[mol],aux_smile] -# countInvalidMols += 1 -# else: -# print(count,". Not found:",mol) -# count +=1 - -# print("Similarity below 0.99: ",countInvalidMols) -# return True,{'status':'Passed','comments':'','Information':invalidMols} + #section title + model_type,date = algorithm_dict['type'],doc.getVal('Date') + result['title'] = f'{endpoint} prediction based on a 3D {model_type} model. {date}' + + #section Interpretation + result['Interpretation'] = doc.getVal('Interpretation') + + #Methodology + algorithm = algorithm_dict['algorithm'] + descriptors = ",".join(descriptors_dict['descriptors']) + + #extra information in Methodology section + selection_method = descriptors_dict['selection_method'] + scaling = descriptors_dict['scaling'] + if all([scaling,selection_method]): + result['Methodology'] = f'A {model_type} model, was built using {algorithm} method and {descriptors} molecular descriptors [with {selection_method}][scaled using {scaling}]' + + result['Methodology'] = f'A {model_type} model, was built using {algorithm} method and {descriptors} molecular descriptors.' + + training_set_size = doc.getVal('Data_info')['training_set_size']['value'] + #section Val.Internal quantitative model + if param.getVal('quantitative')['value']: + r2 = doc.getVal('Goodness_of_fit_statistics')['R2'] + q2 = doc.getVal('Internal_validation_1')['Q2'] + sdep = doc.getVal('Internal_validation_1')['SDEP'] + min_activity = round(min(activity),2) + max_activity = round(max(activity),2) + avg = round(sum(activity)/len(activity),2) + + result['Val_internal'] = f'r2 {r2}, q2 {q2}, SDEP {sdep}' + result['Training_set'] = f'{training_set_size} compounds (min. {min_activity},max. {max_activity} average:{avg})' + + else: + #section Val.Internal qualitative model + Sensitivity = doc.getVal('Internal_validation_1')['Sensitivity'] + Specificity = doc.getVal('Internal_validation_1')['Specificity'] + MCC = doc.getVal('Internal_validation_1')['MCC'] + # Activity percentage + neg = round((len([x for x in activity if x <= 0]) / training_set_size) * 100) + pos = round(100 - neg) + + result['Val_internal'] = f'Sensitivity:{Sensitivity}, Specificity:{Specificity}, MCC: {MCC}' + result['Training_set'] = f'{training_set_size} compounds ({pos}% positive, {neg}% negative)' + + return True,{'status':'Review','comments':'Pending review','Information':[result]} def verify (endpoint, version=None): + result = {} + # 1.0 Data checking: activity + success, result['activity'] = verify_SDFile_activity(endpoint, version) + if not success: + return False,result + + # 1.1 Data cheking: Check the validity of the structure provided + success, result['model'] = verify_model(endpoint, version) + if not success: + return False,result + # save data checking step + datachecking = {'Data checking':result} + + # 2.0 Model testing: Check library result = {} - success, result['documentation'] = verify_documentation (endpoint, version) - #success, result['model'] = verify_model(endpoint, version) - + success,result['libraries'] = verify_library(endpoint,version) if not success: - return False, result + return False,result - success, result['data'] = verify_data (endpoint, version) + # save model testing step + modeltesting = {'Model testing':result} + datachecking.update(modeltesting) + + # 3- Documentation: required fields + result = {} + success, result['fields'] = verify_documentation (endpoint, version) if not success: return False, result - - success, result['prediction'] = verify_prediction (endpoint, version) - + + success,result['ExecSummary'] = verify_ExecSummary(endpoint, version) if not success: return False, result + # save documentation step + documentation = {'Documentation': result} - + datachecking.update(documentation) # concatenates the 3 steps meta_path = utils.model_path(endpoint, version) - verification_file = os.path.join(meta_path, 'verification.pkl') - - #Save in the model folder verification.pkl - file = open(verification_file,"wb") - pickle.dump(result,file) - file.close() - LOG.info(f'Save verification.pkl file \n') - - show_result(result) + verification_path = os.path.join(meta_path, 'verification.yaml') + + #Save in the model folder verification.yaml + with open(verification_path,'w') as file: + yaml.dump(datachecking,file) + + + # show first step of verification process + # show_result(datachecking['Data checking']) - return True, result + return True, datachecking def get_verification(endpoint,version): ''' - Retrieves the model verification if it exists + Retrieves the model verification ''' verification = False meta_path = utils.model_path(endpoint, version) - verification_file = os.path.join(meta_path, 'verification.pkl') + verification_path = os.path.join(meta_path, 'verification.yaml') - if os.path.isfile(verification_file): - file = open(verification_file,"rb") - verification = pickle.load(file) - file.close() + if os.path.isfile(verification_path): + with open(verification_path,'r') as file: + verification = yaml.load(file,Loader=yaml.FullLoader) + return True,verification return False - - -def show_result(result): +def getActivity(endpoint, version=None): ''' - Shows the model verification in the terminal + Return the list of molecules with their activity ''' - if result: - # HEADERS - print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n") + # I check that the model label exists only in the first function of the verification process. + # to avoid rechecking it in the following steps + if endpoint is None: + return False, 'Empty model label' + + param = None + meta_path = utils.model_path(endpoint, version) + param_file_name = os.path.join(meta_path, 'parameters.yaml') + ifile = os.path.join(meta_path,'training_series') + with open(param_file_name,'r') as pfile: + param = yaml.safe_load(pfile) + + # Initiate a RDKit SDFile iterator to process the molecules one by one + suppl = Chem.SDMolSupplier(ifile,sanitize=True) + + # check if the activity label is defined + if param['SDFile_activity']['value'] is None: + return False,'The activity field is not specified' + + # Iterate for every molecule inside the SDFile + bio = None + obj_num = 0 + result = [] + + for mol in suppl: + if mol is None: + LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}' + f' in file {ifile}') + continue + + # extract the molecule name, using a sdfileutils algorithm + name = sdfutils.getName( + mol,count=obj_num, field=param['SDFile_name']['value']) + # extract biological information (Activity) + bio = sdfutils.getVal(mol,param['SDFile_activity']['value']) + result.append({ + 'name':name, + 'Activity':bio + }) + + obj_num +=1 + + return True,result + +def getSmilesByApi(response,name): + token = response.json()['access_token'] + # refresh_token = response.json()['refresh_token'] + headers = {'Authorization': f'Bearer {token}'} + for _ in range(3): + # acces to Chemistry Service + r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers) + + if r.status_code == 200: + if 'result' in r.json(): + return r.json()['result'][0]['smiles'] + + print(r.json()['Empty response']+name) + return None + if r.status_code == 401: + print('failed to reconnect') + +def connect_api(): + + KC_URL = config('KC_URL') + KC_USERNAME = config('KC_USERNAME') + PASSWORD = config('PASSWORD') + CLIENT_SECRET = config('CLIENT_SECRET') + + #get token + payload = f"grant_type=password&client_id=knowledge-hub&client_secret={CLIENT_SECRET}&username={KC_USERNAME}" + \ + f"&password={PASSWORD}" + + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + response = requests.post( + f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token', + data=payload, + headers=headers, + verify=False, + ) + if response.status_code != 200: + LOG.error(response.status_code) + return None + + LOG.info('Succesfully connection') + return response + +#pending changes: improve scalability +#currently it is only useful for the first step of verification. +# def show_result(result): +# ''' +# Shows the model verification in the terminal +# ''' +# if result: +# # HEADERS +# print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n") - for x in result: - information = " ".join(result[x]['Information']) - print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information)) - else: - LOG.error("Unable to print verification result") +# for x in result: +# information = " ".join(result[x]['Information']) +# print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information)) +# else: +# LOG.error("Unable to print verification result")