From e4e8f69a1dc6d44c0eb059e4cc490aad6d9740c8 Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Thu, 20 Jan 2022 11:16:23 +0100 Subject: [PATCH 01/10] implementation of useful functions --- flame/util/utils.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/flame/util/utils.py b/flame/util/utils.py index 5ade5a0a..717ac8ea 100644 --- a/flame/util/utils.py +++ b/flame/util/utils.py @@ -22,7 +22,8 @@ __modules__ = None -import os +import os +from dotenv import load_dotenv import sys import yaml import random @@ -33,6 +34,9 @@ import numpy as np from flame.util import get_logger +from knowledgehub.api import KnowledgeHubAPI + +load_dotenv() LOG = get_logger(__name__) @@ -439,4 +443,25 @@ def isFingerprint (md): if md in fplist: return True - return False + return False + + +def connect_api(): + + api = KnowledgeHubAPI(server='TEST', client_secret=os.getenv('CLIENT_SECRET')) + api.login(os.getenv("USER_TEST"),os.getenv('PSWD_TEST')) + + return api + + + +def getSmilesByAPI(api,name): + + try: + smiles = api.ChemistryService().getCompoundByName(name) + except: + smiles = 0 + + return smiles + + From 0d629e980e4b560a60d86661cc629f9d5299b436 Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Tue, 1 Feb 2022 18:20:04 +0100 Subject: [PATCH 02/10] first version 2-model testing --- flame/util/verify.py | 156 ++++++++++++++++++++++++++++--------------- 1 file changed, 102 insertions(+), 54 deletions(-) diff --git a/flame/util/verify.py b/flame/util/verify.py index a6292afd..dead353b 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -1,3 +1,26 @@ +#! -*- coding: utf-8 -*- + +# Description Verification process +# +# Authors: Manuel Pastor (manuel.pastor@upf.edu) +# Adrian Cabrera +# +# Copyright 2018 Manuel Pastor +# +# This file is part of Flame +# +# Flame is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation version 3. +# +# Flame is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Flame. If not, see . + from flame.documentation import Documentation from flame.util import utils,get_logger import os @@ -7,6 +30,7 @@ LOG = get_logger(__name__) +# 1-Data cheking: Documentation def verify_documentation (endpoint, version=None): ''' Check that the required fields are completed @@ -30,19 +54,18 @@ def verify_documentation (endpoint, version=None): result = {'status':'Failed','comments':'fields not completed','Information':fields} else: result = {'status':'Passed','comments':'All fields required are completed','Information':[]} - + return True,result - - + +# 1-Data cheking: data # Manually verification -# TO DO def verify_data (endpoint, version=None): + '''TO DO''' return True, {'status':'Passed','comments':'','Information':['Manually verification',]} - -# TO DO +# 1-Data cheking: prediction def verify_prediction (endpoint, version=None): - + ''' TO DO ''' meta_path = utils.model_path(endpoint, version) training_file = os.path.join(meta_path, 'training_series') if not os.path.isfile(training_file): @@ -50,66 +73,91 @@ def verify_prediction (endpoint, version=None): return True, {'status':'Passed','comments':'','Information':[]} +# 2- Model testing +def verify_model(endpoint, version= None): + ''' TO DO''' + doc = Documentation(endpoint, version) + list_mols = doc.get_mols() + api = utils.connect_api() + count = 1 + invalid = [] + for mol in list_mols: + toxhub_smiles = utils.getSmilesByAPI(api,mol) + if toxhub_smiles: + fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(list_mols[mol])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles)) + similarity = DataStructs.TanimotoSimilarity(fp1,fp2) + if similarity < 0.99: + invalid.append({'namedrug':mol,'input_smiles':list_mols[mol],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) + else: + print(count,". Not found:",mol) + count +=1 -# def verify_model(endpoint, version= None): -# doc = Documentation(endpoint, version) -# list_of_mols = doc.get_mols() -# print("Total: ",len(list_of_mols)) -# api = utils.connect_api() -# count = 1 -# countInvalidMols = 1 - -# invalidMols = {} -# for mol in list_of_mols: -# apiSmile = utils.getSmilesByAPI(api,mol) -# aux_smile = apiSmile -# if apiSmile: -# localSmile,apiSmile = Chem.MolFromSmiles(list_of_mols[mol]),Chem.MolFromSmiles(apiSmile) -# fp1,fp2 = Chem.RDKFingerprint(localSmile),Chem.RDKFingerprint(apiSmile) - -# if DataStructs.TanimotoSimilarity(fp1,fp2) < 0.99: -# invalidMols[mol] = [list_of_mols[mol],aux_smile] -# countInvalidMols += 1 -# else: -# print(count,". Not found:",mol) -# count +=1 - -# print("Similarity below 0.99: ",countInvalidMols) -# return True,{'status':'Passed','comments':'','Information':invalidMols} + if invalid: + return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalidMols} -def verify (endpoint, version=None): + return True,{'status':'Passed','comments':'','Information':[]} + + +# 3-Inspection of Model + +def inspection_model(): + + return None + +# 4-Examination of Executive summary + +def executive_summary(): + + return None + +def verify (endpoint, version=None): result = {} + # 1- Data cheking: Documentation success, result['documentation'] = verify_documentation (endpoint, version) - #success, result['model'] = verify_model(endpoint, version) - + if not success: return False, result - + # 1- Data cheking: data success, result['data'] = verify_data (endpoint, version) if not success: return False, result - + # 1- Data cheking: prediction success, result['prediction'] = verify_prediction (endpoint, version) if not success: return False, result - + # save datacheking data + datacheking = {'Data cheking':result} + result = {} + + # 2- Model testing + success, result['model'] = verify_model(endpoint, version) + if not success: + return False, result + + # save model testing data + modeltesting = {'Model testing': result} + + + datacheking.update(modeltesting) # concatenates the dictionary of data cheking and the dictionary of model testing + meta_path = utils.model_path(endpoint, version) verification_file = os.path.join(meta_path, 'verification.pkl') #Save in the model folder verification.pkl file = open(verification_file,"wb") - pickle.dump(result,file) + pickle.dump(datacheking,file) file.close() LOG.info(f'Save verification.pkl file \n') - show_result(result) + # show first step of verification process + show_result(datacheking['Data cheking']) - return True, result + return True, datacheking def get_verification(endpoint,version): @@ -128,21 +176,21 @@ def get_verification(endpoint,version): return False - - +#pending changes: improve scalability +#currently it is only useful for the first step of verification. def show_result(result): - ''' - Shows the model verification in the terminal - ''' - if result: - # HEADERS - print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n") + ''' + Shows the model verification in the terminal + ''' + if result: + # HEADERS + print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n") - for x in result: - information = " ".join(result[x]['Information']) - print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information)) - else: - LOG.error("Unable to print verification result") + for x in result: + information = " ".join(result[x]['Information']) + print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information)) + else: + LOG.error("Unable to print verification result") From 7e9dcede0f0111db733f554b51283790500fd4f1 Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Tue, 1 Feb 2022 19:06:51 +0100 Subject: [PATCH 03/10] documented and refactored code --- flame/documentation.py | 3 ++- flame/util/verify.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/flame/documentation.py b/flame/documentation.py index 002a89c4..5f5f36a4 100644 --- a/flame/documentation.py +++ b/flame/documentation.py @@ -1064,7 +1064,8 @@ def empty_fields(self): return emptyfields - def get_mols(self): + def get_smiles(self): + '''Returns a dictionary with the fields:{(key)name:(value)SMILES}''' return dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES"))) diff --git a/flame/util/verify.py b/flame/util/verify.py index dead353b..4d501f58 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -77,23 +77,23 @@ def verify_prediction (endpoint, version=None): def verify_model(endpoint, version= None): ''' TO DO''' doc = Documentation(endpoint, version) - list_mols = doc.get_mols() + smiles_list = doc.get_smiles() api = utils.connect_api() count = 1 invalid = [] - for mol in list_mols: - toxhub_smiles = utils.getSmilesByAPI(api,mol) + for drugname in smiles_list: + toxhub_smiles = utils.getSmilesByAPI(api,drugname) if toxhub_smiles: - fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(list_mols[mol])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles)) + fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles)) similarity = DataStructs.TanimotoSimilarity(fp1,fp2) if similarity < 0.99: - invalid.append({'namedrug':mol,'input_smiles':list_mols[mol],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) + invalid.append({'drugname':drugname,'input_smiles':smiles_list[drugname],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) else: - print(count,". Not found:",mol) + print(count,". Not found:",drugname) count +=1 if invalid: - return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalidMols} + return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalid} return True,{'status':'Passed','comments':'','Information':[]} @@ -107,7 +107,7 @@ def inspection_model(): # 4-Examination of Executive summary def executive_summary(): - + return None From b49185d0515359a37ed3406d36bcf5c2df681217 Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Fri, 4 Feb 2022 20:35:33 +0100 Subject: [PATCH 04/10] change from pickle format to yaml format --- flame/util/verify.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/flame/util/verify.py b/flame/util/verify.py index 4d501f58..300a6275 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -25,7 +25,7 @@ from flame.util import utils,get_logger import os from rdkit import Chem,DataStructs -import pickle +import yaml LOG = get_logger(__name__) @@ -146,13 +146,12 @@ def verify (endpoint, version=None): datacheking.update(modeltesting) # concatenates the dictionary of data cheking and the dictionary of model testing meta_path = utils.model_path(endpoint, version) - verification_file = os.path.join(meta_path, 'verification.pkl') - - #Save in the model folder verification.pkl - file = open(verification_file,"wb") - pickle.dump(datacheking,file) - file.close() - LOG.info(f'Save verification.pkl file \n') + verification_path = os.path.join(meta_path, 'verification.yaml') + + #Save in the model folder verification.yaml + with open(verification_path,'w') as file: + yaml.dump(datacheking,file) + # show first step of verification process show_result(datacheking['Data cheking']) @@ -166,12 +165,12 @@ def get_verification(endpoint,version): ''' verification = False meta_path = utils.model_path(endpoint, version) - verification_file = os.path.join(meta_path, 'verification.pkl') + verification_path = os.path.join(meta_path, 'verification.yaml') - if os.path.isfile(verification_file): - file = open(verification_file,"rb") - verification = pickle.load(file) - file.close() + if os.path.isfile(verification_path): + with open(verification_path,'r') as file: + verification = yaml.load(file,Loader=yaml.FullLoader) + return True,verification return False From 6dab8027e35e6d5015440732a9d48ea49025edb4 Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Tue, 15 Feb 2022 13:10:02 +0100 Subject: [PATCH 05/10] date assignment error fixed --- flame/documentation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/flame/documentation.py b/flame/documentation.py index 3e2993f9..56afb81c 100644 --- a/flame/documentation.py +++ b/flame/documentation.py @@ -1082,8 +1082,11 @@ def autocomplete_documentation(self): #Date, Date of model development and Date of QMRF. today = date.today().strftime("%B %d, %Y") - self.fields['Date']['value'] = today - self.fields['Date_of_QMRF']['value'] = today + if not self.fields['Date']['value']: + self.fields['Date']['value'] = today + + if not self.fields['Date_of_QMRF']['value']: + self.fields['Date_of_QMRF']['value'] = today #format, Format used(SDF,TSV) if self.parameters.getVal('input_type') == 'data': From d3b24d766f48f769a0f2d5741b53db0cdd98a47d Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Wed, 23 Feb 2022 20:38:51 +0100 Subject: [PATCH 06/10] implemented the library check and sdfile_Activity --- flame/documentation.py | 13 +- flame/util/utils.py | 29 +--- flame/util/verify.py | 334 +++++++++++++++++++++++++++++------------ 3 files changed, 245 insertions(+), 131 deletions(-) diff --git a/flame/documentation.py b/flame/documentation.py index 56afb81c..2f49ff78 100644 --- a/flame/documentation.py +++ b/flame/documentation.py @@ -1065,12 +1065,15 @@ def empty_fields(self): return emptyfields def get_smiles(self): - '''Returns a dictionary with the fields:{(key)name:(value)SMILES}''' + '''''' - return dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES"))) - - - + #dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES"))) + return self.conveyor.getVal("SMILES") + + def get_names(self): + '''''' + return self.conveyor.getVal("obj_nam") + def autocomplete_documentation(self): """ Auto complete fields in model documentation diff --git a/flame/util/utils.py b/flame/util/utils.py index 6721729e..8c63bd5c 100644 --- a/flame/util/utils.py +++ b/flame/util/utils.py @@ -23,7 +23,6 @@ __modules__ = None import os -from dotenv import load_dotenv import sys import yaml import random @@ -35,11 +34,10 @@ import codecs import string import re - from flame.util import get_logger -from knowledgehub.api import KnowledgeHubAPI +#from knowledgehub.api import KnowledgeHubAPI + -load_dotenv() LOG = get_logger(__name__) @@ -470,25 +468,4 @@ def isFingerprint (md): if md in fplist: return True - return False - - -def connect_api(): - - api = KnowledgeHubAPI(server='TEST', client_secret=os.getenv('CLIENT_SECRET')) - api.login(os.getenv("USER_TEST"),os.getenv('PSWD_TEST')) - - return api - - - -def getSmilesByAPI(api,name): - - try: - smiles = api.ChemistryService().getCompoundByName(name) - except: - smiles = 0 - - return smiles - - + return False \ No newline at end of file diff --git a/flame/util/verify.py b/flame/util/verify.py index 300a6275..2523e029 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -21,142 +21,276 @@ # You should have received a copy of the GNU General Public License # along with Flame. If not, see . +import pickle +from flame.stats.base_model import BaseEstimator +from flame.parameters import Parameters from flame.documentation import Documentation +import flame.chem.sdfileutils as sdfutils from flame.util import utils,get_logger import os from rdkit import Chem,DataStructs import yaml +import urllib3 +urllib3.disable_warnings() +import requests +LOG = get_logger(__name__) +try: + from decouple import config +except: + LOG.error('decouple library is not installed.') + LOG.info('pip install python-decouple') -LOG = get_logger(__name__) -# 1-Data cheking: Documentation -def verify_documentation (endpoint, version=None): - ''' - Check that the required fields are completed - ''' - blacklist = ['Species','Limits_applicability','Experimental_protocol','location','description','endpoint_positive','endpoint_negative','raw_data_url','test_set_size','training_set_url','test_set_url','bootstrap','ccp_alpha','criterion','max_depth','max_features','max_leaf_nodes','max_samples','min_impurity_decrease','min_impurity_split','min_samples_leaf','min_samples_split','min_weight_fraction_leaf','n_estimators','n_jobs','oob_score','random_state','verbose','warm_start','confidence','ACP_sampler','KNN_NN','aggregated','aggregation_function','conformal_predictors','normalizing_model','Conformal_mean_interval','Conformal_accuracy','Q2','SDEP','Comments','Other_related_models','Date_of_QMRF','Date_of_QMRF_updates','QMRF_updates','References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability','Descriptor_selection','Internal_validation_2','External_validation'] - if endpoint is None: - return False, 'Empty model label' - - # get de model repo path - rdir = utils.model_path(endpoint, version) - if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')): - return False, 'Info file not found' +def verify_model(endpoint, version=None): + ''' ''' + api = connect_api() + + if not isinstance(api, requests.models.Response): + return False,{'status':'Aborted','comments':'Failed connection to External Service'} + invalid = [] + not_found_list = [] doc = Documentation(endpoint, version) + smiles_list = dict(zip(doc.get_names(),doc.get_smiles())) + for drugname,smiles in smiles_list.items(): + ext_service_smiles = getSmilesByApi(api,drugname) + if ext_service_smiles: + fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(ext_service_smiles)) + similarity = DataStructs.TanimotoSimilarity(fp1,fp2) + if similarity < 0.99: + invalid.append( + { + 'drugname': drugname, + 'input_smiles':smiles, + 'ext_service_smiles':ext_service_smiles, + 'similarity':similarity, + }) + else: + not_found_list.append(drugname) + + if invalid or not_found_list: + return True,{'status':'Failed', + 'comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.', + 'Information':invalid, + 'Extra_Information':not_found_list} + + return True,{'status':'Passed'} - fields = [field for field in doc.empty_fields() if field not in blacklist] - if fields: - result = {'status':'Failed','comments':'fields not completed','Information':fields} - else: - result = {'status':'Passed','comments':'All fields required are completed','Information':[]} + + +def getSmilesByApi(response,name): + token = response.json()['access_token'] + # refresh_token = response.json()['refresh_token'] + headers = {'Authorization': f'Bearer {token}'} + for _ in range(3): + # acces to Chemistry Service + r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers) + + if r.status_code == 200: + if 'result' in r.json(): + return r.json()['result'][0]['smiles'] + + print(r.json()['Empty response']+name) + return None + if r.status_code == 401: + print('failed to reconnect') + +def connect_api(): - return True,result + KC_URL = config('KC_URL') + KC_USERNAME = config('KC_USERNAME') + PASSWORD = config('PASSWORD') + CLIENT_SECRET = config('CLIENT_SECRET') + + #get token + payload = f"grant_type=password&client_id=knowledge-hub&client_secret={CLIENT_SECRET}&username={KC_USERNAME}" + \ + f"&password={PASSWORD}" + + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + response = requests.post( + f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token', + data=payload, + headers=headers, + verify=False, + ) + if response.status_code != 200: + LOG.error(response.status_code) + return None + + LOG.info('Succesfully connection') + return response + + + + + +def verify_SDFile_activity(endpoint,version=None): + ''' ''' + # I check that the model label exists only in the first function of the verification process. + # to avoid rechecking it in the following steps + if endpoint is None: + return False, {'status':'Aborted','comments':'Empty model label'} + + param = None + meta_path = utils.model_path(endpoint, version) + parameters_file_name = os.path.join(meta_path, 'parameters.yaml') + ifile = os.path.join(meta_path, 'training_series') + with open(parameters_file_name, 'r') as pfile: + param = yaml.safe_load(pfile) + + # Initiate a RDKit SDFile iterator to process the molecules one by one + suppl = Chem.SDMolSupplier(ifile,sanitize=True) -# 1-Data cheking: data -# Manually verification -def verify_data (endpoint, version=None): - '''TO DO''' - return True, {'status':'Passed','comments':'','Information':['Manually verification',]} - -# 1-Data cheking: prediction -def verify_prediction (endpoint, version=None): - ''' TO DO ''' + # check if the activity label is defined + if param['SDFile_activity']['value'] is None: + return False,{'status':'Aborted','comments':'The activity field is not specified'} + + # Iterate for every molecule inside the SDFile + bio = None + obj_num = 0 + list_mol_names = [] + for mol in suppl: + if mol is None: + LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}' + f' in file {ifile}') + continue + + # extract the molecule name, using a sdfileutils algorithm + name = sdfutils.getName( + mol, count=obj_num, field=param['SDFile_name']['value']) + # extract biological information (activity) + bio = sdfutils.getVal(mol, param['SDFile_activity']['value']) + + if bio is None: + list_mol_names.append(name) + + obj_num +=1 + + if list_mol_names: + result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':list_mol_names} + else: + result = {'status':'Passed','comments':'','Information':list_mol_names} + + return True,result + +def verify_library(endpoint, version=None): + '''''' + param = None meta_path = utils.model_path(endpoint, version) - training_file = os.path.join(meta_path, 'training_series') - if not os.path.isfile(training_file): - return True, {'status':'Failed','comments':'','Information':[]} + parameters_file_name = os.path.join(meta_path, 'parameters.yaml') + with open(parameters_file_name, 'r') as pfile: + param = yaml.safe_load(pfile) + + model_pkl = os.path.join(param['model_path']['value'],'estimator.pkl') + LOG.debug(f'Loading model from pickle file, path: {model_pkl}') + try: + with open(model_pkl,"rb") as input_file: + dict_estimator = pickle.load(input_file) - return True, {'status':'Passed','comments':'','Information':[]} + except FileNotFoundError: + LOG.error(f'No valid model estimator found at: {model_pkl}') + return False, {'status':'Aborted','comments':f'No valid model estimator found at: {model_pkl}'} -# 2- Model testing -def verify_model(endpoint, version= None): - ''' TO DO''' - doc = Documentation(endpoint, version) - smiles_list = doc.get_smiles() - api = utils.connect_api() - count = 1 - invalid = [] - for drugname in smiles_list: - toxhub_smiles = utils.getSmilesByAPI(api,drugname) - if toxhub_smiles: - fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles)) - similarity = DataStructs.TanimotoSimilarity(fp1,fp2) - if similarity < 0.99: - invalid.append({'drugname':drugname,'input_smiles':smiles_list[drugname],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) - else: - print(count,". Not found:",drugname) - count +=1 + # check if the pickle was created with a compatible version (currently, 1) + if dict_estimator['version'] is not 1: + return True, {'status':'Failed','comments':'Incompatible model version','Information':[]} - if invalid: - return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalid} + # check if the libraries used to build this model are similar to current libraries + if 'libraries' not in dict_estimator: + return False, {'status':'Failed', + 'comments':'The libraries with which the model was built have not been found in the estimator.pkl'} - return True,{'status':'Passed','comments':'','Information':[]} + success,results = utils.compatible_modules(dict_estimator['libraries']) + if not success: + return True,{'status':'Failed','comments':'Incompatible libraries have been found','Information':results} + else: + return True,{'status':'Passed'} + + -# 3-Inspection of Model -def inspection_model(): - return None +def verify_documentation (endpoint, version=None): + ''' + Check that the required fields are completed + ''' + blacklist = ['Species','Limits_applicability','Experimental_protocol','location','description','endpoint_positive','endpoint_negative','raw_data_url', + 'test_set_size','training_set_url','test_set_url','bootstrap','ccp_alpha','criterion','max_depth','max_features','max_leaf_nodes','max_samples', + 'min_impurity_decrease','min_impurity_split','min_samples_leaf','min_samples_split','min_weight_fraction_leaf','n_estimators','n_jobs','oob_score', + 'random_state','verbose','warm_start','confidence','ACP_sampler','KNN_NN','aggregated','aggregation_function','conformal_predictors','normalizing_model', + 'Conformal_mean_interval','Conformal_accuracy','Q2','SDEP','Comments','Other_related_models','Date_of_QMRF','Date_of_QMRF_updates','QMRF_updates', + 'References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability', + 'Descriptor_selection','Internal_validation_2','External_validation'] -# 4-Examination of Executive summary + # get de model repo path + rdir = utils.model_path(endpoint, version) + if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')): + return False, 'Info file not found' -def executive_summary(): + doc = Documentation(endpoint, version) + fields = [field for field in doc.empty_fields() if field not in blacklist] - return None + if fields: + result = {'status':'Failed','comments':'Missing required information.','Information':fields} + else: + result = {'status':'Passed','comments':'All fields required are completed.','Information':[]} + return True,result def verify (endpoint, version=None): result = {} - # 1- Data cheking: Documentation - success, result['documentation'] = verify_documentation (endpoint, version) - - if not success: - return False, result - # 1- Data cheking: data - success, result['data'] = verify_data (endpoint, version) + # 1.0 Data checking: activity + success, result['activity'] = verify_SDFile_activity(endpoint, version) if not success: - return False, result - # 1- Data cheking: prediction - success, result['prediction'] = verify_prediction (endpoint, version) - + return False,result + + # 1.1 Data cheking: Check the validity of the structure provided + success, result['model'] = verify_model(endpoint, version) + if not success: + return False,result + + # save data checking step + datachecking = {'Data checking':result} + + # 2.0 Model testing: Check library + result = {} + success,result['libraries'] = verify_library(endpoint,version) if not success: - return False, result + return False,result - # save datacheking data - datacheking = {'Data cheking':result} + # save model testing step + modeltesting = {'Model testing':result} - result = {} + datachecking.update(modeltesting) - # 2- Model testing - success, result['model'] = verify_model(endpoint, version) - if not success: + # 3- Documentation: required fields + result = {} + success, result['fields'] = verify_documentation (endpoint, version) + if not success: return False, result - # save model testing data - modeltesting = {'Model testing': result} - - - datacheking.update(modeltesting) # concatenates the dictionary of data cheking and the dictionary of model testing - + # save documentation step + documentation = {'Documentation': result} + + datachecking.update(documentation) # concatenates the 3 steps meta_path = utils.model_path(endpoint, version) verification_path = os.path.join(meta_path, 'verification.yaml') #Save in the model folder verification.yaml with open(verification_path,'w') as file: - yaml.dump(datacheking,file) + yaml.dump(datachecking,file) + - # show first step of verification process - show_result(datacheking['Data cheking']) + # show_result(datachecking['Data checking']) - return True, datacheking + return True, datachecking def get_verification(endpoint,version): @@ -177,19 +311,19 @@ def get_verification(endpoint,version): #pending changes: improve scalability #currently it is only useful for the first step of verification. -def show_result(result): - ''' - Shows the model verification in the terminal - ''' - if result: - # HEADERS - print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n") +# def show_result(result): +# ''' +# Shows the model verification in the terminal +# ''' +# if result: +# # HEADERS +# print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n") - for x in result: - information = " ".join(result[x]['Information']) - print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information)) - else: - LOG.error("Unable to print verification result") +# for x in result: +# information = " ".join(result[x]['Information']) +# print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information)) +# else: +# LOG.error("Unable to print verification result") From a110e7b8337f6e958253daf15eb14168c1dc926c Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Fri, 25 Feb 2022 11:25:36 +0100 Subject: [PATCH 07/10] improved errors control --- flame/util/verify.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/flame/util/verify.py b/flame/util/verify.py index 2523e029..e66a577d 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -22,8 +22,6 @@ # along with Flame. If not, see . import pickle -from flame.stats.base_model import BaseEstimator -from flame.parameters import Parameters from flame.documentation import Documentation import flame.chem.sdfileutils as sdfutils from flame.util import utils,get_logger @@ -42,18 +40,19 @@ LOG.info('pip install python-decouple') - - def verify_model(endpoint, version=None): ''' ''' api = connect_api() - + invalid = [] + not_found_list = [] if not isinstance(api, requests.models.Response): return False,{'status':'Aborted','comments':'Failed connection to External Service'} - invalid = [] - not_found_list = [] - doc = Documentation(endpoint, version) + try: + doc = Documentation(endpoint, version) + except: + return False,{'status':'Aborted','comments':f'{endpoint} documentation.yaml not found.'} + smiles_list = dict(zip(doc.get_names(),doc.get_smiles())) for drugname,smiles in smiles_list.items(): ext_service_smiles = getSmilesByApi(api,drugname) @@ -73,15 +72,13 @@ def verify_model(endpoint, version=None): if invalid or not_found_list: return True,{'status':'Failed', - 'comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.', + 'comments':'The chemical structure of the following drugs is different from that obtained in External Service.', 'Information':invalid, 'Extra_Information':not_found_list} return True,{'status':'Passed'} - - def getSmilesByApi(response,name): token = response.json()['access_token'] # refresh_token = response.json()['refresh_token'] @@ -125,9 +122,6 @@ def connect_api(): return response - - - def verify_SDFile_activity(endpoint,version=None): ''' ''' # I check that the model label exists only in the first function of the verification process. @@ -210,9 +204,6 @@ def verify_library(endpoint, version=None): return True,{'status':'Failed','comments':'Incompatible libraries have been found','Information':results} else: return True,{'status':'Passed'} - - - def verify_documentation (endpoint, version=None): @@ -227,11 +218,6 @@ def verify_documentation (endpoint, version=None): 'References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability', 'Descriptor_selection','Internal_validation_2','External_validation'] - # get de model repo path - rdir = utils.model_path(endpoint, version) - if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')): - return False, 'Info file not found' - doc = Documentation(endpoint, version) fields = [field for field in doc.empty_fields() if field not in blacklist] From 89f23b60c8e3d1c43acf28ff8806f20a1acba8ba Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Mon, 28 Feb 2022 16:49:22 +0100 Subject: [PATCH 08/10] improvement of the code_dumpExcel() --- flame/documentation.py | 250 ++++++++++++++++++++--------------------- 1 file changed, 124 insertions(+), 126 deletions(-) diff --git a/flame/documentation.py b/flame/documentation.py index 662bfca6..2084316f 100644 --- a/flame/documentation.py +++ b/flame/documentation.py @@ -401,149 +401,147 @@ def dumpYAML (self): return (yaml_out) - def dumpExcel (self,oname): - - # openpyxl should be installed in the environment - # pip install openpyxl - - from openpyxl import Workbook - from openpyxl.styles import Font,NamedStyle,Alignment - # from openpyxl.comments import Comment - - wb = Workbook() - ws = wb.active - ws.title = f"Model {self.model} documentation" - alignment_style = Alignment(vertical='top',wrapText=True) - - # Label Style - Label = NamedStyle(name="Label") - Label.font = Font(name='Calibri',size=11,bold=True) - Label.alignment = alignment_style - - ws.column_dimensions['A'].width = 25.10 - ws.column_dimensions['B'].width = 28.00 - ws.column_dimensions['C'].width = 60.00 - ws.column_dimensions['D'].width = 60.00 + def dumpExcel(self,oname): + - # sections of the document, specifying the document keys which will be listed - sections = [('General model information',['ID', 'Version', 'Model_title', 'Model_description', 'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint', - 'Endpoint_units', 'Interpretation', 'Dependent_variable', 'Species', - 'Limits_applicability', 'Experimental_protocol', 'Model_availability', - 'Data_info']), - ('Algorithm and software',['Algorithm', 'Software', 'Descriptors', 'Algorithm_settings', - 'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics', - 'Internal_validation_1', 'Internal_validation_2', 'External_validation', - 'Comments']), - ('Other information',['Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates', - 'QMRF_updates', 'References', 'QMRF_same_models', 'Mechanistic_basis', - 'Mechanistic_references', 'Supporting_information', 'Comment_on_the_endpoint', - 'Endpoint_data_quality_and_variability', 'Descriptor_selection'])] + # openpyxl should be installed in the environment + # pip install openpyxl - #Save the position and name of the label for the first and last section - position = [] - name = [sections[0][1][0],'Other Comments'] - - count = 1 - for isection in sections: + from openpyxl import Workbook + from openpyxl.styles import Font,NamedStyle,Alignment + # from openpyxl.comments import Comment - for ik in isection[1]: - - label_k = ik.replace('_',' ') + wb = Workbook() + ws = wb.active + ws.title = f"Model {self.model} documentation" + alignment_style = Alignment(vertical='top',wrapText=True) - if label_k == 'Internal validation 2' or label_k == 'External validation': - ws[f"A{count}"] = label_k - ws[f'A{count}'].style = Label - else: - ws[f"B{count}"] = label_k - ws[f"B{count}"].style = Label - - if ik in self.fields: - # set defaults for value - ivalue= '' - #v is the selected entry in the documentation dictionary - v = self.fields[ik] + # Label Style + Label = NamedStyle(name="Label") + Label.font = Font(name='Calibri',size=11,bold=True) + Label.alignment = alignment_style + + ws.column_dimensions['A'].width = 25.10 + ws.column_dimensions['B'].width = 28.00 + ws.column_dimensions['C'].width = 60.00 + ws.column_dimensions['D'].width = 60.00 + + # sections of the document, specifying the document keys which will be listed + sections = [('General model information',['ID', 'Version', 'Model_title', 'Model_description', 'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint', + 'Endpoint_units', 'Interpretation', 'Dependent_variable', 'Species', + 'Limits_applicability', 'Experimental_protocol', 'Model_availability', + 'Data_info']), + ('Algorithm and software',['Algorithm', 'Software', 'Descriptors', 'Algorithm_settings', + 'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics', + 'Internal_validation_1', 'Internal_validation_2', 'External_validation', + 'Comments']), + ('Other information',['Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates', + 'QMRF_updates', 'References', 'QMRF_same_models', 'Mechanistic_basis', + 'Mechanistic_references', 'Supporting_information', 'Comment_on_the_endpoint', + 'Endpoint_data_quality_and_variability', 'Descriptor_selection'])] + + #Save the position and name of the label for the first and last section + position = [] + name = [sections[0][1][0],'Other Comments'] + + count = 1 + for isection in sections: + + for ik in isection[1]: + + label_k = ik.replace('_',' ') + + if label_k in ['Internal validation 2', 'External validation']: + ws[f"A{count}"] = label_k + ws[f'A{count}'].style = Label + else: + ws[f"B{count}"] = label_k + ws[f"B{count}"].style = Label + + if ik in self.fields: + # set defaults for value + ivalue= '' + #v is the selected entry in the documentation dictionary + v = self.fields[ik] ## newest parameter formats are extended and contain ## rich metainformation for each entry - if 'value' in v: - ivalue = v['value'] - - if isinstance(ivalue,dict): + if 'value' in v: + ivalue = v['value'] - ws[f"A{count}"] = label_k - ws[f"A{count}"].style = Label - - end = (count)+(len(ivalue)-1) - - for intk in ivalue: - label_ik = intk.replace('_',' ') - # label_ik = intk.replace('_f', '').replace('_', ' ') - ws[f'B{count}'] = label_ik - ws[f'B{count}'].style = Label - - + if isinstance(ivalue,dict): + + ws[f"A{count}"] = label_k + ws[f"A{count}"].style = Label + + end = (count)+(len(ivalue)-1) + + for intk in ivalue: + label_ik = intk.replace('_',' ') + # label_ik = intk.replace('_f', '').replace('_', ' ') + ws[f'B{count}'] = label_ik + ws[f'B{count}'].style = Label + + + intv = ivalue[intk] + if not isinstance(intv,dict): + + iivalue = intv + if iivalue is None: + iivalue = " " + else: intv = ivalue[intk] - if not isinstance(intv,dict): - - iivalue = intv - if iivalue is None: - iivalue = " " - else: - intv = ivalue[intk] + iivalue = '' + if 'value' in intv: + iivalue = intv["value"] + if iivalue is None: iivalue = '' - if 'value' in intv: - iivalue = intv["value"] - if iivalue is None: - iivalue = '' - - ws[f'D{count}'] = intv['description'] - ws[f'D{count}'].alignment = alignment_style - - - ws[f'C{count}'] = f'{str(iivalue)}' - ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4') - ws[f'C{count}'].alignment = alignment_style - - ws.merge_cells(f'A{count}:A{end}') - - count +=1 - - else: - ws[f'D{count}'] = v['description'] - ws[f'D{count}'].alignment = alignment_style + ws[f'D{count}'] = intv['description'] + ws[f'D{count}'].alignment = alignment_style - if label_k == 'Experimental protocol' or label_k == 'Comments': - position.append(count) - - if ivalue is None: - ivalue = '' - ws[f'C{count}'] = f'{str(ivalue)}' + ws[f'C{count}'] = f'{str(iivalue)}' ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4') ws[f'C{count}'].alignment = alignment_style - - count += 1 - - itr = 0 - for i in position: - if itr == 0: - ws[f'A{1}'] = name[itr] - ws[f"A{1}"].style = Label - ws.merge_cells(f'A{1}:A{i}') - else: - ws[f'A{i}'] = name[itr] - ws[f"A{i}"].style = Label - ws.merge_cells(f'A{i}:A{count-1}') + ws.merge_cells(f'A{count}:A{end}') - itr +=1 + count +=1 - try: - wb.save(oname) - except: - return False, f'error saving document as {oname}' - - return True, 'OK' + else: + + ws[f'D{count}'] = v['description'] + ws[f'D{count}'].alignment = alignment_style + + if label_k in ['Experimental protocol', 'Comments']: + position.append(count) + + if ivalue is None: + ivalue = '' + + ws[f'C{count}'] = f'{str(ivalue)}' + ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4') + ws[f'C{count}'].alignment = alignment_style + + + count += 1 + + for itr, i in enumerate(position): + if itr == 0: + ws['A1'] = name[itr] + ws['A1'].style = Label + ws.merge_cells(f'A1:A{i}') + else: + ws[f'A{i}'] = name[itr] + ws[f"A{i}"].style = Label + ws.merge_cells(f'A{i}:A{count-1}') + + try: + wb.save(oname) + except: + return False, f'error saving document as {oname}' + + return True, 'OK' def dumpWORD (self, oname): From 431bc0007ff30a2b1ced12655d5baeedb8ab0b67 Mon Sep 17 00:00:00 2001 From: Adrian Cabrera Date: Mon, 7 Mar 2022 17:11:32 +0100 Subject: [PATCH 09/10] implemented execSummary --- flame/util/verify.py | 296 +++++++++++++++++++++++++++++-------------- 1 file changed, 200 insertions(+), 96 deletions(-) diff --git a/flame/util/verify.py b/flame/util/verify.py index e66a577d..04666f37 100644 --- a/flame/util/verify.py +++ b/flame/util/verify.py @@ -24,6 +24,7 @@ import pickle from flame.documentation import Documentation import flame.chem.sdfileutils as sdfutils +from flame.parameters import Parameters from flame.util import utils,get_logger import os from rdkit import Chem,DataStructs @@ -40,11 +41,27 @@ LOG.info('pip install python-decouple') +def verify_SDFile_activity(endpoint,version=None): + ''' ''' + success, mols = getActivity(endpoint,version) + if not success: + return False,{'status':'Aborted','comments':mols} + + mols = [x for x in mols if x['Activity'] is None] + + if mols: + result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':mols} + else: + result = {'status':'Passed'} + + return True,result + def verify_model(endpoint, version=None): ''' ''' api = connect_api() invalid = [] not_found_list = [] + if not isinstance(api, requests.models.Response): return False,{'status':'Aborted','comments':'Failed connection to External Service'} @@ -78,101 +95,11 @@ def verify_model(endpoint, version=None): return True,{'status':'Passed'} - -def getSmilesByApi(response,name): - token = response.json()['access_token'] - # refresh_token = response.json()['refresh_token'] - headers = {'Authorization': f'Bearer {token}'} - for _ in range(3): - # acces to Chemistry Service - r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers) - - if r.status_code == 200: - if 'result' in r.json(): - return r.json()['result'][0]['smiles'] - - print(r.json()['Empty response']+name) - return None - if r.status_code == 401: - print('failed to reconnect') - -def connect_api(): - - KC_URL = config('KC_URL') - KC_USERNAME = config('KC_USERNAME') - PASSWORD = config('PASSWORD') - CLIENT_SECRET = config('CLIENT_SECRET') - - #get token - payload = f"grant_type=password&client_id=knowledge-hub&client_secret={CLIENT_SECRET}&username={KC_USERNAME}" + \ - f"&password={PASSWORD}" - - headers = {'Content-Type': 'application/x-www-form-urlencoded'} - response = requests.post( - f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token', - data=payload, - headers=headers, - verify=False, - ) - if response.status_code != 200: - LOG.error(response.status_code) - return None - - LOG.info('Succesfully connection') - return response - - -def verify_SDFile_activity(endpoint,version=None): - ''' ''' - # I check that the model label exists only in the first function of the verification process. - # to avoid rechecking it in the following steps - if endpoint is None: - return False, {'status':'Aborted','comments':'Empty model label'} - - param = None - meta_path = utils.model_path(endpoint, version) - parameters_file_name = os.path.join(meta_path, 'parameters.yaml') - ifile = os.path.join(meta_path, 'training_series') - with open(parameters_file_name, 'r') as pfile: - param = yaml.safe_load(pfile) - - # Initiate a RDKit SDFile iterator to process the molecules one by one - suppl = Chem.SDMolSupplier(ifile,sanitize=True) - - # check if the activity label is defined - if param['SDFile_activity']['value'] is None: - return False,{'status':'Aborted','comments':'The activity field is not specified'} - - # Iterate for every molecule inside the SDFile - bio = None - obj_num = 0 - list_mol_names = [] - for mol in suppl: - if mol is None: - LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}' - f' in file {ifile}') - continue - - # extract the molecule name, using a sdfileutils algorithm - name = sdfutils.getName( - mol, count=obj_num, field=param['SDFile_name']['value']) - # extract biological information (activity) - bio = sdfutils.getVal(mol, param['SDFile_activity']['value']) - - if bio is None: - list_mol_names.append(name) - - obj_num +=1 - - if list_mol_names: - result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':list_mol_names} - else: - result = {'status':'Passed','comments':'','Information':list_mol_names} - - return True,result - def verify_library(endpoint, version=None): - '''''' + ''' + Check that the current libraries are the same + as those with which the model was created. + ''' param = None meta_path = utils.model_path(endpoint, version) parameters_file_name = os.path.join(meta_path, 'parameters.yaml') @@ -205,6 +132,18 @@ def verify_library(endpoint, version=None): else: return True,{'status':'Passed'} +def predict_train_series(endpoint, version=None): + ''' + Predict training_series and compare if the model quality + results are the same as the fitting. + ''' + return True,None + +def predict_benchmarking_dataset(): + ''' + Prediction of a benchmarking dataset + ''' + return True,None def verify_documentation (endpoint, version=None): ''' @@ -228,9 +167,80 @@ def verify_documentation (endpoint, version=None): return True,result -def verify (endpoint, version=None): +def verify_ExecSummary(endpoint,version=None): + ''' + Collects the fields required to generate the summary. + ''' result = {} + doc = Documentation(endpoint,version) + param = Parameters() + meta_path = utils.model_path(endpoint, version) + param_file_name = os.path.join(meta_path, 'parameters.yaml') + + try: + with open(param_file_name, 'r') as pfile: + param.p = yaml.safe_load(pfile) + except Exception as e: + return False, {'status':'Aborted','comments':e} + + success, mols = getActivity(endpoint,version) + activity = [x['Activity'] for x in mols if x['Activity'] is not None] + + if not success: + return False,{'status':'Aborted','comments':mols} + + # get dictionaries + algorithm_dict = doc.getDict('Algorithm') + descriptors_dict = doc.getDict('Descriptors') + + #section title + model_type,date = algorithm_dict['type'],doc.getVal('Date') + result['title'] = f'{endpoint} prediction based on a 3D {model_type} model. {date}' + + #section Interpretation + result['Interpretation'] = doc.getVal('Interpretation') + + #Methodology + algorithm = algorithm_dict['algorithm'] + descriptors = ",".join(descriptors_dict['descriptors']) + + #extra information in Methodology section + selection_method = descriptors_dict['selection_method'] + scaling = descriptors_dict['scaling'] + if all([scaling,selection_method]): + result['Methodology'] = f'A {model_type} model, was built using {algorithm} method and {descriptors} molecular descriptors [with {selection_method}][scaled using {scaling}]' + + result['Methodology'] = f'A {model_type} model, was built using {algorithm} method and {descriptors} molecular descriptors.' + + training_set_size = doc.getVal('Data_info')['training_set_size']['value'] + #section Val.Internal quantitative model + if param.getVal('quantitative')['value']: + r2 = doc.getVal('Goodness_of_fit_statistics')['R2'] + q2 = doc.getVal('Internal_validation_1')['Q2'] + sdep = doc.getVal('Internal_validation_1')['SDEP'] + min_activity = round(min(activity),2) + max_activity = round(max(activity),2) + avg = round(sum(activity)/len(activity),2) + + result['Val_internal'] = f'r2 {r2}, q2 {q2}, SDEP {sdep}' + result['Training_set'] = f'{training_set_size} compounds (min. {min_activity},max. {max_activity} average:{avg})' + + else: + #section Val.Internal qualitative model + Sensitivity = doc.getVal('Internal_validation_1')['Sensitivity'] + Specificity = doc.getVal('Internal_validation_1')['Specificity'] + MCC = doc.getVal('Internal_validation_1')['MCC'] + # Activity percentage + neg = round((len([x for x in activity if x <= 0]) / training_set_size) * 100) + pos = round(100 - neg) + result['Val_internal'] = f'Sensitivity:{Sensitivity}, Specificity:{Specificity}, MCC: {MCC}' + result['Training_set'] = f'{training_set_size} compounds ({pos}% positive, {neg}% negative)' + + return True,{'status':'Review','comments':'Pending review','Information':[result]} + +def verify (endpoint, version=None): + result = {} # 1.0 Data checking: activity success, result['activity'] = verify_SDFile_activity(endpoint, version) if not success: @@ -260,6 +270,10 @@ def verify (endpoint, version=None): success, result['fields'] = verify_documentation (endpoint, version) if not success: return False, result + + success,result['ExecSummary'] = verify_ExecSummary(endpoint, version) + if not success: + return False, result # save documentation step documentation = {'Documentation': result} @@ -281,7 +295,7 @@ def verify (endpoint, version=None): def get_verification(endpoint,version): ''' - Retrieves the model verification if it exists + Retrieves the model verification ''' verification = False meta_path = utils.model_path(endpoint, version) @@ -295,6 +309,96 @@ def get_verification(endpoint,version): return False +def getActivity(endpoint, version=None): + ''' + Return the list of molecules with their activity + ''' + # I check that the model label exists only in the first function of the verification process. + # to avoid rechecking it in the following steps + if endpoint is None: + return False, 'Empty model label' + + param = None + meta_path = utils.model_path(endpoint, version) + param_file_name = os.path.join(meta_path, 'parameters.yaml') + ifile = os.path.join(meta_path,'training_series') + with open(param_file_name,'r') as pfile: + param = yaml.safe_load(pfile) + + # Initiate a RDKit SDFile iterator to process the molecules one by one + suppl = Chem.SDMolSupplier(ifile,sanitize=True) + + # check if the activity label is defined + if param['SDFile_activity']['value'] is None: + return False,'The activity field is not specified' + + # Iterate for every molecule inside the SDFile + bio = None + obj_num = 0 + result = [] + + for mol in suppl: + if mol is None: + LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}' + f' in file {ifile}') + continue + + # extract the molecule name, using a sdfileutils algorithm + name = sdfutils.getName( + mol,count=obj_num, field=param['SDFile_name']['value']) + # extract biological information (Activity) + bio = sdfutils.getVal(mol,param['SDFile_activity']['value']) + result.append({ + 'name':name, + 'Activity':bio + }) + + obj_num +=1 + + return True,result + +def getSmilesByApi(response,name): + token = response.json()['access_token'] + # refresh_token = response.json()['refresh_token'] + headers = {'Authorization': f'Bearer {token}'} + for _ in range(3): + # acces to Chemistry Service + r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers) + + if r.status_code == 200: + if 'result' in r.json(): + return r.json()['result'][0]['smiles'] + + print(r.json()['Empty response']+name) + return None + if r.status_code == 401: + print('failed to reconnect') + +def connect_api(): + + KC_URL = config('KC_URL') + KC_USERNAME = config('KC_USERNAME') + PASSWORD = config('PASSWORD') + CLIENT_SECRET = config('CLIENT_SECRET') + + #get token + payload = f"grant_type=password&client_id=knowledge-hub&client_secret={CLIENT_SECRET}&username={KC_USERNAME}" + \ + f"&password={PASSWORD}" + + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + response = requests.post( + f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token', + data=payload, + headers=headers, + verify=False, + ) + if response.status_code != 200: + LOG.error(response.status_code) + return None + + LOG.info('Succesfully connection') + return response + #pending changes: improve scalability #currently it is only useful for the first step of verification. # def show_result(result): From 0ee729eccbe0cb302832f0b472cdd9b5716ddd26 Mon Sep 17 00:00:00 2001 From: AdrianCabreraPhi Date: Fri, 22 Apr 2022 08:53:03 +0200 Subject: [PATCH 10/10] improve quality code: empty_fields fun --- flame/documentation.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/flame/documentation.py b/flame/documentation.py index 2084316f..2e2c392d 100644 --- a/flame/documentation.py +++ b/flame/documentation.py @@ -409,7 +409,6 @@ def dumpExcel(self,oname): from openpyxl import Workbook from openpyxl.styles import Font,NamedStyle,Alignment - # from openpyxl.comments import Comment wb = Workbook() ws = wb.active @@ -1054,19 +1053,19 @@ def empty_fields(self): intv = ivalue[intk] if not isinstance(intv,dict): iivalue = intv - if iivalue is None or len(str(iivalue)) is 0: + if iivalue is None or not str(iivalue): emptyfields.append(intk) else: intv = ivalue[intk] iivalue = '' - if intv["value"] is None or len(str(intv["value"])) is 0: + if intv["value"] is None or not str(intv["value"]): emptyfields.append(intk) - + else: - if ivalue is None or len(str(ivalue)) is 0: + if ivalue is None or not str(ivalue): emptyfields.append(ik) - + return emptyfields def get_smiles(self): @@ -1092,7 +1091,7 @@ def autocomplete_documentation(self): if not self.fields['Date']['value']: self.fields['Date']['value'] = today - + if not self.fields['Date_of_QMRF']['value']: self.fields['Date_of_QMRF']['value'] = today @@ -1108,10 +1107,7 @@ def autocomplete_documentation(self): fieldsapplysoftware = ['model','descriptors','applicability_domain'] for field in fieldsapplysoftware: - if field == 'applicability_domain': - if self.parameters.getVal('conformal'): - self.fields['Software']['value'][field]['value'] = software - else: + if field == 'applicability_domain' and self.parameters.getVal('conformal') or field != 'applicability_domain': self.fields['Software']['value'][field]['value'] = software