From e4e8f69a1dc6d44c0eb059e4cc490aad6d9740c8 Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Thu, 20 Jan 2022 11:16:23 +0100
Subject: [PATCH 01/10] implementation of useful functions

---
 flame/util/utils.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/flame/util/utils.py b/flame/util/utils.py
index 5ade5a0a..717ac8ea 100644
--- a/flame/util/utils.py
+++ b/flame/util/utils.py
@@ -22,7 +22,8 @@
 
 __modules__ = None
 
-import os
+import os 
+from dotenv import load_dotenv
 import sys
 import yaml
 import random
@@ -33,6 +34,9 @@
 import numpy as np
 
 from flame.util import get_logger
+from knowledgehub.api import KnowledgeHubAPI
+
+load_dotenv()
 
 LOG = get_logger(__name__)
 
@@ -439,4 +443,25 @@ def isFingerprint (md):
         if md in fplist:
             return True
     
-    return False 
+    return False
+
+
+def connect_api():
+    
+    api = KnowledgeHubAPI(server='TEST', client_secret=os.getenv('CLIENT_SECRET'))
+    api.login(os.getenv("USER_TEST"),os.getenv('PSWD_TEST'))
+
+    return api
+
+
+
+def getSmilesByAPI(api,name):
+    
+    try:
+        smiles = api.ChemistryService().getCompoundByName(name)
+    except:
+        smiles = 0
+            
+    return smiles
+        
+     

From 0d629e980e4b560a60d86661cc629f9d5299b436 Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Tue, 1 Feb 2022 18:20:04 +0100
Subject: [PATCH 02/10] first version 2-model testing

---
 flame/util/verify.py | 156 ++++++++++++++++++++++++++++---------------
 1 file changed, 102 insertions(+), 54 deletions(-)

diff --git a/flame/util/verify.py b/flame/util/verify.py
index a6292afd..dead353b 100644
--- a/flame/util/verify.py
+++ b/flame/util/verify.py
@@ -1,3 +1,26 @@
+#! -*- coding: utf-8 -*-
+
+# Description    Verification process
+#
+# Authors:       Manuel Pastor (manuel.pastor@upf.edu)
+#                Adrian Cabrera
+#
+# Copyright 2018 Manuel Pastor
+#
+# This file is part of Flame
+#
+# Flame is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation version 3.
+#
+# Flame is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Flame. If not, see <http://www.gnu.org/licenses/>.
+
 from flame.documentation import Documentation
 from flame.util import utils,get_logger 
 import os
@@ -7,6 +30,7 @@
 
 LOG = get_logger(__name__)
 
+# 1-Data cheking: Documentation
 def verify_documentation (endpoint, version=None):
     '''
       Check that the required fields are completed
@@ -30,19 +54,18 @@ def verify_documentation (endpoint, version=None):
         result = {'status':'Failed','comments':'fields not completed','Information':fields}
     else:
         result = {'status':'Passed','comments':'All fields required are completed','Information':[]}
-
+    
     return True,result
-
-
+    
+# 1-Data cheking: data
 # Manually verification
-# TO DO
 def verify_data (endpoint, version=None):
+    '''TO DO'''
     return True, {'status':'Passed','comments':'','Information':['Manually verification',]}
 
-
-# TO DO
+# 1-Data cheking: prediction
 def verify_prediction (endpoint, version=None):
-    
+    ''' TO DO '''
     meta_path = utils.model_path(endpoint, version)
     training_file = os.path.join(meta_path, 'training_series')
     if not os.path.isfile(training_file):
@@ -50,66 +73,91 @@ def verify_prediction (endpoint, version=None):
 
     return True, {'status':'Passed','comments':'','Information':[]}
 
+# 2- Model testing
+def verify_model(endpoint, version= None):
+      ''' TO DO'''
+      doc = Documentation(endpoint, version)
+      list_mols = doc.get_mols()
+      api = utils.connect_api()
+      count = 1
+      invalid = []
+      for mol in list_mols:
+          toxhub_smiles = utils.getSmilesByAPI(api,mol)
+          if toxhub_smiles:
+              fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(list_mols[mol])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles))
+              similarity = DataStructs.TanimotoSimilarity(fp1,fp2)
+              if similarity < 0.99:
+                  invalid.append({'namedrug':mol,'input_smiles':list_mols[mol],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) 
+          else:
+              print(count,". Not found:",mol)
+              count +=1
 
-# def verify_model(endpoint, version= None):
-#     doc = Documentation(endpoint, version)
-#     list_of_mols = doc.get_mols()
-#     print("Total: ",len(list_of_mols))
-#     api = utils.connect_api()
-#     count = 1
-#     countInvalidMols = 1
-
-#     invalidMols = {}
-#     for mol in list_of_mols:
-#         apiSmile = utils.getSmilesByAPI(api,mol)
-#         aux_smile = apiSmile
-#         if apiSmile:
-#             localSmile,apiSmile = Chem.MolFromSmiles(list_of_mols[mol]),Chem.MolFromSmiles(apiSmile)
-#             fp1,fp2 = Chem.RDKFingerprint(localSmile),Chem.RDKFingerprint(apiSmile)
-
-#             if DataStructs.TanimotoSimilarity(fp1,fp2) < 0.99:
-#                 invalidMols[mol] = [list_of_mols[mol],aux_smile]
-#                 countInvalidMols += 1
-#         else:
-#             print(count,". Not found:",mol)
-#             count +=1
-
-#     print("Similarity below 0.99: ",countInvalidMols)         
-#     return True,{'status':'Passed','comments':'','Information':invalidMols}
+      if invalid:
+          return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalidMols}
 
-def verify (endpoint, version=None):
+      return True,{'status':'Passed','comments':'','Information':[]}
+
+
+# 3-Inspection of Model
+
+def inspection_model():
+
+    return None
+
+# 4-Examination of Executive summary
+
+def executive_summary():
+    
+    return None
     
+
+def verify (endpoint, version=None):
     result = {}
+    # 1- Data cheking: Documentation
     success,  result['documentation'] = verify_documentation (endpoint, version)
-    #success, result['model'] = verify_model(endpoint, version)
-
+    
     if not success:
         return False, result
-
+    # 1- Data cheking: data
     success, result['data'] = verify_data (endpoint, version)
 
     if not success:
         return False, result
-
+    # 1- Data cheking: prediction
     success, result['prediction'] = verify_prediction (endpoint, version)
 
     if not success:
         return False, result
-    
 
+    # save datacheking data
+    datacheking = {'Data cheking':result}
 
+    result = {}
+
+    # 2- Model testing
+    success, result['model'] = verify_model(endpoint, version)
+    if not  success:
+        return False, result
+    
+    # save model testing data
+    modeltesting = {'Model testing': result}
+    
+    
+    datacheking.update(modeltesting) # concatenates the dictionary of data cheking and the dictionary of model testing
+    
     meta_path = utils.model_path(endpoint, version)
     verification_file = os.path.join(meta_path, 'verification.pkl')
 
     #Save in the model folder verification.pkl
     file = open(verification_file,"wb")
-    pickle.dump(result,file)
+    pickle.dump(datacheking,file)
     file.close()
     LOG.info(f'Save verification.pkl file \n')
 
-    show_result(result)
+    # show first step of verification process
+    show_result(datacheking['Data cheking'])
 
-    return True, result
+    return True, datacheking
 
 
 def get_verification(endpoint,version):
@@ -128,21 +176,21 @@ def get_verification(endpoint,version):
 
     return False
 
-
-
+#pending changes: improve scalability
+#currently it is only useful for the first step of verification.
 def show_result(result):
-    '''
-    Shows the model verification in the terminal
-    '''
-    if result:
-        # HEADERS
-        print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n")
+     '''
+     Shows the model verification in the terminal
+     '''
+     if result:
+         # HEADERS
+         print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n")
         
-        for x in result:
-            information = " ".join(result[x]['Information'])
-            print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information))
-    else:
-        LOG.error("Unable to print verification result")
+         for x in result:
+             information = " ".join(result[x]['Information'])
+             print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information))
+     else:
+         LOG.error("Unable to print verification result")
     
 
     

From 7e9dcede0f0111db733f554b51283790500fd4f1 Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Tue, 1 Feb 2022 19:06:51 +0100
Subject: [PATCH 03/10] documented and refactored code

---
 flame/documentation.py |  3 ++-
 flame/util/verify.py   | 16 ++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/flame/documentation.py b/flame/documentation.py
index 002a89c4..5f5f36a4 100644
--- a/flame/documentation.py
+++ b/flame/documentation.py
@@ -1064,7 +1064,8 @@ def empty_fields(self):
                         
         return emptyfields
     
-    def get_mols(self):
+    def get_smiles(self):
+        '''Returns a dictionary with the fields:{(key)name:(value)SMILES}'''
         
         return dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES")))
 
diff --git a/flame/util/verify.py b/flame/util/verify.py
index dead353b..4d501f58 100644
--- a/flame/util/verify.py
+++ b/flame/util/verify.py
@@ -77,23 +77,23 @@ def verify_prediction (endpoint, version=None):
 def verify_model(endpoint, version= None):
       ''' TO DO'''
       doc = Documentation(endpoint, version)
-      list_mols = doc.get_mols()
+      smiles_list = doc.get_smiles()
       api = utils.connect_api()
       count = 1
       invalid = []
-      for mol in list_mols:
-          toxhub_smiles = utils.getSmilesByAPI(api,mol)
+      for drugname in smiles_list:
+          toxhub_smiles = utils.getSmilesByAPI(api,drugname)
           if toxhub_smiles:
-              fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(list_mols[mol])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles))
+              fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles))
               similarity = DataStructs.TanimotoSimilarity(fp1,fp2)
               if similarity < 0.99:
-                  invalid.append({'namedrug':mol,'input_smiles':list_mols[mol],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) 
+                  invalid.append({'drugname':drugname,'input_smiles':smiles_list[drugname],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) 
           else:
-              print(count,". Not found:",mol)
+              print(count,". Not found:",drugname)
               count +=1
 
       if invalid:
-          return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalidMols}
+          return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalid}
 
       return True,{'status':'Passed','comments':'','Information':[]}
 
@@ -107,7 +107,7 @@ def inspection_model():
 # 4-Examination of Executive summary
 
 def executive_summary():
-    
+
     return None
     
 

From b49185d0515359a37ed3406d36bcf5c2df681217 Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Fri, 4 Feb 2022 20:35:33 +0100
Subject: [PATCH 04/10] change from pickle format to yaml format

---
 flame/util/verify.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/flame/util/verify.py b/flame/util/verify.py
index 4d501f58..300a6275 100644
--- a/flame/util/verify.py
+++ b/flame/util/verify.py
@@ -25,7 +25,7 @@
 from flame.util import utils,get_logger 
 import os
 from rdkit import Chem,DataStructs
-import pickle
+import yaml
 
 
 LOG = get_logger(__name__)
@@ -146,13 +146,12 @@ def verify (endpoint, version=None):
     datacheking.update(modeltesting) # concatenates the dictionary of data cheking and the dictionary of model testing
     
     meta_path = utils.model_path(endpoint, version)
-    verification_file = os.path.join(meta_path, 'verification.pkl')
-
-    #Save in the model folder verification.pkl
-    file = open(verification_file,"wb")
-    pickle.dump(datacheking,file)
-    file.close()
-    LOG.info(f'Save verification.pkl file \n')
+    verification_path = os.path.join(meta_path, 'verification.yaml')
+    
+    #Save in the model folder verification.yaml
+    with open(verification_path,'w') as file:
+        yaml.dump(datacheking,file)
+    
 
     # show first step of verification process
     show_result(datacheking['Data cheking'])
@@ -166,12 +165,12 @@ def get_verification(endpoint,version):
     '''
     verification = False
     meta_path = utils.model_path(endpoint, version)
-    verification_file = os.path.join(meta_path, 'verification.pkl')
+    verification_path = os.path.join(meta_path, 'verification.yaml')
 
-    if os.path.isfile(verification_file):
-        file = open(verification_file,"rb")
-        verification = pickle.load(file)
-        file.close()
+    if os.path.isfile(verification_path):
+        with open(verification_path,'r') as file:
+            verification = yaml.load(file,Loader=yaml.FullLoader)
+            
         return True,verification
 
     return False

From 6dab8027e35e6d5015440732a9d48ea49025edb4 Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Tue, 15 Feb 2022 13:10:02 +0100
Subject: [PATCH 05/10] date assignment error fixed

---
 flame/documentation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/flame/documentation.py b/flame/documentation.py
index 3e2993f9..56afb81c 100644
--- a/flame/documentation.py
+++ b/flame/documentation.py
@@ -1082,8 +1082,11 @@ def autocomplete_documentation(self):
         #Date, Date of model development and Date of QMRF.
         today = date.today().strftime("%B %d, %Y")
 
-        self.fields['Date']['value'] = today
-        self.fields['Date_of_QMRF']['value'] = today
+        if not self.fields['Date']['value']:
+            self.fields['Date']['value'] = today
+        
+        if not self.fields['Date_of_QMRF']['value']:
+            self.fields['Date_of_QMRF']['value'] = today
 
         #format, Format used(SDF,TSV)
         if self.parameters.getVal('input_type') == 'data':

From d3b24d766f48f769a0f2d5741b53db0cdd98a47d Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Wed, 23 Feb 2022 20:38:51 +0100
Subject: [PATCH 06/10] implemented the library check and sdfile_Activity

---
 flame/documentation.py |  13 +-
 flame/util/utils.py    |  29 +---
 flame/util/verify.py   | 334 +++++++++++++++++++++++++++++------------
 3 files changed, 245 insertions(+), 131 deletions(-)

diff --git a/flame/documentation.py b/flame/documentation.py
index 56afb81c..2f49ff78 100644
--- a/flame/documentation.py
+++ b/flame/documentation.py
@@ -1065,12 +1065,15 @@ def empty_fields(self):
         return emptyfields
     
     def get_smiles(self):
-        '''Returns a dictionary with the fields:{(key)name:(value)SMILES}'''
+        '''Return the value stored under "SMILES" in the conveyor.'''
         
-        return dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES")))
-
-
-
+        #dict(zip(self.conveyor.getVal("obj_nam"),self.conveyor.getVal("SMILES")))
+        return self.conveyor.getVal("SMILES")
+    
+    def get_names(self):
+        '''Return the value stored under "obj_nam" in the conveyor.'''
+        return self.conveyor.getVal("obj_nam")
+    
     def autocomplete_documentation(self):
         """
         Auto complete fields in model documentation
diff --git a/flame/util/utils.py b/flame/util/utils.py
index 6721729e..8c63bd5c 100644
--- a/flame/util/utils.py
+++ b/flame/util/utils.py
@@ -23,7 +23,6 @@
 __modules__ = None
 
 import os 
-from dotenv import load_dotenv
 import sys
 import yaml
 import random
@@ -35,11 +34,10 @@
 import codecs
 import string
 import re 
-
 from flame.util import get_logger
-from knowledgehub.api import KnowledgeHubAPI
+#from knowledgehub.api import KnowledgeHubAPI
+
 
-load_dotenv()
 
 LOG = get_logger(__name__)
 
@@ -470,25 +468,4 @@ def isFingerprint (md):
         if md in fplist:
             return True
     
-    return False
-
-
-def connect_api():
-    
-    api = KnowledgeHubAPI(server='TEST', client_secret=os.getenv('CLIENT_SECRET'))
-    api.login(os.getenv("USER_TEST"),os.getenv('PSWD_TEST'))
-
-    return api
-
-
-
-def getSmilesByAPI(api,name):
-    
-    try:
-        smiles = api.ChemistryService().getCompoundByName(name)
-    except:
-        smiles = 0
-            
-    return smiles
-        
-     
+    return False
\ No newline at end of file
diff --git a/flame/util/verify.py b/flame/util/verify.py
index 300a6275..2523e029 100644
--- a/flame/util/verify.py
+++ b/flame/util/verify.py
@@ -21,142 +21,276 @@
 # You should have received a copy of the GNU General Public License
 # along with Flame. If not, see <http://www.gnu.org/licenses/>.
 
+import pickle
+from flame.stats.base_model import BaseEstimator
+from flame.parameters import Parameters
 from flame.documentation import Documentation
+import flame.chem.sdfileutils as sdfutils
 from flame.util import utils,get_logger 
 import os
 from rdkit import Chem,DataStructs
 import yaml
+import urllib3
+urllib3.disable_warnings()
+import requests
+LOG = get_logger(__name__)
 
+try:
+    from decouple import config
+except ImportError:  # only a missing package is expected here; any other failure should surface
+    LOG.error('decouple library is not installed.')
+    LOG.info('pip install python-decouple')
 
-LOG = get_logger(__name__)
 
-# 1-Data cheking: Documentation
-def verify_documentation (endpoint, version=None):
-    '''
-      Check that the required fields are completed
-    '''
 
-    blacklist = ['Species','Limits_applicability','Experimental_protocol','location','description','endpoint_positive','endpoint_negative','raw_data_url','test_set_size','training_set_url','test_set_url','bootstrap','ccp_alpha','criterion','max_depth','max_features','max_leaf_nodes','max_samples','min_impurity_decrease','min_impurity_split','min_samples_leaf','min_samples_split','min_weight_fraction_leaf','n_estimators','n_jobs','oob_score','random_state','verbose','warm_start','confidence','ACP_sampler','KNN_NN','aggregated','aggregation_function','conformal_predictors','normalizing_model','Conformal_mean_interval','Conformal_accuracy','Q2','SDEP','Comments','Other_related_models','Date_of_QMRF','Date_of_QMRF_updates','QMRF_updates','References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability','Descriptor_selection','Internal_validation_2','External_validation']
 
-    if endpoint is None:
-        return False, 'Empty model label'
-    
-    # get de model repo path
-    rdir = utils.model_path(endpoint, version)
-    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
-        return False, 'Info file not found' 
+def verify_model(endpoint, version=None):
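+    '''Check each SMILES from Documentation against the external service; names not found or with Tanimoto similarity < 0.99 are reported.'''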
+    api = connect_api()
+
+    if not isinstance(api, requests.models.Response):
+        return False,{'status':'Aborted','comments':'Failed connection to External Service'}
 
+    invalid = []
+    not_found_list = []
     doc = Documentation(endpoint, version)
+    smiles_list = dict(zip(doc.get_names(),doc.get_smiles()))
+    for drugname,smiles in smiles_list.items():
+        ext_service_smiles = getSmilesByApi(api,drugname)
+        if ext_service_smiles:
+            fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(ext_service_smiles))
+            similarity = DataStructs.TanimotoSimilarity(fp1,fp2)
+            if similarity < 0.99:
+                invalid.append(
+                    {
+                        'drugname': drugname,
+                        'input_smiles':smiles,
+                        'ext_service_smiles':ext_service_smiles,
+                        'similarity':similarity,
+                    })
+        else:
+            not_found_list.append(drugname)
+
+    if invalid or not_found_list:
+        return True,{'status':'Failed',
+                     'comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.',
+                     'Information':invalid,
+                     'Extra_Information':not_found_list}
+
+    return True,{'status':'Passed'}
 
-    fields =  [field for field in doc.empty_fields() if field not in blacklist]
 
-    if fields:
-        result = {'status':'Failed','comments':'fields not completed','Information':fields}
-    else:
-        result = {'status':'Passed','comments':'All fields required are completed','Information':[]}
+
+            
+def getSmilesByApi(response,name):
+    token = response.json()['access_token']
+    # refresh_token = response.json()['refresh_token']
+    headers = {'Authorization': f'Bearer {token}'}
+    for _ in range(3):
+        # access the Chemistry Service
+        r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers)
+
+        if r.status_code == 200:
+            if 'result' in r.json():
+                return r.json()['result'][0]['smiles']
+
+            print(r.json()['Empty response']+name)
+            return None
+        if r.status_code == 401:
+            print('failed to reconnect')
+
+def connect_api():
     
-    return True,result
+    KC_URL = config('KC_URL')
+    KC_USERNAME = config('KC_USERNAME')
+    PASSWORD = config('PASSWORD')
+    CLIENT_SECRET = config('CLIENT_SECRET')
+
+    # get token: a dict payload lets requests form-encode credentials containing '&', '=', '+' or '%'
+    payload = {'grant_type':'password','client_id':'knowledge-hub','client_secret':CLIENT_SECRET,
+               'username':KC_USERNAME,'password':PASSWORD}
+
+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
+    response = requests.post(
+        f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token',
+        data=payload,
+        headers=headers,
+        verify=False,
+    )
+    if response.status_code != 200:
+        LOG.error(response.status_code)
+        return None 
+
+    LOG.info('Succesfully connection')
+    return response
+
+
+
+
+
+def verify_SDFile_activity(endpoint,version=None):
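+    '''Check that every molecule in the training_series SDFile has the activity field named in parameters.yaml.'''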
+    # I check that the model label exists only in the first function of the verification process.
+    # to avoid rechecking it in the following steps
+    if endpoint is None:
+        return False, {'status':'Aborted','comments':'Empty model label'}
+
+    param = None
+    meta_path = utils.model_path(endpoint, version)
+    parameters_file_name = os.path.join(meta_path, 'parameters.yaml')
+    ifile = os.path.join(meta_path, 'training_series')
+    with open(parameters_file_name, 'r') as pfile:
+                param = yaml.safe_load(pfile)
+
+    # Initiate a RDKit SDFile iterator to process the molecules one by one
+    suppl = Chem.SDMolSupplier(ifile,sanitize=True)
     
-# 1-Data cheking: data
-# Manually verification
-def verify_data (endpoint, version=None):
-    '''TO DO'''
-    return True, {'status':'Passed','comments':'','Information':['Manually verification',]}
-
-# 1-Data cheking: prediction
-def verify_prediction (endpoint, version=None):
-    ''' TO DO '''
+    # check if the activity label is defined
+    if param['SDFile_activity']['value'] is  None:
+        return False,{'status':'Aborted','comments':'The activity field is not specified'}
+
+    # Iterate for every molecule inside the SDFile
+    bio = None
+    obj_num = 0
+    list_mol_names = []
+    for mol in suppl:
+        if mol is None:
+            LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}'
+                    f' in file {ifile}')
+            continue
+
+        # extract the molecule name, using a sdfileutils algorithm
+        name = sdfutils.getName(
+        mol, count=obj_num, field=param['SDFile_name']['value'])
+        # extract biological information (activity)
+        bio = sdfutils.getVal(mol, param['SDFile_activity']['value'])
+        
+        if bio is None:
+            list_mol_names.append(name)
+    
+        obj_num +=1
+
+    if list_mol_names:
+        result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':list_mol_names}
+    else:
+        result = {'status':'Passed','comments':'','Information':list_mol_names}
+
+    return True,result  
+
+def verify_library(endpoint, version=None):
+    '''Check the estimator.pkl version and pass its recorded libraries to utils.compatible_modules.'''
+    param = None
     meta_path = utils.model_path(endpoint, version)
-    training_file = os.path.join(meta_path, 'training_series')
-    if not os.path.isfile(training_file):
-        return True, {'status':'Failed','comments':'','Information':[]}
+    parameters_file_name = os.path.join(meta_path, 'parameters.yaml')
+    with open(parameters_file_name, 'r') as pfile:
+                param = yaml.safe_load(pfile)
+
+    model_pkl = os.path.join(param['model_path']['value'],'estimator.pkl')
+    LOG.debug(f'Loading model from pickle file, path: {model_pkl}')
+    try:
+        with open(model_pkl,"rb") as input_file:
+            dict_estimator = pickle.load(input_file)
 
-    return True, {'status':'Passed','comments':'','Information':[]}
+    except FileNotFoundError:
+        LOG.error(f'No valid model estimator found at: {model_pkl}')
+        return False, {'status':'Aborted','comments':f'No valid model estimator found at: {model_pkl}'}
 
-# 2- Model testing
-def verify_model(endpoint, version= None):
-      ''' TO DO'''
-      doc = Documentation(endpoint, version)
-      smiles_list = doc.get_smiles()
-      api = utils.connect_api()
-      count = 1
-      invalid = []
-      for drugname in smiles_list:
-          toxhub_smiles = utils.getSmilesByAPI(api,drugname)
-          if toxhub_smiles:
-              fp1,fp2 = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles_list[drugname])),Chem.RDKFingerprint(Chem.MolFromSmiles(toxhub_smiles))
-              similarity = DataStructs.TanimotoSimilarity(fp1,fp2)
-              if similarity < 0.99:
-                  invalid.append({'drugname':drugname,'input_smiles':smiles_list[drugname],'toxhub_smiles':toxhub_smiles,'similarity':similarity}) 
-          else:
-              print(count,". Not found:",drugname)
-              count +=1
+    # check if the pickle was created with a compatible version (currently, 1)
+    if dict_estimator['version'] != 1:  # compare by value; 'is not' tested object identity
+        return True, {'status':'Failed','comments':'Incompatible model version','Information':[]}
 
-      if invalid:
-          return True,{'status':'Failed','comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.','Information':invalid}
+    # check if the libraries used to build this model are similar to current libraries
+    if 'libraries' not in dict_estimator:
+        return False, {'status':'Failed',
+                       'comments':'The libraries with which the model was built have not been found in the estimator.pkl'}
 
-      return True,{'status':'Passed','comments':'','Information':[]}
+    success,results = utils.compatible_modules(dict_estimator['libraries'])
 
+    if not success:
+        return True,{'status':'Failed','comments':'Incompatible libraries have been found','Information':results}
+    else:
+         return True,{'status':'Passed'}
+        
+    
 
-# 3-Inspection of Model
 
-def inspection_model():
 
-    return None
+def verify_documentation (endpoint, version=None):
+    '''
+      Check that the required fields are completed
+    '''
+    blacklist = ['Species','Limits_applicability','Experimental_protocol','location','description','endpoint_positive','endpoint_negative','raw_data_url',
+    'test_set_size','training_set_url','test_set_url','bootstrap','ccp_alpha','criterion','max_depth','max_features','max_leaf_nodes','max_samples',
+    'min_impurity_decrease','min_impurity_split','min_samples_leaf','min_samples_split','min_weight_fraction_leaf','n_estimators','n_jobs','oob_score',
+    'random_state','verbose','warm_start','confidence','ACP_sampler','KNN_NN','aggregated','aggregation_function','conformal_predictors','normalizing_model',
+    'Conformal_mean_interval','Conformal_accuracy','Q2','SDEP','Comments','Other_related_models','Date_of_QMRF','Date_of_QMRF_updates','QMRF_updates',
+    'References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability',
+    'Descriptor_selection','Internal_validation_2','External_validation']
 
-# 4-Examination of Executive summary
+    # get the model repo path
+    rdir = utils.model_path(endpoint, version)
+    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
+        return False, 'Info file not found' 
 
-def executive_summary():
+    doc = Documentation(endpoint, version)
+    fields =  [field for field in doc.empty_fields() if field not in blacklist]
 
-    return None
+    if fields:
+        result = {'status':'Failed','comments':'Missing required information.','Information':fields}
+    else:
+        result = {'status':'Passed','comments':'All fields required are completed.','Information':[]}
     
+    return True,result
 
 def verify (endpoint, version=None):
     result = {}
-    # 1- Data cheking: Documentation
-    success,  result['documentation'] = verify_documentation (endpoint, version)
-    
-    if not success:
-        return False, result
-    # 1- Data cheking: data
-    success, result['data'] = verify_data (endpoint, version)
 
+    # 1.0 Data checking: activity
+    success, result['activity'] = verify_SDFile_activity(endpoint, version)
     if not success:
-        return False, result
-    # 1- Data cheking: prediction
-    success, result['prediction'] = verify_prediction (endpoint, version)
-
+        return False,result
+    
+    # 1.1 Data checking: Check the validity of the structure provided
+    success, result['model'] = verify_model(endpoint, version)
+    if not success:     
+        return False,result
+    
+    # save data checking step
+    datachecking = {'Data checking':result}
+    
+    # 2.0 Model testing: Check library
+    result = {}
+    success,result['libraries'] = verify_library(endpoint,version)
     if not success:
-        return False, result
+        return False,result
 
-    # save datacheking data
-    datacheking = {'Data cheking':result}
+    # save model testing step
+    modeltesting = {'Model testing':result}
 
-    result = {}
+    datachecking.update(modeltesting)
 
-    # 2- Model testing
-    success, result['model'] = verify_model(endpoint, version)
-    if not  success:
+    # 3- Documentation: required fields
+    result = {}
+    success,  result['fields'] = verify_documentation (endpoint, version)
+    if not success:
         return False, result
     
-    # save model testing data
-    modeltesting = {'Model testing': result}
-    
-    
-    datacheking.update(modeltesting) # concatenates the dictionary of data cheking and the dictionary of model testing
-    
+    # save documentation step
+    documentation = {'Documentation': result}
+
+    datachecking.update(documentation) # concatenates the 3 steps
     meta_path = utils.model_path(endpoint, version)
     verification_path = os.path.join(meta_path, 'verification.yaml')
     
     #Save in the model folder verification.yaml
     with open(verification_path,'w') as file:
-        yaml.dump(datacheking,file)
+        yaml.dump(datachecking,file)
+    
     
-
     # show first step of verification process
-    show_result(datacheking['Data cheking'])
+    # show_result(datachecking['Data checking'])
 
-    return True, datacheking
+    return True, datachecking
 
 
 def get_verification(endpoint,version):
@@ -177,19 +311,19 @@ def get_verification(endpoint,version):
 
 #pending changes: improve scalability
 #currently it is only useful for the first step of verification.
-def show_result(result):
-     '''
-     Shows the model verification in the terminal
-     '''
-     if result:
-         # HEADERS
-         print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n")
+# def show_result(result):
+#      '''
+#      Shows the model verification in the terminal
+#      '''
+#      if result:
+#          # HEADERS
+#          print("{:<18}{:<10}{:<40}{:<10}".format('Stage','Status','Comments','Information'),"\n")
         
-         for x in result:
-             information = " ".join(result[x]['Information'])
-             print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information))
-     else:
-         LOG.error("Unable to print verification result")
+#          for x in result:
+#              information = " ".join(result[x]['Information'])
+#              print("{:<18}{:<10}{:<40}{:<10}".format(x,result[x]['status'],result[x]['comments'],information))
+#      else:
+#          LOG.error("Unable to print verification result")
     
 
     

From a110e7b8337f6e958253daf15eb14168c1dc926c Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Fri, 25 Feb 2022 11:25:36 +0100
Subject: [PATCH 07/10] improved errors control

---
 flame/util/verify.py | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/flame/util/verify.py b/flame/util/verify.py
index 2523e029..e66a577d 100644
--- a/flame/util/verify.py
+++ b/flame/util/verify.py
@@ -22,8 +22,6 @@
 # along with Flame. If not, see <http://www.gnu.org/licenses/>.
 
 import pickle
-from flame.stats.base_model import BaseEstimator
-from flame.parameters import Parameters
 from flame.documentation import Documentation
 import flame.chem.sdfileutils as sdfutils
 from flame.util import utils,get_logger 
@@ -42,18 +40,19 @@
     LOG.info('pip install python-decouple')
 
 
-
-
 def verify_model(endpoint, version=None):
     ''' '''
     api = connect_api()
-
+    invalid = []
+    not_found_list = []
     if not isinstance(api, requests.models.Response):
         return False,{'status':'Aborted','comments':'Failed connection to External Service'}
 
-    invalid = []
-    not_found_list = []
-    doc = Documentation(endpoint, version)
+    try:
+        doc = Documentation(endpoint, version)
+    except:
+        return False,{'status':'Aborted','comments':f'{endpoint} documentation.yaml not found.'}
+
     smiles_list = dict(zip(doc.get_names(),doc.get_smiles()))
     for drugname,smiles in smiles_list.items():
         ext_service_smiles = getSmilesByApi(api,drugname)
@@ -73,15 +72,13 @@ def verify_model(endpoint, version=None):
 
     if invalid or not_found_list:
         return True,{'status':'Failed',
-                     'comments':'The chemical structure of the following drugs is different from that obtained in ToxHub.',
+                     'comments':'The chemical structure of the following drugs is different from that obtained in External Service.',
                      'Information':invalid,
                      'Extra_Information':not_found_list}
 
     return True,{'status':'Passed'}
 
 
-
-            
 def getSmilesByApi(response,name):
     token = response.json()['access_token']
     # refresh_token = response.json()['refresh_token']
@@ -125,9 +122,6 @@ def connect_api():
     return response
 
 
-
-
-
 def verify_SDFile_activity(endpoint,version=None):
     ''' '''
     # I check that the model label exists only in the first function of the verification process.
@@ -210,9 +204,6 @@ def verify_library(endpoint, version=None):
         return True,{'status':'Failed','comments':'Incompatible libraries have been found','Information':results}
     else:
          return True,{'status':'Passed'}
-        
-    
-
 
 
 def verify_documentation (endpoint, version=None):
@@ -227,11 +218,6 @@ def verify_documentation (endpoint, version=None):
     'References','QMRF_same_models','Mechanistic_basis','Mechanistic_references','Supporting_information','Comment_on_the_endpoint','Endpoint_data_quality_and_variability',
     'Descriptor_selection','Internal_validation_2','External_validation']
 
-    # get de model repo path
-    rdir = utils.model_path(endpoint, version)
-    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
-        return False, 'Info file not found' 
-
     doc = Documentation(endpoint, version)
     fields =  [field for field in doc.empty_fields() if field not in blacklist]
 

From 89f23b60c8e3d1c43acf28ff8806f20a1acba8ba Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Mon, 28 Feb 2022 16:49:22 +0100
Subject: [PATCH 08/10] improvement of the code_dumpExcel()

---
 flame/documentation.py | 250 ++++++++++++++++++++---------------------
 1 file changed, 124 insertions(+), 126 deletions(-)

diff --git a/flame/documentation.py b/flame/documentation.py
index 662bfca6..2084316f 100644
--- a/flame/documentation.py
+++ b/flame/documentation.py
@@ -401,149 +401,147 @@ def dumpYAML (self):
         
         return (yaml_out)
 
-    def dumpExcel (self,oname):
-
-            # openpyxl should be installed in the environment
-            # pip install openpyxl
-           
-            from openpyxl import Workbook
-            from openpyxl.styles import Font,NamedStyle,Alignment
-            # from openpyxl.comments import Comment
-
-            wb = Workbook() 
-            ws = wb.active 
-            ws.title = f"Model {self.model} documentation" 
-            alignment_style = Alignment(vertical='top',wrapText=True)
-            
-            # Label Style
-            Label = NamedStyle(name="Label")
-            Label.font = Font(name='Calibri',size=11,bold=True)
-            Label.alignment = alignment_style
-            
-            ws.column_dimensions['A'].width = 25.10
-            ws.column_dimensions['B'].width = 28.00
-            ws.column_dimensions['C'].width = 60.00
-            ws.column_dimensions['D'].width = 60.00
+    def dumpExcel(self,oname):
+        
 
-            # sections of the document, specifying the document keys which will be listed
-            sections = [('General model information',['ID', 'Version', 'Model_title', 'Model_description', 'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint',
-                        'Endpoint_units', 'Interpretation', 'Dependent_variable', 'Species',
-                        'Limits_applicability', 'Experimental_protocol', 'Model_availability',
-                        'Data_info']), 
-                        ('Algorithm and software',['Algorithm', 'Software', 'Descriptors', 'Algorithm_settings',
-                        'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics',
-                        'Internal_validation_1', 'Internal_validation_2', 'External_validation',
-                        'Comments']),
-                        ('Other information',['Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates',
-                        'QMRF_updates', 'References', 'QMRF_same_models', 'Mechanistic_basis', 
-                        'Mechanistic_references', 'Supporting_information', 'Comment_on_the_endpoint',
-                        'Endpoint_data_quality_and_variability', 'Descriptor_selection'])]
+        # openpyxl should be installed in the environment
+        # pip install openpyxl
 
-            #Save the position and name of the label for the first and last section
-            position = []
-            name = [sections[0][1][0],'Other Comments']
-            
-            count = 1
-            for isection in sections:
+        from openpyxl import Workbook
+        from openpyxl.styles import Font,NamedStyle,Alignment
+        # from openpyxl.comments import Comment
 
-                for ik in isection[1]:
-                 
-                    label_k = ik.replace('_',' ')
+        wb = Workbook()
+        ws = wb.active
+        ws.title = f"Model {self.model} documentation"
+        alignment_style = Alignment(vertical='top',wrapText=True)
 
-                    if label_k == 'Internal validation 2' or label_k == 'External validation':
-                        ws[f"A{count}"] = label_k
-                        ws[f'A{count}'].style = Label
-                    else:
-                        ws[f"B{count}"] = label_k
-                        ws[f"B{count}"].style = Label
-
-                    if ik in self.fields:
-                        # set defaults for value
-                        ivalue= ''
-                        #v is the selected entry in the documentation dictionary
-                        v = self.fields[ik]
+        # Label Style
+        Label = NamedStyle(name="Label")
+        Label.font = Font(name='Calibri',size=11,bold=True)
+        Label.alignment = alignment_style
+
+        ws.column_dimensions['A'].width = 25.10
+        ws.column_dimensions['B'].width = 28.00
+        ws.column_dimensions['C'].width = 60.00
+        ws.column_dimensions['D'].width = 60.00
+
+        # sections of the document, specifying the document keys which will be listed
+        sections = [('General model information',['ID', 'Version', 'Model_title', 'Model_description', 'Keywords', 'Contact', 'Institution', 'Date', 'Endpoint',
+                    'Endpoint_units', 'Interpretation', 'Dependent_variable', 'Species',
+                    'Limits_applicability', 'Experimental_protocol', 'Model_availability',
+                    'Data_info']), 
+                    ('Algorithm and software',['Algorithm', 'Software', 'Descriptors', 'Algorithm_settings',
+                    'AD_method', 'AD_parameters', 'Goodness_of_fit_statistics',
+                    'Internal_validation_1', 'Internal_validation_2', 'External_validation',
+                    'Comments']),
+                    ('Other information',['Other_related_models', 'Date_of_QMRF', 'Date_of_QMRF_updates',
+                    'QMRF_updates', 'References', 'QMRF_same_models', 'Mechanistic_basis', 
+                    'Mechanistic_references', 'Supporting_information', 'Comment_on_the_endpoint',
+                    'Endpoint_data_quality_and_variability', 'Descriptor_selection'])]
+
+        #Save the position and name of the label for the first and last section
+        position = []
+        name = [sections[0][1][0],'Other Comments']
+
+        count = 1
+        for isection in sections:
+
+            for ik in isection[1]:
+
+                label_k = ik.replace('_',' ')
+
+                if label_k in ['Internal validation 2', 'External validation']:
+                    ws[f"A{count}"] = label_k
+                    ws[f'A{count}'].style = Label
+                else:
+                    ws[f"B{count}"] = label_k
+                    ws[f"B{count}"].style = Label
+
+                if ik in self.fields:
+                    # set defaults for value
+                    ivalue= ''
+                    #v is the selected entry in the documentation dictionary
+                    v = self.fields[ik]
                         ## newest parameter formats are extended and contain
                         ## rich metainformation for each entry
-                        if 'value' in v:
-                            ivalue = v['value']
-                             
-                            if isinstance(ivalue,dict):
+                    if 'value' in v:
+                        ivalue = v['value']
 
-                                ws[f"A{count}"] = label_k
-                                ws[f"A{count}"].style = Label
-                                
-                                end = (count)+(len(ivalue)-1)
-
-                                for intk in ivalue:
-                                    label_ik = intk.replace('_',' ')
-                                    # label_ik = intk.replace('_f', '').replace('_', ' ')
-                                    ws[f'B{count}'] = label_ik
-                                    ws[f'B{count}'].style = Label
-                                    
-                                     
+                        if isinstance(ivalue,dict):
+
+                            ws[f"A{count}"] = label_k
+                            ws[f"A{count}"].style = Label
+
+                            end = (count)+(len(ivalue)-1)
+
+                            for intk in ivalue:
+                                label_ik = intk.replace('_',' ')
+                                # label_ik = intk.replace('_f', '').replace('_', ' ')
+                                ws[f'B{count}'] = label_ik
+                                ws[f'B{count}'].style = Label
+
+
+                                intv = ivalue[intk]
+                                if not isinstance(intv,dict):
+
+                                    iivalue = intv
+                                    if iivalue is None:
+                                        iivalue = " "
+                                else:
                                     intv = ivalue[intk]
-                                    if not isinstance(intv,dict):
-                                        
-                                        iivalue = intv
-                                        if iivalue is None:
-                                            iivalue = " "
-                                    else:
-                                        intv = ivalue[intk]
+                                    iivalue = ''
+                                    if 'value' in intv:
+                                        iivalue = intv["value"]
+                                    if iivalue is None:
                                         iivalue = ''
-                                        if 'value' in intv:
-                                            iivalue = intv["value"]
-                                        if iivalue is None:
-                                            iivalue = ''
-
-                                        ws[f'D{count}'] = intv['description']
-                                        ws[f'D{count}'].alignment = alignment_style
-
-                                        
-                                    ws[f'C{count}'] = f'{str(iivalue)}'
-                                    ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4')
-                                    ws[f'C{count}'].alignment = alignment_style
-                                    
-                                    ws.merge_cells(f'A{count}:A{end}')
-                                 
-                                    count +=1
-                                               
-                            else:
 
-                                ws[f'D{count}'] = v['description']
-                                ws[f'D{count}'].alignment = alignment_style
+                                    ws[f'D{count}'] = intv['description']
+                                    ws[f'D{count}'].alignment = alignment_style
 
-                                if label_k == 'Experimental protocol' or label_k == 'Comments':
-                                    position.append(count)
-                                    
-                                if ivalue is None:
-                                    ivalue = ''
 
-                                ws[f'C{count}'] = f'{str(ivalue)}'
+                                ws[f'C{count}'] = f'{str(iivalue)}'
                                 ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4')
                                 ws[f'C{count}'].alignment = alignment_style
 
-                                
-                                count += 1
-            
-            itr = 0
-            for i in position:
-                if itr == 0:    
-                    ws[f'A{1}'] = name[itr]
-                    ws[f"A{1}"].style = Label
-                    ws.merge_cells(f'A{1}:A{i}')
-                else:
-                    ws[f'A{i}'] = name[itr]
-                    ws[f"A{i}"].style = Label
-                    ws.merge_cells(f'A{i}:A{count-1}')
+                                ws.merge_cells(f'A{count}:A{end}')
 
-                itr +=1
+                                count +=1
 
-            try:    
-                wb.save(oname)
-            except:
-                return False, f'error saving document as {oname}'
-            
-            return True, 'OK'
+                        else:
+
+                            ws[f'D{count}'] = v['description']
+                            ws[f'D{count}'].alignment = alignment_style
+
+                            if label_k in ['Experimental protocol', 'Comments']:
+                                position.append(count)
+
+                            if ivalue is None:
+                                ivalue = ''
+
+                            ws[f'C{count}'] = f'{str(ivalue)}'
+                            ws[f'C{count}'].font = Font(name='Calibri',size=11,color='3465a4')
+                            ws[f'C{count}'].alignment = alignment_style
+
+
+                            count += 1
+
+        for itr, i in enumerate(position):
+            if itr == 0:    
+                ws['A1'] = name[itr]
+                ws['A1'].style = Label
+                ws.merge_cells(f'A1:A{i}')
+            else:
+                ws[f'A{i}'] = name[itr]
+                ws[f"A{i}"].style = Label
+                ws.merge_cells(f'A{i}:A{count-1}')
+
+        try:    
+            wb.save(oname)
+        except:
+            return False, f'error saving document as {oname}'
+
+        return True, 'OK'
 
     def dumpWORD (self, oname):
 

From 431bc0007ff30a2b1ced12655d5baeedb8ab0b67 Mon Sep 17 00:00:00 2001
From: Adrian Cabrera <adridominican@gmail.com>
Date: Mon, 7 Mar 2022 17:11:32 +0100
Subject: [PATCH 09/10] implemented execSummary

---
 flame/util/verify.py | 296 +++++++++++++++++++++++++++++--------------
 1 file changed, 200 insertions(+), 96 deletions(-)

diff --git a/flame/util/verify.py b/flame/util/verify.py
index e66a577d..04666f37 100644
--- a/flame/util/verify.py
+++ b/flame/util/verify.py
@@ -24,6 +24,7 @@
 import pickle
 from flame.documentation import Documentation
 import flame.chem.sdfileutils as sdfutils
+from flame.parameters import Parameters
 from flame.util import utils,get_logger 
 import os
 from rdkit import Chem,DataStructs
@@ -40,11 +41,27 @@
     LOG.info('pip install python-decouple')
 
 
+def verify_SDFile_activity(endpoint,version=None):
+    ''' '''
+    success, mols = getActivity(endpoint,version)
+    if not success:
+        return False,{'status':'Aborted','comments':mols}
+
+    mols = [x for x in mols if x['Activity'] is None]
+
+    if mols:
+        result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':mols}
+    else:
+        result = {'status':'Passed'}
+
+    return True,result
+
 def verify_model(endpoint, version=None):
     ''' '''
     api = connect_api()
     invalid = []
     not_found_list = []
+
     if not isinstance(api, requests.models.Response):
         return False,{'status':'Aborted','comments':'Failed connection to External Service'}
 
@@ -78,101 +95,11 @@ def verify_model(endpoint, version=None):
 
     return True,{'status':'Passed'}
 
-
-def getSmilesByApi(response,name):
-    token = response.json()['access_token']
-    # refresh_token = response.json()['refresh_token']
-    headers = {'Authorization': f'Bearer {token}'}
-    for _ in range(3):
-        # acces to Chemistry Service
-        r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers)
-
-        if r.status_code == 200:
-            if 'result' in r.json():
-                return r.json()['result'][0]['smiles']
-
-            print(r.json()['Empty response']+name)
-            return None
-        if r.status_code == 401:
-            print('failed to reconnect')
-
-def connect_api():
-    
-    KC_URL = config('KC_URL')
-    KC_USERNAME = config('KC_USERNAME')
-    PASSWORD = config('PASSWORD')
-    CLIENT_SECRET = config('CLIENT_SECRET')
-
-    #get token
-    payload = f"grant_type=password&client_id=knowledge-hub&client_secret={CLIENT_SECRET}&username={KC_USERNAME}" + \
-              f"&password={PASSWORD}"
-
-    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
-    response = requests.post(
-        f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token',
-        data=payload,
-        headers=headers,
-        verify=False,
-    )
-    if response.status_code != 200:
-        LOG.error(response.status_code)
-        return None 
-
-    LOG.info('Succesfully connection')
-    return response
-
-
-def verify_SDFile_activity(endpoint,version=None):
-    ''' '''
-    # I check that the model label exists only in the first function of the verification process.
-    # to avoid rechecking it in the following steps
-    if endpoint is None:
-        return False, {'status':'Aborted','comments':'Empty model label'}
-
-    param = None
-    meta_path = utils.model_path(endpoint, version)
-    parameters_file_name = os.path.join(meta_path, 'parameters.yaml')
-    ifile = os.path.join(meta_path, 'training_series')
-    with open(parameters_file_name, 'r') as pfile:
-                param = yaml.safe_load(pfile)
-
-    # Initiate a RDKit SDFile iterator to process the molecules one by one
-    suppl = Chem.SDMolSupplier(ifile,sanitize=True)
-    
-    # check if the activity label is defined
-    if param['SDFile_activity']['value'] is  None:
-        return False,{'status':'Aborted','comments':'The activity field is not specified'}
-
-    # Iterate for every molecule inside the SDFile
-    bio = None
-    obj_num = 0
-    list_mol_names = []
-    for mol in suppl:
-        if mol is None:
-            LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}'
-                    f' in file {ifile}')
-            continue
-
-        # extract the molecule name, using a sdfileutils algorithm
-        name = sdfutils.getName(
-        mol, count=obj_num, field=param['SDFile_name']['value'])
-        # extract biological information (activity)
-        bio = sdfutils.getVal(mol, param['SDFile_activity']['value'])
-        
-        if bio is None:
-            list_mol_names.append(name)
-    
-        obj_num +=1
-
-    if list_mol_names:
-        result = {'status':'Failed','comments':'The activity must be present in all molecules.','Information':list_mol_names}
-    else:
-        result = {'status':'Passed','comments':'','Information':list_mol_names}
-
-    return True,result  
-
 def verify_library(endpoint, version=None):
-    ''''''
+    '''
+    Check that the current libraries are the same
+    as those with which the model was created.
+    '''
     param = None
     meta_path = utils.model_path(endpoint, version)
     parameters_file_name = os.path.join(meta_path, 'parameters.yaml')
@@ -205,6 +132,18 @@ def verify_library(endpoint, version=None):
     else:
          return True,{'status':'Passed'}
 
+def predict_train_series(endpoint, version=None):
+    '''
+    Predict the training series and check whether the model quality
+    results match those obtained during fitting.
+    '''
+    return True,None
+
+def predict_benchmarking_dataset():
+    '''
+    Prediction of a benchmarking dataset
+    '''
+    return True,None
 
 def verify_documentation (endpoint, version=None):
     '''
@@ -228,9 +167,80 @@ def verify_documentation (endpoint, version=None):
     
     return True,result
 
-def verify (endpoint, version=None):
+def verify_ExecSummary(endpoint,version=None):
+    '''
+    Collects the fields required to generate the summary.
+    '''
     result = {}
+    doc = Documentation(endpoint,version)
+    param = Parameters()
+    meta_path = utils.model_path(endpoint, version)
+    param_file_name = os.path.join(meta_path, 'parameters.yaml')
+
+    try:
+        with open(param_file_name, 'r') as pfile:
+            param.p = yaml.safe_load(pfile)
+    except Exception as e:
+        return False, {'status':'Aborted','comments':e}
+
+    success, mols = getActivity(endpoint,version)
+    activity = [x['Activity'] for x in mols if x['Activity'] is not None]
+
+    if not success:
+        return False,{'status':'Aborted','comments':mols}
+
+    # get dictionaries
+    algorithm_dict = doc.getDict('Algorithm')
+    descriptors_dict = doc.getDict('Descriptors')
+    
+    #section title
+    model_type,date = algorithm_dict['type'],doc.getVal('Date')
+    result['title'] = f'{endpoint} prediction based on a 3D {model_type} model. {date}'
+    
+    #section Interpretation
+    result['Interpretation'] = doc.getVal('Interpretation')
+
+    #Methodology
+    algorithm  = algorithm_dict['algorithm']
+    descriptors = ",".join(descriptors_dict['descriptors'])
+
+    #extra information in Methodology section
+    selection_method = descriptors_dict['selection_method']
+    scaling = descriptors_dict['scaling']
+    if all([scaling,selection_method]):
+        result['Methodology'] = f'A {model_type} model, was built using {algorithm} method and {descriptors} molecular descriptors [with {selection_method}][scaled using {scaling}]'
+
+    result['Methodology'] = f'A {model_type} model, was built using {algorithm} method and {descriptors} molecular descriptors.'
+
+    training_set_size = doc.getVal('Data_info')['training_set_size']['value']
+    #section Val.Internal quantitative model
+    if param.getVal('quantitative')['value']:
+        r2 = doc.getVal('Goodness_of_fit_statistics')['R2']
+        q2 = doc.getVal('Internal_validation_1')['Q2']
+        sdep = doc.getVal('Internal_validation_1')['SDEP']
+        min_activity = round(min(activity),2)
+        max_activity = round(max(activity),2)
+        avg = round(sum(activity)/len(activity),2)
+
+        result['Val_internal'] = f'r2 {r2}, q2 {q2}, SDEP {sdep}'
+        result['Training_set'] = f'{training_set_size} compounds (min. {min_activity},max. {max_activity} average:{avg})'
+        
+    else:
+        #section Val.Internal qualitative model
+        Sensitivity = doc.getVal('Internal_validation_1')['Sensitivity']
+        Specificity = doc.getVal('Internal_validation_1')['Specificity']
+        MCC = doc.getVal('Internal_validation_1')['MCC']
+        # Activity percentage
+        neg = round((len([x for x in activity if x <= 0]) / training_set_size) * 100)
+        pos = round(100 - neg)
 
+        result['Val_internal'] = f'Sensitivity:{Sensitivity}, Specificity:{Specificity}, MCC: {MCC}'
+        result['Training_set'] = f'{training_set_size} compounds ({pos}% positive, {neg}% negative)'
+
+    return True,{'status':'Review','comments':'Pending review','Information':[result]}
+
+def verify (endpoint, version=None):
+    result = {}
     # 1.0 Data checking: activity
     success, result['activity'] = verify_SDFile_activity(endpoint, version)
     if not success:
@@ -260,6 +270,10 @@ def verify (endpoint, version=None):
     success,  result['fields'] = verify_documentation (endpoint, version)
     if not success:
         return False, result
+        
+    success,result['ExecSummary'] = verify_ExecSummary(endpoint, version)
+    if not success:
+        return False, result
     
     # save documentation step
     documentation = {'Documentation': result}
@@ -281,7 +295,7 @@ def verify (endpoint, version=None):
 
 def get_verification(endpoint,version):
     '''
-    Retrieves the model verification if it exists
+    Retrieves the model verification
     '''
     verification = False
     meta_path = utils.model_path(endpoint, version)
@@ -295,6 +309,96 @@ def get_verification(endpoint,version):
 
     return False
 
+def getActivity(endpoint, version=None):
+    '''
+    Return the list of molecules with their activity
+    '''
+    # The model label is checked only in the first function of the verification process,
+    # so that the following steps do not need to re-check it.
+    if endpoint is None:
+        return False, 'Empty model label'
+    
+    param = None
+    meta_path = utils.model_path(endpoint, version)
+    param_file_name = os.path.join(meta_path, 'parameters.yaml')
+    ifile = os.path.join(meta_path,'training_series')
+    with open(param_file_name,'r') as pfile:
+        param = yaml.safe_load(pfile)
+    
+    # Initiate a RDKit SDFile iterator to process the molecules one by one
+    suppl = Chem.SDMolSupplier(ifile,sanitize=True)
+
+    # check if the activity label is defined
+    if param['SDFile_activity']['value'] is None:
+        return False,'The activity field is not specified'
+    
+    # Iterate for every molecule inside the SDFile
+    bio = None
+    obj_num = 0
+    result = []
+
+    for mol in suppl:
+        if mol is None:
+            LOG.error(f'(@extractInformaton) Unable to process molecule #{obj_num+1}'
+                    f' in file {ifile}')
+            continue
+
+        # extract the molecule name, using a sdfileutils algorithm
+        name = sdfutils.getName(
+        mol,count=obj_num, field=param['SDFile_name']['value'])
+        # extract biological information (Activity)
+        bio = sdfutils.getVal(mol,param['SDFile_activity']['value'])
+        result.append({
+            'name':name,
+            'Activity':bio
+        })
+
+        obj_num +=1
+
+    return True,result
+
+def getSmilesByApi(response,name):
+    token = response.json()['access_token']
+    # refresh_token = response.json()['refresh_token']
+    headers = {'Authorization': f'Bearer {token}'}
+    for _ in range(3):
+        # access the Chemistry Service
+        r = requests.get("https://test.toxhub.etransafe.eu/chemistryservice.kh.svc/v1/name_to_structure",verify=False,params={'name':name}, headers=headers)
+
+        if r.status_code == 200:
+            if 'result' in r.json():
+                return r.json()['result'][0]['smiles']
+
+            print(r.json()['Empty response']+name)
+            return None
+        if r.status_code == 401:
+            print('failed to reconnect')
+
+def connect_api():
+    
+    KC_URL = config('KC_URL')
+    KC_USERNAME = config('KC_USERNAME')
+    PASSWORD = config('PASSWORD')
+    CLIENT_SECRET = config('CLIENT_SECRET')
+
+    #get token
+    payload = f"grant_type=password&client_id=knowledge-hub&client_secret={CLIENT_SECRET}&username={KC_USERNAME}" + \
+              f"&password={PASSWORD}"
+
+    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
+    response = requests.post(
+        f'{KC_URL}/auth/realms/KH/protocol/openid-connect/token',
+        data=payload,
+        headers=headers,
+        verify=False,
+    )
+    if response.status_code != 200:
+        LOG.error(response.status_code)
+        return None 
+
+    LOG.info('Succesfully connection')
+    return response
+
 #pending changes: improve scalability
 #currently it is only useful for the first step of verification.
 # def show_result(result):

From 0ee729eccbe0cb302832f0b472cdd9b5716ddd26 Mon Sep 17 00:00:00 2001
From: AdrianCabreraPhi <adridominican@gmail.com>
Date: Fri, 22 Apr 2022 08:53:03 +0200
Subject: [PATCH 10/10] improve quality code: empty_fields fun

---
 flame/documentation.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/flame/documentation.py b/flame/documentation.py
index 2084316f..2e2c392d 100644
--- a/flame/documentation.py
+++ b/flame/documentation.py
@@ -409,7 +409,6 @@ def dumpExcel(self,oname):
 
         from openpyxl import Workbook
         from openpyxl.styles import Font,NamedStyle,Alignment
-        # from openpyxl.comments import Comment
 
         wb = Workbook()
         ws = wb.active
@@ -1054,19 +1053,19 @@ def empty_fields(self):
                         intv = ivalue[intk]
                         if not isinstance(intv,dict):
                             iivalue = intv
-                            if iivalue is None or len(str(iivalue)) is 0:
+                            if iivalue is None or not str(iivalue):
                                 emptyfields.append(intk)
 
                         else:
                             intv = ivalue[intk]
                             iivalue = ''
-                            if intv["value"] is None or len(str(intv["value"])) is 0:
+                            if intv["value"] is None or not str(intv["value"]):
                                 emptyfields.append(intk)
-                  
+
                 else:
-                     if ivalue is None or len(str(ivalue)) is 0:
+                    if ivalue is None or not str(ivalue):
                         emptyfields.append(ik)
-                        
+
         return emptyfields
     
     def get_smiles(self):
@@ -1092,7 +1091,7 @@ def autocomplete_documentation(self):
 
         if not self.fields['Date']['value']:
             self.fields['Date']['value'] = today
-        
+
         if not self.fields['Date_of_QMRF']['value']:
             self.fields['Date_of_QMRF']['value'] = today
 
@@ -1108,10 +1107,7 @@ def autocomplete_documentation(self):
         fieldsapplysoftware = ['model','descriptors','applicability_domain']
 
         for field in fieldsapplysoftware:
-            if field == 'applicability_domain':
-                if self.parameters.getVal('conformal'):
-                    self.fields['Software']['value'][field]['value'] = software
-            else:
+            if field == 'applicability_domain' and self.parameters.getVal('conformal') or field != 'applicability_domain':
                 self.fields['Software']['value'][field]['value'] = software