# Conda environment for Flame with GPU-enabled Keras (`keras-gpu`).
# Create it with:  conda env create -f environment_gpu.yml
name: flame_gpu
channels:
  # NOTE(review): conda's built-in channel alias is spelled `defaults`.
  # Confirm whether `default` is intentional before renaming it, because
  # channel order influences which package builds are selected.
  - default
  - rdkit
  - conda-forge
  - chembl
  - anaconda
dependencies:
  - pip
  - python=3.6
  # Lower bounds are written with the documented `>=` operator (was `=>`).
  - setuptools>=38.4.0
  - scikit-learn>=0.19.1
  - requests
  - rdkit
  - matplotlib
  - keras-gpu
  - pyyaml
  - pytest
  - psycopg2
  - chembl_structure_pipeline
  - pip:
    - xgboost
    - django==2.2.8
    - djangorestframework
    - django-cors-headers
    - djangorestframework-yaml
    - "https://github.com/phi-grib/standardiser/archive/master.zip"
    - "https://github.com/josecarlosgomezt/nonconformist/archive/master.zip"
class my_keras(Keras_nn):
    """Child-model variant of ``Keras_nn`` with its own network definition.

    Construction is delegated unchanged to ``Keras_nn``; the only behaviour
    defined here is ``create_model``.
    """

    def __init__(self, X, Y, parameters, conveyor):
        """Forward every argument to the ``Keras_nn`` initialiser."""
        Keras_nn.__init__(self, X, Y, parameters, conveyor)

    # Function to create model, required for KerasClassifier
    def create_model(self, dim=20):
        """Build and compile the network used as the Keras ``build_fn``.

        The network is a ``Sequential`` stack of three ``Dense`` layers
        (10 relu units, 20 sigmoid units, 1 sigmoid output).

        Args:
            dim: Value passed as ``input_dim`` to the first layer.

        Returns:
            The compiled ``Sequential`` model.
        """
        # This module does not import the Keras classes at file level, so
        # they are imported here; without this the method raised NameError
        # on its first statement.
        from keras.layers import Dense
        from keras.models import Sequential

        model = Sequential()
        model.add(Dense(10, input_dim=dim, activation='relu'))
        model.add(Dense(20, activation='sigmoid'))
        # NOTE(review): a sigmoid output is bounded to (0, 1); confirm this
        # is appropriate for the quantitative (mean squared error) case.
        model.add(Dense(1, activation='sigmoid'))

        # Pick the loss that matches the kind of endpoint being modelled.
        if self.param.getVal('quantitative'):
            loss = 'mean_squared_error'
        else:
            loss = 'binary_crossentropy'

        model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
        return model
# Values offered for the Keras model when optimising its parameters.
# The key is `epochs` (was `epoch`) so that it matches both the
# `Keras_parameters` entry and the name Keras itself uses for this setting.
Keras_optimize:
  advanced: advanced
  object_type: dictionary
  writable: false
  options: null
  value:
    epochs:
      object_type: list
      writable: false
      value:
        - 50
        - 100
        - 150
      options:
        - 50
        - 100
        - 150
      description: Number of epochs
  description: Keras optimize parameters
  dependencies:
    model: Keras
  comments:
  group: modeling
# Function to create model, required for KerasClassifier
def create_model(self, dim=25, learning_rate=0.1):
    """Build and compile the network handed to the Keras wrappers.

    The network is a ``Sequential`` stack of three ``Dense`` layers
    (30 relu units, 20 sigmoid units, 1 sigmoid output) compiled with the
    Adam optimizer and the ``accuracy`` metric.

    Args:
        dim: Value passed as ``input_dim`` to the first layer.
        learning_rate: Step size given to the Adam optimizer. The default
            keeps the value that was previously hard-coded, so existing
            callers are unaffected; exposing it as a keyword lets it be
            supplied alongside the other estimator parameters.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(30, input_dim=dim, activation='relu'))
    model.add(Dense(20, activation='sigmoid'))
    model.add(Dense(1, activation='sigmoid'))

    # The loss is fixed to binary cross-entropy regardless of the kind of
    # endpoint; the quantitative alternative is not enabled in this method.
    loss = 'binary_crossentropy'

    # `lr` is the argument name accepted by the Keras version in use here.
    optimizer = keras.optimizers.Adam(lr=learning_rate)
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    return model
None: return False, 'No valid model estimator found. Try to rebuild the model' - return True, 'model loaded' \ No newline at end of file + return True, 'model loaded'