Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 16 additions & 28 deletions kopt/hyopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import kopt.eval_metrics as ce
from kopt.utils import write_json, merge_dicts, _to_string
from kopt.model_data import (subset, split_train_test_idx, split_KFold_idx)
from kopt.config import db_host, db_port, save_dir
from datetime import datetime, timedelta
from uuid import uuid4
from hyperopt import STATUS_OK
Expand All @@ -21,17 +20,18 @@
import glob
import pprint
import logging
import matplotlib.pyplot as plt


logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def test_fn(fn, hyper_params, n_train=1000, save_model='best', tmp_dir="/tmp/kopt_test/", custom_objects=None):

def test_fn(fn, hyper_params, n_train=1000, save_model=None, tmp_dir="/tmp/kopt_test/", custom_objects=None):
"""Test the correctness of the compiled objective function (CompileFN). It will also test
model saving/loading from disk.

# Arguments
fn: CompileFN instance
hyper_params: pyll graph of hyper-parameters - as later provided to `hyperopt.fmin`
Expand All @@ -48,7 +48,7 @@ def new_data_fn(*args, **kwargs):
data = data_fn(*args, **kwargs)
train = data[0]
train = subset(train, idx=np.arange(min(n_train, train[1].shape[0])))
return train,
return train, data[1], data[2]
return new_data_fn
start_time = datetime.now()
fn = deepcopy(fn)
Expand Down Expand Up @@ -80,7 +80,6 @@ def new_data_fn(*args, **kwargs):

class KMongoTrials(MongoTrials):
"""`hyperopt.MongoTrials` extended with the following methods:

- get_trial(tid) - Retrieve trial by tid (Trial ID).
- get_param(tid) - Retrieve used hyper-parameters for a trial.
- best_trial_tid(rank=0) - Return the trial with lowest loss.
Expand All @@ -95,15 +94,13 @@ class KMongoTrials(MongoTrials):
- get_ok_results - Return a list of trial results with an "ok" status
- load_model(tid) - Load a Keras model of a tid.
- as_df - Returns a tidy `pandas.DataFrame` of the trials database.

# Arguments
db_name: str, MongoTrials database name
exp_name: str, MongoTrials experiment name
ip: str, MongoDB IP address.
port: int, MongoDB port.
kill_timeout: int, Maximum runtime of a job (in seconds) before it gets killed. None for infinite.
**kwargs: Additional keyword arguments passed to the `hyperopt.MongoTrials` constructor.

"""

def __init__(self, db_name, exp_name,
Expand All @@ -128,7 +125,6 @@ def get_param(self, tid):

def best_trial_tid(self, rank=0):
"""Get tid of the best trial

rank=0 means the best model
rank=1 means second best
...
Expand Down Expand Up @@ -178,7 +174,6 @@ def count_by_state_unsynced(self, arg):

def delete_running(self, timeout_last_refresh=0, dry_run=False):
"""Delete jobs stalled in the running state for too long

timeout_last_refresh, int: number of seconds
"""
running_all = self.handle.jobs_running()
Expand Down Expand Up @@ -246,8 +241,6 @@ def plot_history(self, tid, scores=["loss", "f1", "accuracy"],
figsize=(15, 3)):
"""Plot the loss curves"""
history = self.train_history(tid)
import matplotlib.pyplot as plt

fig = plt.figure(figsize=figsize)
for i, score in enumerate(scores):
plt.subplot(1, len(scores), i + 1)
Expand All @@ -262,9 +255,7 @@ def plot_history(self, tid, scores=["loss", "f1", "accuracy"],

def load_model(self, tid, custom_objects=None):
"""Load saved keras model of the trial.

If tid = None, get the best model

Not applicable for trials run in cross-validation (i.e. not applicable
when `CompileFN.cv_n_folds` is not None).
"""
Expand Down Expand Up @@ -331,12 +322,10 @@ def add_n_epoch(df):


# --------------------------------------------
# TODO - put to a separate module
def _train_and_eval_single(train, valid, model,
def _train_and_eval_single(train, valid, test, model,
batch_size=32, epochs=300, use_weight=False,
callbacks=[], eval_best=False, add_eval_metrics={}, custom_objects=None):
"""Fit and evaluate a keras model

eval_best: if True, load the checkpointed model for evaluation
"""
def _format_keras_history(history):
Expand All @@ -358,7 +347,7 @@ def _format_keras_history(history):
epochs=epochs,
sample_weight=sample_weight,
verbose=2,
callbacks=[history] + callbacks)
callbacks=[history])# + callbacks) <------------------------------ #TODO: make early stopping optional

# get history
hist = _format_keras_history(history)
Expand All @@ -368,28 +357,25 @@ def _format_keras_history(history):
assert len(mcp) == 1
model = load_model(mcp[0].filepath, custom_objects=custom_objects)

return eval_model(model, valid, add_eval_metrics), hist
return eval_model(model, valid, test, add_eval_metrics), hist


def eval_model(model, test, add_eval_metrics={}):
def eval_model(model, valid, test, add_eval_metrics={}):
"""Evaluate model's performance on the test-set.

# Arguments
model: Keras model
test: test-dataset. Tuple of inputs `x` and target `y` - `(x, y)`.
add_eval_metrics: Additional evaluation metrics to use. Can be a dictionary or a list of functions
accepting arguments: `y_true`, `y_predicted`. Alternatively, you can provide names of functions from
the `kopt.eval_metrics` module.

# Returns
dictionary with evaluation metrics

"""
# evaluate the model
logger.info("Evaluate...")
# - model_metrics
model_metrics_values = model.evaluate(test[0], test[1], verbose=0,
batch_size=test[1].shape[0])
model_metrics_values = model.evaluate(valid[0], valid[1], verbose=0,
batch_size=valid[1].shape[0])
# evaluation is done in a single pass to have more precise metrics
model_metrics = dict(zip(_listify(model.metrics_names),
_listify(model_metrics_values)))
Expand Down Expand Up @@ -424,11 +410,9 @@ def get_data(data_fn, param):

class CompileFN():
"""Compile an objective function that

- trains the model on the training set
- evaluates the model on the validation set
- reports the performance metric on the validation set as the objective loss

# Arguments
db_name: Database name of the KMongoTrials.
exp_name: Experiment name of the KMongoTrials.
Expand Down Expand Up @@ -462,8 +446,6 @@ class CompileFN():
if save_model="last", save the model after training it.
save_results: If True, the return value is saved as .json to the `save_dir` directory.
save_dir: Path to the save directory.
custom_objects: argument passed to load_model - Optional dictionary mapping names (strings) to
custom classes or functions to be considered during deserialization.
"""
# TODO - check if we can get (db_name, exp_name) from hyperopt

Expand Down Expand Up @@ -596,6 +578,9 @@ def __call__(self, param):
train = data[0]
if self.cv_n_folds is None and self.valid_split is None:
valid_data = data[1]
test = data[2]
else:
test = data[1]
del data
time_data_loaded = datetime.now()

Expand All @@ -622,6 +607,7 @@ def __call__(self, param):
save_best_only=True)]
eval_metrics, history = _train_and_eval_single(train=train_data,
valid=valid_data,
test=test,
model=model,
epochs=param["fit"]["epochs"],
batch_size=param["fit"]["batch_size"],
Expand Down Expand Up @@ -651,6 +637,7 @@ def __call__(self, param):
save_best_only=True)]
eval_m, history_elem = _train_and_eval_single(train=subset(train, train_idx),
valid=subset(train, valid_idx),
test=test,
model=model,
epochs=param["fit"]["epochs"],
batch_size=param["fit"]["batch_size"],
Expand Down Expand Up @@ -784,3 +771,4 @@ def to_str(v):
return str(v)

return ";".join([k + "=" + to_str(v) for k, v in d.items()])