From 1ed4cad50d48202a3f74b8c9d32a722c56fb3b30 Mon Sep 17 00:00:00 2001
From: Mario Graff
Date: Wed, 23 Jul 2025 18:02:12 +0000
Subject: [PATCH] Bug: Precision in transform

---
 encexp/__init__.py             |   2 +-
 encexp/tests/test_text_repr.py | 312 +------------------------
 encexp/text_repr.py            | 403 +--------------------------
 3 files changed, 9 insertions(+), 708 deletions(-)

diff --git a/encexp/__init__.py b/encexp/__init__.py
index 542790e..8df4d85 100644
--- a/encexp/__init__.py
+++ b/encexp/__init__.py
@@ -17,4 +17,4 @@
 if not '-m' in sys.argv:
     from encexp.text_repr import EncExpT, SeqTM, TextModel
 
-__version__ = "0.1.6"
+__version__ = "0.1.7"
diff --git a/encexp/tests/test_text_repr.py b/encexp/tests/test_text_repr.py
index 1b9061f..d54c5a6 100644
--- a/encexp/tests/test_text_repr.py
+++ b/encexp/tests/test_text_repr.py
@@ -235,309 +235,9 @@ def test_TextModel_diac():
     assert len(lst) > 3
 
 
-# def test_EncExp_filename():
-#     """Test EncExp"""
-#     if not isfile('encexp-es-mx.json.gz'):
-#         samples()
-#     data = compute_b4msa_vocabulary('es-mx-sample.json')
-#     voc = compute_seqtm_vocabulary(SeqTM, data,
-#                                    'es-mx-sample.json',
-#                                    voc_size_exponent=10)
-#     build_encexp(voc, 'es-mx-sample.json', 'encexp-es-mx.json.gz')
-#     enc = EncExp(EncExp_filename='encexp-es-mx.json.gz')
-#     assert enc.weights.dtype == np.float32
-#     assert len(enc.names) == 12
-#     os.unlink('encexp-es-mx.json.gz')
-
-
-# def test_EncExp():
-#     """Test EncExp"""
-#     enc = EncExp(precision=np.float16)
-#     assert enc.weights.dtype == np.float16
-#     assert len(enc.names) == 8192
-
-
-# def test_EncExp_encode():
-#     """Test EncExp encode"""
-
-#     dense = EncExp(precision=np.float16)
-#     assert dense.encode('buenos días').shape[1] == 2
-
-
-# def test_EncExp_transform():
-#     """Test EncExp transform"""
-
-#     encexp = EncExp()
-#     X = encexp.transform(['buenos dias'])
-#     assert X.shape[0] == 1
-#     assert X.shape[1] == 8192
-#     assert X.dtype == np.float32
-
-
-# def test_EncExp_prefix_suffix():
-#     """Test EncExp prefix/suffix"""
-
-#     encexp = EncExp(lang='es',
-#                     precision=np.float16,
-#                     prefix_suffix=True)
-#     for k in encexp.bow.names:
-#         if k[:2] != 'q:':
-#             continue
-#         if len(k) >= 6:
-#             continue
-#         assert k[3] == '~' or k[-1] == '~'
-
-
-# def test_EncExp_fit():
-#     """Test EncExp fit"""
-#     from sklearn.svm import LinearSVC
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-#     enc = EncExp(lang='es',
-#                  prefix_suffix=True,
-#                  precision=np.float16).fit(mx + ar, y)
-#     assert isinstance(enc.estimator, LinearSVC)
-#     hy = enc.predict(ar)
-#     assert hy.shape[0] == len(ar)
-#     df = enc.decision_function(ar)
-#     assert df.shape[0] == len(ar)
-#     assert df.dtype == np.float64
-
-
-# def test_EncExp_fit_sgd():
-#     """Test EncExp fit"""
-#     from sklearn.linear_model import SGDClassifier
-#     from itertools import repeat
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-#     D = mx + ar
-#     # while len(D) < 2**17:
-#     for i in range(5):
-#         D.extend(D)
-#         y.extend(y)
-#     D.append(D[0])
-#     y.append(y[0])
-#     enc = EncExp(lang='es').fit(D, y)
-#     assert isinstance(enc.estimator, SGDClassifier)
-#     hy = enc.predict(ar)
-#     assert hy.shape[0] == len(ar)
-#     df = enc.decision_function(ar)
-#     assert df.shape[0] == len(ar)
-#     assert df.dtype == np.float64
-
-
-# def test_EncExp_train_predict_decision_function():
-#     """Test EncExp train_predict_decision_function"""
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     samples(filename='es-es-sample.json.zip')
-#     es = list(tweet_iterator('es-es-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-#     enc = EncExp(lang='es',
-#                  prefix_suffix=True,
-#                  precision=np.float16)
-#     hy = enc.train_predict_decision_function(mx + ar, y)
-#     assert hy.ndim == 2 and hy.shape[0] == len(y) and hy.shape[1] == 1
-#     y += ['es'] * len(es)
-#     hy = enc.train_predict_decision_function(mx + ar + es, y)
-#     assert hy.shape[1] == 3 and hy.shape[0] == len(y)
-
-
-# def test_EncExp_clone():
-#     """Test EncExp clone"""
-
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16)
-#     enc2 = clone(enc)
-#     assert isinstance(enc2, EncExp)
-#     assert np.all(enc2.weights == enc.weights)
-
-
-# def test_EncExp_merge_IDF():
-#     """Test EncExp without keyword's weight"""
-
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16, merge_IDF=False,
-#                  force_token=False)
-#     enc.fill(inplace=True)
-
-#     for k, v in enc.bow.token2id.items():
-#         assert enc.weights[v, v] == 0
-#     enc2 = EncExp(lang='es', prefix_suffix=True,
-#                   precision=np.float16, merge_IDF=True,
-#                   force_token=False)
-#     enc2.fill(inplace=True)
-#     _ = (enc.weights * enc.bow.weights).astype(enc.precision)
-#     assert_almost_equal(_, enc2.weights, decimal=5)
-
-
-# def test_EncExp_fill():
-#     """Test EncExp fill weights"""
-#     from encexp.download import download_seqtm
-
-#     voc = download_seqtm(lang='es')
-#     samples()
-#     if not isfile('encexp-es-mx.json.gz'):
-#         build_encexp(voc, 'es-mx-sample.json', 'encexp-es-mx.json.gz',
-#                      min_pos=64)
-#     enc = EncExp(EncExp_filename='encexp-es-mx.json.gz')
-#     iden = {v:k for k, v in enumerate(enc.bow.names)}
-#     comp = [x for x in enc.bow.names if x not in enc.names]
-#     key = enc.names[0]
-#     enc.weights
-#     w = enc.fill()
-#     assert np.any(w[iden[key]] != 0)
-#     assert_almost_equal(w[iden[comp[0]]], 0)
-#     os.unlink('encexp-es-mx.json.gz')
-#     assert np.all(enc.names == enc.bow.names)
-
-
-# def test_EncExp_iadd():
-#     """Test EncExp iadd"""
-
-#     from encexp.download import download_seqtm
-
-#     voc = download_seqtm(lang='es')
-#     samples()
-#     if not isfile('encexp-es-mx.json.gz'):
-#         build_encexp(voc, 'es-mx-sample.json', 'encexp-es-mx.json.gz',
-#                      min_pos=64)
-#     enc = EncExp(EncExp_filename='encexp-es-mx.json.gz')
-#     w = enc.weights
-#     enc += enc
-#     assert_almost_equal(w, enc.weights, decimal=4)
-#     os.unlink('encexp-es-mx.json.gz')
-#     enc2 = EncExp(lang='es', voc_source='noGeo')
-#     enc2 += enc
-#     enc2 = EncExp(lang='es', voc_source='noGeo')
-#     r = enc2 + enc2
-#     r.weights[:, :] = 0
-#     assert enc2.weights[0, 0] != 0
-
-
-# def test_EncExp_force_tokens():
-#     """Test force tokens"""
-
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16,
-#                  force_token=False)
-#     w = enc.weights
-#     _max = w.max(axis=1)
-#     rows = np.arange(len(enc.names))
-#     cols = np.array([enc.bow.token2id[x] for x in enc.names])
-#     assert_almost_equal(w[rows, cols], 0)
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16,
-#                  force_token=True)
-#     w[rows, cols] = _max
-#     assert_almost_equal(enc.weights, w)
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16, merge_IDF=False,
-#                  force_token=False)
-#     assert enc.weights[0, 0] == 0
-#     enc.force_tokens_weights(IDF=True)
-#     enc2 = EncExp(lang='es', prefix_suffix=True,
-#                   precision=np.float16, merge_IDF=False,
-#                   force_token=True)
-#     assert enc.weights[0, 0] != enc2.weights[0, 0]
-#     assert_almost_equal(enc.weights[0, 1:], enc2.weights[0, 1:])
-
-
-# def test_EncExp_enc_training_size():
-#     """Test training size of the embeddings"""
-
-#     enc = EncExp(lang='es')
-#     assert isinstance(enc.enc_training_size, dict)
-#     for k in enc.enc_training_size:
-#         assert k in enc.names
-
-
-# def test_EncExp_distance():
-#     """Test distance to hyperplane"""
-
-#     txt = 'buenos días'
-#     enc = EncExp(lang='es', transform_distance=True)
-#     assert enc.weights_norm.shape[0] == enc.weights.shape[0]
-#     X = enc.transform([txt])
-#     X2 = EncExp(lang='es',
-#                 transform_distance=False).transform([txt])
-#     assert np.fabs(X - X2).sum() != 0
-
-
-# def test_EncExp_unit_vector():
-#     """Test distance to hyperplane"""
-
-#     txt = 'buenos días'
-#     enc = EncExp(lang='es', unit_vector=False)
-#     X = enc.transform([txt])
-#     assert np.linalg.norm(X) != 1
-#     enc = EncExp(lang='es')
-#     X = enc.transform([txt])
-#     assert_almost_equal(np.linalg.norm(X), 1)
-
-
-# def test_EncExp_build_tailored():
-#     """Test the development of tailored models"""
-
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-
-#     enc = EncExp(lang='es',
-#                  tailored=True)
-#     w = enc.weights
-#     enc.build_tailored(mx + ar, load=True)
-#     assert isfile(enc.tailored)
-#     assert hasattr(enc, '_tailored_built')
-#     enc = EncExp(lang='es',
-#                  tailored=enc.tailored).fit(mx + ar, y)
-#     assert np.fabs(w - enc.weights).sum() != 0
-#     enc2 = clone(enc)
-#     assert hasattr(enc2, '_tailored_built')
-#     assert hasattr(enc2, '_estimator')
-#     # os.unlink(enc.tailored)
-
-
-# def test_pipeline_encexp():
-#     """Test Pipeline in EncExpT"""
-#     from sklearn.pipeline import Pipeline
-#     from sklearn.svm import LinearSVC
-#     from sklearn.model_selection import GridSearchCV
-#     from sklearn.model_selection import StratifiedShuffleSplit
-
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-
-#     pipe = Pipeline([('encexp', EncExpT(lang='es')),
-#                      ('cl', LinearSVC(class_weight='balanced'))])
-#     params = {'cl__C': [0.01, 0.1, 1, 10],
-#               'encexp__voc_source': ['mix', 'noGeo']}
-#     sss = StratifiedShuffleSplit(random_state=0,
-#                                  n_splits=1,
-#                                  test_size=0.3)
-
-#     grid = GridSearchCV(pipe,
-#                         param_grid=params,
-#                         cv=sss,
-#                         n_jobs=1,
-#                         scoring='f1_macro').fit(mx + ar, y)
-#     assert grid.best_score_ > 0.7
+def test_EncExpT_transform_dtype():
+    """Test EncExpT transform type"""
+    enc = EncExpT(lang='es',
+                  token_max_filter=2**13)
+    X = enc.transform(['buenos dias'])
+    assert X.dtype == enc.precision
\ No newline at end of file
diff --git a/encexp/text_repr.py b/encexp/text_repr.py
index ce2568f..735e926 100644
--- a/encexp/text_repr.py
+++ b/encexp/text_repr.py
@@ -545,12 +545,9 @@ def encode(self, text):
         W = self.weights
         tfidf = self.seqTM.weights
         if len(seq) == 0:
-            return np.ones((1, W.shape[1]), dtype=W.dtype)
+            return np.ones((1, W.shape[1]), dtype=self.precision)
         index, tf_ = np.unique(seq, return_counts=True)
-        # cnt = Counter(seq)
-        # seq = np.array(list(cnt.keys()))
-        # tf = np.array([cnt[k] for k in seq])
-        tf = tf_ / tf_.sum()
+        tf = np.divide(tf_, tf_.sum(), dtype=self.precision)
         _ = tfidf[index] * tf
         if self.merge_encode:
             return W[index] * np.c_[_ / norm(_)]
@@ -677,399 +674,3 @@ def set_weights(data):
         except PermissionError:
             pass
         return self
-
-
-# @dataclass
-# class EncExpT:
-#     """EncExpT (Encaje Explicable)
-
-#     Represent a text in the embedding using the `transform`method.
-#     """
-#     lang: str='es'
-#     voc_size_exponent: int=13
-#     EncExp_filename: str=None
-#     precision: np.dtype=np.float32
-#     voc_source: str='mix'
-#     enc_source: str=None
-#     prefix_suffix: bool=True
-#     merge_IDF: bool=True
-#     force_token: bool=True
-#     intercept: bool=False
-#     transform_distance: bool=False
-#     unit_vector: bool=True
-#     tailored: Union[bool, str]=False
-#     progress_bar: bool=False
-
-#     def get_params(self, deep=None):
-#         """Parameters"""
-#         return dict(lang=self.lang,
-#                     voc_size_exponent=self.voc_size_exponent,
-#                     EncExp_filename=self.EncExp_filename,
-#                     precision=self.precision,
-#                     voc_source=self.voc_source,
-#                     enc_source=self.enc_source,
-#                     prefix_suffix=self.prefix_suffix,
-#                     merge_IDF=self.merge_IDF,
-#                     force_token=self.force_token,
-#                     intercept=self.intercept,
-#                     transform_distance=self.transform_distance,
-#                     unit_vector=self.unit_vector,
-#                     tailored=self.tailored,
-#                     progress_bar=self.progress_bar)
-
-#     def set_params(self, **kwargs):
-#         """Set the parameters"""
-#         for key, value in kwargs.items():
-#             setattr(self, key, value)
-
-#     def fit(self, D, y=None):
-#         """Estimate the parameters"""
-#         if self.tailored is not False:
-#             self.build_tailored(D, load=True)
-#         return self
-
-#     def force_tokens_weights(self, IDF: bool=False):
-#         """Set the maximum weight"""
-#         # rows = np.arange(len(self.names))
-#         rows = np.array([i for i, k in enumerate(self.names)
-#                          if k in self.bow.token2id])
-
-#         cols = np.array([self.bow.token2id[x] for x in self.names
-#                          if x in self.bow.token2id])
-#         if cols.shape[0] == 0:
-#             return
-#         if IDF:
-#             w = self.weights[rows][:, cols] * self.bow.weights[cols]
-#             _max = (w.max(axis=1) / self.bow.weights[cols]).astype(self.precision)
-#         else:
-#             _max = self.weights[rows].max(axis=1)
-#         self.weights[rows, cols] = _max
-
-#     @property
-#     def bias(self):
-#         """Bias / Intercept"""
-#         try:
-#             return self._bias
-#         except AttributeError:
-#             self.weights
-#             return self._bias
-
-#     @bias.setter
-#     def bias(self, value):
-#         self._bias = value
-
-#     @property
-#     def weights(self):
-#         """Weights"""
-#         try:
-#             return self._weights
-#         except AttributeError:
-#             if self.EncExp_filename is not None:
-#                 data = download_encexp(output=self.EncExp_filename)
-#             else:
-#                 if self.intercept:
-#                     assert not self.merge_IDF
-#                 data = download_encexp(lang=self.lang,
-#                                        voc_size_exponent=self.voc_size_exponent,
-#                                        voc_source=self.voc_source,
-#                                        enc_source=self.enc_source,
-#                                        prefix_suffix=self.prefix_suffix,
-#                                        intercept=self.intercept)
-#             self.bow = SeqTM(vocabulary=data['seqtm'])
-#             w = self.bow.weights
-#             weights = []
-#             precision = self.precision
-#             for vec in data['coefs']:
-#                 if not self.merge_IDF:
-#                     coef = vec['coef']
-#                 else:
-#                     coef = (vec['coef'] * w).astype(precision)
-#                 weights.append(coef)
-#             self.weights = np.vstack(weights)
-#             self.bias = np.array([vec['intercept'] for vec in data['coefs']],
-#                                  dtype=self.precision)
-#             self.names = np.array([vec['label'] for vec in data['coefs']])
-#             self.enc_training_size = {vec['label']: vec['N'] for vec in data['coefs']}
-#             if self.force_token:
-#                 self.force_tokens_weights(IDF=self.intercept)
-#             self.weights = np.asarray(self._weights, order='F')
-#         return self._weights
-
-#     @property
-#     def weights_norm(self):
-#         """Weights norm"""
-#         try:
-#             return self._weights_norm
-#         except AttributeError:
-#             _ = np.linalg.norm(self.weights, axis=1)
-#             self._weights_norm = _
-#             return self._weights_norm
-
-#     @property
-#     def enc_training_size(self):
-#         """Training size of each embedding"""
-#         try:
-#             return self._enc_training_size
-#         except AttributeError:
-#             self.weights
-#             return self._enc_training_size
-
-#     @enc_training_size.setter
-#     def enc_training_size(self, value):
-#         self._enc_training_size = value
-
-#     @weights.setter
-#     def weights(self, value):
-#         self._weights = value
-
-#     @property
-#     def names(self):
-#         """Vector space components"""
-#         try:
-#             return self._names
-#         except AttributeError:
-#             self.weights
-#             return self._names
-
-#     @names.setter
-#     def names(self, value):
-#         self._names = value
-
-#     @property
-#     def bow(self):
-#         """BoW"""
-#         try:
-#             return self._bow
-#         except AttributeError:
-#             self.weights
-#             return self._bow
-
-#     @bow.setter
-#     def bow(self, value):
-#         self._bow = value
-
-#     def encode(self, text):
-#         """Encode utterace into a matrix"""
-
-#         token2id = self.bow.token2id
-#         seq = []
-#         for token in self.bow.tokenize(text):
-#             try:
-#                 seq.append(token2id[token])
-#             except KeyError:
-#                 continue
-#         W = self.weights
-#         if len(seq) == 0:
-#             return np.ones((W.shape[0], 1), dtype=W.dtype)
-#         return W[:, seq]
-
-#     def transform(self, texts):
-#         """Represents the texts into a matrix"""
-#         if self.intercept:
-#             X = self.bow.transform(texts) @ self.weights.T + self.bias
-#         else:
-#             X = np.r_[[self.encode(data).sum(axis=1)
-#                        for data in progress_bar(texts, total=len(texts),
-#                                                 desc='Transform',
-#                                                 use_tqdm=self.progress_bar)]]
-#         if self.transform_distance:
-#             X = X / self.weights_norm
-#         if self.unit_vector:
-#             _norm = norm(X, axis=1)
-#             _norm[_norm == 0] = 1
-#             return X / np.c_[_norm]
-#         return X
-
-#     def fill(self, inplace: bool=True, names: list=None):
-#         """Fill weights with the missing dimensions"""
-#         weights = self.weights
-#         if names is None:
-#             names = self.bow.names
-#         w = np.zeros((len(names), weights.shape[1]),
-#                      dtype=self.precision)
-#         iden = {v: k for k, v in enumerate(names)}
-#         for key, value in zip(self.names, weights):
-#             w[iden[key]] = value
-#         if inplace:
-#             self.weights = w
-#             self.names = names
-#         return w
-
-#     def build_tailored(self, data, load=False, **kwargs):
-#         """Build a tailored model with data"""
-
-#         import os
-#         from os.path import isfile
-#         from tempfile import mkstemp
-#         from json import dumps
-#         from microtc.utils import tweet_iterator
-#         from encexp.download import download_seqtm
-#         from encexp.build_encexp import build_encexp
-#         if hasattr(self, '_tailored_built'):
-#             return None
-
-#         get_text = self.bow.get_text
-#         if isinstance(self.tailored, str) and isfile(self.tailored):
-#             if load:
-#                 _ = self.__class__(EncExp_filename=self.tailored)
-#                 self.__iadd__(_)
-#             self._tailored_built = True
-#             return None
-#         iden, path = mkstemp()
-#         with open(iden, 'w', encoding='utf-8') as fpt:
-#             for d in data:
-#                 print(dumps(dict(text=get_text(d))), file=fpt)
-#         if isinstance(self.tailored, bool):
-#             _, self.tailored = mkstemp(suffix='.gz')
-#         if self.EncExp_filename is not None:
-#             voc = next(tweet_iterator(self.EncExp_filename))
-#         else:
-#             voc = download_seqtm(self.lang, self.voc_size_exponent,
-#                                  voc_source=self.voc_source)
-#         build_kw = dict(min_pos=16, tokens=self.names)
-#         build_kw.update(kwargs)
-#         build_encexp(voc, path, self.tailored, **build_kw)
-#         os.unlink(path)
-#         if load:
-#             self.__iadd__(self.__class__(EncExp_filename=self.tailored))
-#         self._tailored_built = True
-
-#     def __add__(self, other):
-#         """Add weights"""
-#         ins = clone(self)
-#         return ins.__iadd__(other)
-
-#     def __iadd__(self, other):
-#         """Add weights"""
-
-#         assert np.all(self.bow.names == other.bow.names)
-#         _ = self.precision == np.float32
-#         weights_ = self.weights if _ else self.weights.astype(np.float32)
-#         _ = other.precision == np.float32
-#         w_other = other.weights if _ else other.weights.astype(np.float32)
-#         w_norm = np.linalg.norm(weights_, axis=1)
-#         other_norm = np.linalg.norm(w_other, axis=1)
-#         w = dict(zip(self.names, weights_ / np.c_[w_norm]))
-#         w_other = dict(zip(other.names, w_other / np.c_[other_norm]))
-#         w_norm = dict(zip(self.names, w_norm))
-#         other_norm = dict(zip(other.names, other_norm))
-#         names = sorted(set(self.names).union(set(other.names)))
-#         weights = []
-#         norms = []
-#         for name in names:
-#             if name in w and name in w_other:
-#                 _ = (w[name] + w_other[name]) / 2
-#                 weights.append(_)
-#                 norms.append(w_norm[name])
-#             elif name in w:
-#                 weights.append(w[name])
-#                 norms.append(w_norm[name])
-#             else:
-#                 weights.append(w_other[name])
-#                 norms.append(other_norm[name])
-#         weights = np.asarray(weights, order='F')
-#         weights = weights / np.c_[np.linalg.norm(weights, axis=1)]
-#         self.weights = np.asarray(weights * np.c_[np.array(norms)],
-#                                   dtype=self.precision, order='F')
-#         self.names = np.array(names)
-#         return self
-
-#     def __sklearn_clone__(self):
-#         klass = self.__class__
-#         params = self.get_params()
-#         ins = klass(**params)
-#         ins.weights = self.weights
-#         ins.bow = self.bow
-#         ins.names = self.names
-#         ins.enc_training_size = self.enc_training_size
-#         if hasattr(self, '_tailored_built'):
-#             ins._tailored_built = self._tailored_built
-#         return ins
-
-
-# @dataclass
-# class EncExp(EncExpT):
-#     """EncExp (Encaje Explicable)"""
-
-#     estimator_kwargs: dict=None
-#     kfold_class: StratifiedKFold=StratifiedKFold
-#     kfold_kwargs: dict=None
-
-#     def get_params(self, deep=None):
-#         """Parameters"""
-#         params = super(EncExp, self).get_params()
-#         params.update(dict(estimator_kwargs=self.estimator_kwargs,
-#                            kfold_class=self.kfold_class,
-#                            kfold_kwargs=self.kfold_kwargs))
-#         return params
-
-#     def fit(self, D, y=None):
-#         """Estimate the parameters"""
-#         super(EncExp, self).fit(D, y=y)
-#         if y is None:
-#             y = [x['klass'] for x in D]
-#         if not hasattr(self, '_estimator') and len(D) > 2**17:
-#             self.estimator = SGDClassifier(class_weight='balanced')
-#         X = self.transform(D)
-#         self.estimator.fit(X, y)
-#         return self
-
-#     @property
-#     def estimator(self):
-#         """Estimator (classifier/regressor)"""
-#         try:
-#             return self._estimator
-#         except AttributeError:
-#             from sklearn.svm import LinearSVC
-#             params = dict(class_weight='balanced',
-#                           dual='auto')
-#             if self.estimator_kwargs is not None:
-#                 params.update(self.estimator_kwargs)
-#             self.estimator_kwargs = params
-#             self.estimator = LinearSVC(**self.estimator_kwargs)
-#         return self._estimator
-
-#     @estimator.setter
-#     def estimator(self, value):
-#         self._estimator = value
-
-#     def predict(self, texts):
-#         """Predict"""
-#         X = self.transform(texts)
-#         return self.estimator.predict(X)
-
-#     def decision_function(self, texts):
-#         """Decision function"""
-#         X = self.transform(texts)
-#         hy = self.estimator.decision_function(X)
-#         if hy.ndim == 1:
-#             return np.c_[hy]
-#         return hy
-
-#     def train_predict_decision_function(self, D, y=None):
-#         """Train and predict the decision"""
-#         if y is None:
-#             y = np.array([x['klass'] for x in D])
-#         if not isinstance(y, np.ndarray):
-#             y = np.array(y)
-#         nclass = np.unique(y).shape[0]
-#         X = self.transform(D)
-#         if nclass == 2:
-#             hy = np.empty(X.shape[0])
-#         else:
-#             hy = np.empty((X.shape[0], nclass))
-#         kwargs = dict(random_state=0, shuffle=True)
-#         if self.kfold_kwargs is not None:
-#             kwargs.update(self.kfold_kwargs)
-#         for tr, vs in self.kfold_class(**kwargs).split(X, y):
-#             m = clone(self).estimator.fit(X[tr], y[tr])
-#             hy[vs] = m.decision_function(X[vs])
-#         if hy.ndim == 1:
-#             return np.c_[hy]
-#         return hy
-
-#     def __sklearn_clone__(self):
-#         ins = super(EncExp, self).__sklearn_clone__()
-#         if hasattr(self, '_estimator'):
-#             ins.estimator = clone(self.estimator)
-#         return ins
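
Note on the dtype behavior the fix relies on (a minimal NumPy sketch, independent of
encexp; tf_ stands in for the counts returned by np.unique):

    import numpy as np

    # np.unique(seq, return_counts=True) yields integer counts.
    tf_ = np.array([3, 1, 2])

    # Plain division promotes integer operands to float64, so the old
    # tf = tf_ / tf_.sum() ignored the configured precision.
    assert (tf_ / tf_.sum()).dtype == np.float64

    # np.divide with an explicit dtype keeps the result in the requested
    # precision, which is what the patched encode uses.
    assert np.divide(tf_, tf_.sum(), dtype=np.float32).dtype == np.float32

The empty-sequence branch follows the same reasoning: np.ones(..., dtype=self.precision)
ties the fallback vector to the precision parameter rather than to the weights' dtype,
so the output of transform matches enc.precision, as the new
test_EncExpT_transform_dtype asserts.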