From 1ed4cad50d48202a3f74b8c9d32a722c56fb3b30 Mon Sep 17 00:00:00 2001
From: Mario Graff
Date: Wed, 23 Jul 2025 18:02:12 +0000
Subject: [PATCH] Bug: Precision in transform

---
 encexp/__init__.py             |   2 +-
 encexp/tests/test_text_repr.py | 312 +------------------------
 encexp/text_repr.py            | 403 +--------------------------
 3 files changed, 9 insertions(+), 708 deletions(-)

diff --git a/encexp/__init__.py b/encexp/__init__.py
index 542790e..8df4d85 100644
--- a/encexp/__init__.py
+++ b/encexp/__init__.py
@@ -17,4 +17,4 @@
 if not '-m' in sys.argv:
     from encexp.text_repr import EncExpT, SeqTM, TextModel
 
-__version__ = "0.1.6"
+__version__ = "0.1.7"
diff --git a/encexp/tests/test_text_repr.py b/encexp/tests/test_text_repr.py
index 1b9061f..d54c5a6 100644
--- a/encexp/tests/test_text_repr.py
+++ b/encexp/tests/test_text_repr.py
@@ -235,309 +235,9 @@ def test_TextModel_diac():
     assert len(lst) > 3
 
 
-# def test_EncExp_filename():
-#     """Test EncExp"""
-#     if not isfile('encexp-es-mx.json.gz'):
-#         samples()
-#     data = compute_b4msa_vocabulary('es-mx-sample.json')
-#     voc = compute_seqtm_vocabulary(SeqTM, data,
-#                                    'es-mx-sample.json',
-#                                    voc_size_exponent=10)
-#     build_encexp(voc, 'es-mx-sample.json', 'encexp-es-mx.json.gz')
-#     enc = EncExp(EncExp_filename='encexp-es-mx.json.gz')
-#     assert enc.weights.dtype == np.float32
-#     assert len(enc.names) == 12
-#     os.unlink('encexp-es-mx.json.gz')
-
-
-# def test_EncExp():
-#     """Test EncExp"""
-#     enc = EncExp(precision=np.float16)
-#     assert enc.weights.dtype == np.float16
-#     assert len(enc.names) == 8192
-
-
-# def test_EncExp_encode():
-#     """Test EncExp encode"""
-
-#     dense = EncExp(precision=np.float16)
-#     assert dense.encode('buenos días').shape[1] == 2
-
-
-# def test_EncExp_transform():
-#     """Test EncExp transform"""
-
-#     encexp = EncExp()
-#     X = encexp.transform(['buenos dias'])
-#     assert X.shape[0] == 1
-#     assert X.shape[1] == 8192
-#     assert X.dtype == np.float32
-
-
-# def test_EncExp_prefix_suffix():
-#     """Test EncExp prefix/suffix"""
-
-#     encexp = EncExp(lang='es',
-#                     precision=np.float16,
-#                     prefix_suffix=True)
-#     for k in encexp.bow.names:
-#         if k[:2] != 'q:':
-#             continue
-#         if len(k) >= 6:
-#             continue
-#         assert k[3] == '~' or k[-1] == '~'
-
-
-# def test_EncExp_fit():
-#     """Test EncExp fit"""
-#     from sklearn.svm import LinearSVC
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-#     enc = EncExp(lang='es',
-#                  prefix_suffix=True,
-#                  precision=np.float16).fit(mx + ar, y)
-#     assert isinstance(enc.estimator, LinearSVC)
-#     hy = enc.predict(ar)
-#     assert hy.shape[0] == len(ar)
-#     df = enc.decision_function(ar)
-#     assert df.shape[0] == len(ar)
-#     assert df.dtype == np.float64
-
-
-# def test_EncExp_fit_sgd():
-#     """Test EncExp fit"""
-#     from sklearn.linear_model import SGDClassifier
-#     from itertools import repeat
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-#     D = mx + ar
-#     # while len(D) < 2**17:
-#     for i in range(5):
-#         D.extend(D)
-#         y.extend(y)
-#     D.append(D[0])
-#     y.append(y[0])
-#     enc = EncExp(lang='es').fit(D, y)
-#     assert isinstance(enc.estimator, SGDClassifier)
-#     hy = enc.predict(ar)
-#     assert hy.shape[0] == len(ar)
-#     df = enc.decision_function(ar)
-#     assert df.shape[0] == len(ar)
-#     assert df.dtype == np.float64
-
-
-# def test_EncExp_train_predict_decision_function():
-#     """Test EncExp train_predict_decision_function"""
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     samples(filename='es-es-sample.json.zip')
-#     es = list(tweet_iterator('es-es-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-#     enc = EncExp(lang='es',
-#                  prefix_suffix=True,
-#                  precision=np.float16)
-#     hy = enc.train_predict_decision_function(mx + ar, y)
-#     assert hy.ndim == 2 and hy.shape[0] == len(y) and hy.shape[1] == 1
-#     y += ['es'] * len(es)
-#     hy = enc.train_predict_decision_function(mx + ar + es, y)
-#     assert hy.shape[1] == 3 and hy.shape[0] == len(y)
-
-
-# def test_EncExp_clone():
-#     """Test EncExp clone"""
-
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16)
-#     enc2 = clone(enc)
-#     assert isinstance(enc2, EncExp)
-#     assert np.all(enc2.weights == enc.weights)
-
-
-# def test_EncExp_merge_IDF():
-#     """Test EncExp without keyword's weight"""
-
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16, merge_IDF=False,
-#                  force_token=False)
-#     enc.fill(inplace=True)
-
-#     for k, v in enc.bow.token2id.items():
-#         assert enc.weights[v, v] == 0
-#     enc2 = EncExp(lang='es', prefix_suffix=True,
-#                   precision=np.float16, merge_IDF=True,
-#                   force_token=False)
-#     enc2.fill(inplace=True)
-#     _ = (enc.weights * enc.bow.weights).astype(enc.precision)
-#     assert_almost_equal(_, enc2.weights, decimal=5)
-
-
-# def test_EncExp_fill():
-#     """Test EncExp fill weights"""
-#     from encexp.download import download_seqtm
-
-#     voc = download_seqtm(lang='es')
-#     samples()
-#     if not isfile('encexp-es-mx.json.gz'):
-#         build_encexp(voc, 'es-mx-sample.json', 'encexp-es-mx.json.gz',
-#                      min_pos=64)
-#     enc = EncExp(EncExp_filename='encexp-es-mx.json.gz')
-#     iden = {v:k for k, v in enumerate(enc.bow.names)}
-#     comp = [x for x in enc.bow.names if x not in enc.names]
-#     key = enc.names[0]
-#     enc.weights
-#     w = enc.fill()
-#     assert np.any(w[iden[key]] != 0)
-#     assert_almost_equal(w[iden[comp[0]]], 0)
-#     os.unlink('encexp-es-mx.json.gz')
-#     assert np.all(enc.names == enc.bow.names)
-
-
-# def test_EncExp_iadd():
-#     """Test EncExp iadd"""
-
-#     from encexp.download import download_seqtm
-
-#     voc = download_seqtm(lang='es')
-#     samples()
-#     if not isfile('encexp-es-mx.json.gz'):
-#         build_encexp(voc, 'es-mx-sample.json', 'encexp-es-mx.json.gz',
-#                      min_pos=64)
-#     enc = EncExp(EncExp_filename='encexp-es-mx.json.gz')
-#     w = enc.weights
-#     enc += enc
-#     assert_almost_equal(w, enc.weights, decimal=4)
-#     os.unlink('encexp-es-mx.json.gz')
-#     enc2 = EncExp(lang='es', voc_source='noGeo')
-#     enc2 += enc
-#     enc2 = EncExp(lang='es', voc_source='noGeo')
-#     r = enc2 + enc2
-#     r.weights[:, :] = 0
-#     assert enc2.weights[0, 0] != 0
-
-
-# def test_EncExp_force_tokens():
-#     """Test force tokens"""
-
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16,
-#                  force_token=False)
-#     w = enc.weights
-#     _max = w.max(axis=1)
-#     rows = np.arange(len(enc.names))
-#     cols = np.array([enc.bow.token2id[x] for x in enc.names])
-#     assert_almost_equal(w[rows, cols], 0)
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16,
-#                  force_token=True)
-#     w[rows, cols] = _max
-#     assert_almost_equal(enc.weights, w)
-#     enc = EncExp(lang='es', prefix_suffix=True,
-#                  precision=np.float16, merge_IDF=False,
-#                  force_token=False)
-#     assert enc.weights[0, 0] == 0
-#     enc.force_tokens_weights(IDF=True)
-#     enc2 = EncExp(lang='es', prefix_suffix=True,
-#                   precision=np.float16, merge_IDF=False,
-#                   force_token=True)
-#     assert enc.weights[0, 0] != enc2.weights[0, 0]
-#     assert_almost_equal(enc.weights[0, 1:], enc2.weights[0, 1:])
-
-
-# def test_EncExp_enc_training_size():
-#     """Test training size of the embeddings"""
-
-#     enc = EncExp(lang='es')
-#     assert isinstance(enc.enc_training_size, dict)
-#     for k in enc.enc_training_size:
-#         assert k in enc.names
-
-
-# def test_EncExp_distance():
-#     """Test distance to hyperplane"""
-
-#     txt = 'buenos días'
-#     enc = EncExp(lang='es', transform_distance=True)
-#     assert enc.weights_norm.shape[0] == enc.weights.shape[0]
-#     X = enc.transform([txt])
-#     X2 = EncExp(lang='es',
-#                 transform_distance=False).transform([txt])
-#     assert np.fabs(X - X2).sum() != 0
-
-
-# def test_EncExp_unit_vector():
-#     """Test distance to hyperplane"""
-
-#     txt = 'buenos días'
-#     enc = EncExp(lang='es', unit_vector=False)
-#     X = enc.transform([txt])
-#     assert np.linalg.norm(X) != 1
-#     enc = EncExp(lang='es')
-#     X = enc.transform([txt])
-#     assert_almost_equal(np.linalg.norm(X), 1)
-
-
-# def test_EncExp_build_tailored():
-#     """Test the development of tailored models"""
-
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-
-#     enc = EncExp(lang='es',
-#                  tailored=True)
-#     w = enc.weights
-#     enc.build_tailored(mx + ar, load=True)
-#     assert isfile(enc.tailored)
-#     assert hasattr(enc, '_tailored_built')
-#     enc = EncExp(lang='es',
-#                  tailored=enc.tailored).fit(mx + ar, y)
-#     assert np.fabs(w - enc.weights).sum() != 0
-#     enc2 = clone(enc)
-#     assert hasattr(enc2, '_tailored_built')
-#     assert hasattr(enc2, '_estimator')
-#     # os.unlink(enc.tailored)
-
-
-# def test_pipeline_encexp():
-#     """Test Pipeline in EncExpT"""
-#     from sklearn.pipeline import Pipeline
-#     from sklearn.svm import LinearSVC
-#     from sklearn.model_selection import GridSearchCV
-#     from sklearn.model_selection import StratifiedShuffleSplit
-
-#     samples()
-#     mx = list(tweet_iterator('es-mx-sample.json'))
-#     samples(filename='es-ar-sample.json.zip')
-#     ar = list(tweet_iterator('es-ar-sample.json'))
-#     y = ['mx'] * len(mx)
-#     y += ['ar'] * len(ar)
-
-#     pipe = Pipeline([('encexp', EncExpT(lang='es')),
-#                      ('cl', LinearSVC(class_weight='balanced'))])
-#     params = {'cl__C': [0.01, 0.1, 1, 10],
-#               'encexp__voc_source': ['mix', 'noGeo']}
-#     sss = StratifiedShuffleSplit(random_state=0,
-#                                  n_splits=1,
-#                                  test_size=0.3)
-
-#     grid = GridSearchCV(pipe,
-#                         param_grid=params,
-#                         cv=sss,
-#                         n_jobs=1,
-#                         scoring='f1_macro').fit(mx + ar, y)
-#     assert grid.best_score_ > 0.7
+def test_EncExpT_transform_dtype():
+    """Test EncExpT transform type"""
+    enc = EncExpT(lang='es',
+                  token_max_filter=2**13)
+    X = enc.transform(['buenos dias'])
+    assert X.dtype == enc.precision
\ No newline at end of file
diff --git a/encexp/text_repr.py b/encexp/text_repr.py
index ce2568f..735e926 100644
--- a/encexp/text_repr.py
+++ b/encexp/text_repr.py
@@ -545,12 +545,9 @@ def encode(self, text):
         W = self.weights
         tfidf = self.seqTM.weights
         if len(seq) == 0:
-            return np.ones((1, W.shape[1]), dtype=W.dtype)
+            return np.ones((1, W.shape[1]), dtype=self.precision)
         index, tf_ = np.unique(seq, return_counts=True)
-        # cnt = Counter(seq)
-        # seq = np.array(list(cnt.keys()))
-        # tf = np.array([cnt[k] for k in seq])
-        tf = tf_ / tf_.sum()
+        tf = np.divide(tf_, tf_.sum(), dtype=self.precision)
         _ = tfidf[index] * tf
         if self.merge_encode:
             return W[index] * np.c_[_ / norm(_)]
@@ -677,399 +674,3 @@ def set_weights(data):
         except PermissionError:
             pass
         return self
-
-
-# @dataclass
-# class EncExpT:
-#     """EncExpT (Encaje Explicable)
-
-#     Represent a text in the embedding using the `transform`method.
-#     """
-#     lang: str='es'
-#     voc_size_exponent: int=13
-#     EncExp_filename: str=None
-#     precision: np.dtype=np.float32
-#     voc_source: str='mix'
-#     enc_source: str=None
-#     prefix_suffix: bool=True
-#     merge_IDF: bool=True
-#     force_token: bool=True
-#     intercept: bool=False
-#     transform_distance: bool=False
-#     unit_vector: bool=True
-#     tailored: Union[bool, str]=False
-#     progress_bar: bool=False
-
-#     def get_params(self, deep=None):
-#         """Parameters"""
-#         return dict(lang=self.lang,
-#                     voc_size_exponent=self.voc_size_exponent,
-#                     EncExp_filename=self.EncExp_filename,
-#                     precision=self.precision,
-#                     voc_source=self.voc_source,
-#                     enc_source=self.enc_source,
-#                     prefix_suffix=self.prefix_suffix,
-#                     merge_IDF=self.merge_IDF,
-#                     force_token=self.force_token,
-#                     intercept=self.intercept,
-#                     transform_distance=self.transform_distance,
-#                     unit_vector=self.unit_vector,
-#                     tailored=self.tailored,
-#                     progress_bar=self.progress_bar)
-
-#     def set_params(self, **kwargs):
-#         """Set the parameters"""
-#         for key, value in kwargs.items():
-#             setattr(self, key, value)
-
-#     def fit(self, D, y=None):
-#         """Estimate the parameters"""
-#         if self.tailored is not False:
-#             self.build_tailored(D, load=True)
-#         return self
-
-#     def force_tokens_weights(self, IDF: bool=False):
-#         """Set the maximum weight"""
-#         # rows = np.arange(len(self.names))
-#         rows = np.array([i for i, k in enumerate(self.names)
-#                          if k in self.bow.token2id])
-
-#         cols = np.array([self.bow.token2id[x] for x in self.names
-#                          if x in self.bow.token2id])
-#         if cols.shape[0] == 0:
-#             return
-#         if IDF:
-#             w = self.weights[rows][:, cols] * self.bow.weights[cols]
-#             _max = (w.max(axis=1) / self.bow.weights[cols]).astype(self.precision)
-#         else:
-#             _max = self.weights[rows].max(axis=1)
-#         self.weights[rows, cols] = _max
-
-#     @property
-#     def bias(self):
-#         """Bias / Intercept"""
-#         try:
-#             return self._bias
-#         except AttributeError:
-#             self.weights
-#             return self._bias
-
-#     @bias.setter
-#     def bias(self, value):
-#         self._bias = value
-
-#     @property
-#     def weights(self):
-#         """Weights"""
-#         try:
-#             return self._weights
-#         except AttributeError:
-#             if self.EncExp_filename is not None:
-#                 data = download_encexp(output=self.EncExp_filename)
-#             else:
-#                 if self.intercept:
-#                     assert not self.merge_IDF
-#                 data = download_encexp(lang=self.lang,
-#                                        voc_size_exponent=self.voc_size_exponent,
-#                                        voc_source=self.voc_source,
-#                                        enc_source=self.enc_source,
-#                                        prefix_suffix=self.prefix_suffix,
-#                                        intercept=self.intercept)
-#             self.bow = SeqTM(vocabulary=data['seqtm'])
-#             w = self.bow.weights
-#             weights = []
-#             precision = self.precision
-#             for vec in data['coefs']:
-#                 if not self.merge_IDF:
-#                     coef = vec['coef']
-#                 else:
-#                     coef = (vec['coef'] * w).astype(precision)
-#                 weights.append(coef)
-#             self.weights = np.vstack(weights)
-#             self.bias = np.array([vec['intercept'] for vec in data['coefs']],
-#                                  dtype=self.precision)
-#             self.names = np.array([vec['label'] for vec in data['coefs']])
-#             self.enc_training_size = {vec['label']: vec['N'] for vec in data['coefs']}
-#             if self.force_token:
-#                 self.force_tokens_weights(IDF=self.intercept)
-#             self.weights = np.asarray(self._weights, order='F')
-#         return self._weights
-
-#     @property
-#     def weights_norm(self):
-#         """Weights norm"""
-#         try:
-#             return self._weights_norm
-#         except AttributeError:
-#             _ = np.linalg.norm(self.weights, axis=1)
-#             self._weights_norm = _
-#             return self._weights_norm
-
-#     @property
-#     def enc_training_size(self):
-#         """Training size of each embedding"""
-#         try:
-#             return self._enc_training_size
-#         except AttributeError:
-#             self.weights
-#             return self._enc_training_size
-
-#     @enc_training_size.setter
-#     def enc_training_size(self, value):
-#         self._enc_training_size = value
-
-#     @weights.setter
-#     def weights(self, value):
-#         self._weights = value
-
-#     @property
-#     def names(self):
-#         """Vector space components"""
-#         try:
-#             return self._names
-#         except AttributeError:
-#             self.weights
-#             return self._names
-
-#     @names.setter
-#     def names(self, value):
-#         self._names = value
-
-#     @property
-#     def bow(self):
-#         """BoW"""
-#         try:
-#             return self._bow
-#         except AttributeError:
-#             self.weights
-#             return self._bow
-
-#     @bow.setter
-#     def bow(self, value):
-#         self._bow = value
-
-#     def encode(self, text):
-#         """Encode utterace into a matrix"""
-
-#         token2id = self.bow.token2id
-#         seq = []
-#         for token in self.bow.tokenize(text):
-#             try:
-#                 seq.append(token2id[token])
-#             except KeyError:
-#                 continue
-#         W = self.weights
-#         if len(seq) == 0:
-#             return np.ones((W.shape[0], 1), dtype=W.dtype)
-#         return W[:, seq]
-
-#     def transform(self, texts):
-#         """Represents the texts into a matrix"""
-#         if self.intercept:
-#             X = self.bow.transform(texts) @ self.weights.T + self.bias
-#         else:
-#             X = np.r_[[self.encode(data).sum(axis=1)
-#                        for data in progress_bar(texts, total=len(texts),
-#                                                 desc='Transform',
-#                                                 use_tqdm=self.progress_bar)]]
-#         if self.transform_distance:
-#             X = X / self.weights_norm
-#         if self.unit_vector:
-#             _norm = norm(X, axis=1)
-#             _norm[_norm == 0] = 1
-#             return X / np.c_[_norm]
-#         return X
-
-#     def fill(self, inplace: bool=True, names: list=None):
-#         """Fill weights with the missing dimensions"""
-#         weights = self.weights
-#         if names is None:
-#             names = self.bow.names
-#         w = np.zeros((len(names), weights.shape[1]),
-#                      dtype=self.precision)
-#         iden = {v: k for k, v in enumerate(names)}
-#         for key, value in zip(self.names, weights):
-#             w[iden[key]] = value
-#         if inplace:
-#             self.weights = w
-#             self.names = names
-#         return w
-
-#     def build_tailored(self, data, load=False, **kwargs):
-#         """Build a tailored model with data"""
-
-#         import os
-#         from os.path import isfile
-#         from tempfile import mkstemp
-#         from json import dumps
-#         from microtc.utils import tweet_iterator
-#         from encexp.download import download_seqtm
-#         from encexp.build_encexp import build_encexp
-#         if hasattr(self, '_tailored_built'):
-#             return None
-
-#         get_text = self.bow.get_text
-#         if isinstance(self.tailored, str) and isfile(self.tailored):
-#             if load:
-#                 _ = self.__class__(EncExp_filename=self.tailored)
-#                 self.__iadd__(_)
-#             self._tailored_built = True
-#             return None
-#         iden, path = mkstemp()
-#         with open(iden, 'w', encoding='utf-8') as fpt:
-#             for d in data:
-#                 print(dumps(dict(text=get_text(d))), file=fpt)
-#         if isinstance(self.tailored, bool):
-#             _, self.tailored = mkstemp(suffix='.gz')
-#         if self.EncExp_filename is not None:
-#             voc = next(tweet_iterator(self.EncExp_filename))
-#         else:
-#             voc = download_seqtm(self.lang, self.voc_size_exponent,
-#                                  voc_source=self.voc_source)
-#         build_kw = dict(min_pos=16, tokens=self.names)
-#         build_kw.update(kwargs)
-#         build_encexp(voc, path, self.tailored, **build_kw)
-#         os.unlink(path)
-#         if load:
-#             self.__iadd__(self.__class__(EncExp_filename=self.tailored))
-#         self._tailored_built = True
-
-#     def __add__(self, other):
-#         """Add weights"""
-#         ins = clone(self)
-#         return ins.__iadd__(other)
-
-#     def __iadd__(self, other):
-#         """Add weights"""
-
-#         assert np.all(self.bow.names == other.bow.names)
-#         _ = self.precision == np.float32
-#         weights_ = self.weights if _ else self.weights.astype(np.float32)
-#         _ = other.precision == np.float32
-#         w_other = other.weights if _ else other.weights.astype(np.float32)
-#         w_norm = np.linalg.norm(weights_, axis=1)
-#         other_norm = np.linalg.norm(w_other, axis=1)
-#         w = dict(zip(self.names, weights_ / np.c_[w_norm]))
-#         w_other = dict(zip(other.names, w_other / np.c_[other_norm]))
-#         w_norm = dict(zip(self.names, w_norm))
-#         other_norm = dict(zip(other.names, other_norm))
-#         names = sorted(set(self.names).union(set(other.names)))
-#         weights = []
-#         norms = []
-#         for name in names:
-#             if name in w and name in w_other:
-#                 _ = (w[name] + w_other[name]) / 2
-#                 weights.append(_)
-#                 norms.append(w_norm[name])
-#             elif name in w:
-#                 weights.append(w[name])
-#                 norms.append(w_norm[name])
-#             else:
-#                 weights.append(w_other[name])
-#                 norms.append(other_norm[name])
-#         weights = np.asarray(weights, order='F')
-#         weights = weights / np.c_[np.linalg.norm(weights, axis=1)]
-#         self.weights = np.asarray(weights * np.c_[np.array(norms)],
-#                                   dtype=self.precision, order='F')
-#         self.names = np.array(names)
-#         return self
-
-#     def __sklearn_clone__(self):
-#         klass = self.__class__
-#         params = self.get_params()
-#         ins = klass(**params)
-#         ins.weights = self.weights
-#         ins.bow = self.bow
-#         ins.names = self.names
-#         ins.enc_training_size = self.enc_training_size
-#         if hasattr(self, '_tailored_built'):
-#             ins._tailored_built = self._tailored_built
-#         return ins
-
-
-# @dataclass
-# class EncExp(EncExpT):
-#     """EncExp (Encaje Explicable)"""
-
-#     estimator_kwargs: dict=None
-#     kfold_class: StratifiedKFold=StratifiedKFold
-#     kfold_kwargs: dict=None
-
-#     def get_params(self, deep=None):
-#         """Parameters"""
-#         params = super(EncExp, self).get_params()
-#         params.update(dict(estimator_kwargs=self.estimator_kwargs,
-#                            kfold_class=self.kfold_class,
-#                            kfold_kwargs=self.kfold_kwargs))
-#         return params
-
-#     def fit(self, D, y=None):
-#         """Estimate the parameters"""
-#         super(EncExp, self).fit(D, y=y)
-#         if y is None:
-#             y = [x['klass'] for x in D]
-#         if not hasattr(self, '_estimator') and len(D) > 2**17:
-#             self.estimator = SGDClassifier(class_weight='balanced')
-#         X = self.transform(D)
-#         self.estimator.fit(X, y)
-#         return self
-
-#     @property
-#     def estimator(self):
-#         """Estimator (classifier/regressor)"""
-#         try:
-#             return self._estimator
-#         except AttributeError:
-#             from sklearn.svm import LinearSVC
-#             params = dict(class_weight='balanced',
-#                           dual='auto')
-#             if self.estimator_kwargs is not None:
-#                 params.update(self.estimator_kwargs)
-#             self.estimator_kwargs = params
-#             self.estimator = LinearSVC(**self.estimator_kwargs)
-#         return self._estimator
-
-#     @estimator.setter
-#     def estimator(self, value):
-#         self._estimator = value
-
-#     def predict(self, texts):
-#         """Predict"""
-#         X = self.transform(texts)
-#         return self.estimator.predict(X)
-
-#     def decision_function(self, texts):
-#         """Decision function"""
-#         X = self.transform(texts)
-#         hy = self.estimator.decision_function(X)
-#         if hy.ndim == 1:
-#             return np.c_[hy]
-#         return hy
-
-#     def train_predict_decision_function(self, D, y=None):
-#         """Train and predict the decision"""
-#         if y is None:
-#             y = np.array([x['klass'] for x in D])
-#         if not isinstance(y, np.ndarray):
-#             y = np.array(y)
-#         nclass = np.unique(y).shape[0]
-#         X = self.transform(D)
-#         if nclass == 2:
-#             hy = np.empty(X.shape[0])
-#         else:
-#             hy = np.empty((X.shape[0], nclass))
-#         kwargs = dict(random_state=0, shuffle=True)
-#         if self.kfold_kwargs is not None:
-#             kwargs.update(self.kfold_kwargs)
-#         for tr, vs in self.kfold_class(**kwargs).split(X, y):
-#             m = clone(self).estimator.fit(X[tr], y[tr])
-#             hy[vs] = m.decision_function(X[vs])
-#         if hy.ndim == 1:
-#             return np.c_[hy]
-#         return hy
-
-#     def __sklearn_clone__(self):
-#         ins = super(EncExp, self).__sklearn_clone__()
-#         if hasattr(self, '_estimator'):
-#             ins.estimator = clone(self.estimator)
-#         return ins
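
Note on the dtype behavior the fix relies on (a minimal NumPy sketch, independent of
encexp; tf_ stands in for the counts returned by np.unique):

    import numpy as np

    # np.unique(seq, return_counts=True) yields integer counts.
    tf_ = np.array([3, 1, 2])

    # Plain division promotes integer operands to float64, so the old
    # tf = tf_ / tf_.sum() ignored the configured precision.
    assert (tf_ / tf_.sum()).dtype == np.float64

    # np.divide with an explicit dtype keeps the result in the requested
    # precision, which is what the patched encode uses.
    assert np.divide(tf_, tf_.sum(), dtype=np.float32).dtype == np.float32

The empty-sequence branch follows the same reasoning: np.ones(..., dtype=self.precision)
ties the fallback vector to the precision parameter rather than to the weights' dtype,
so the output of transform matches enc.precision, as the new
test_EncExpT_transform_dtype asserts.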