mlverse · edgararuiz · Jul 31, 2025 · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025
diff --git a/python/mall/llm.py b/python/mall/llm.py
@@ -3,15 +3,17 @@
 import polars as pl
 import hashlib
 import ollama
+import copy
 import json
 import os
 
 
 def llm_use(backend="", model="", _cache="_mall_cache", **kwargs):
     out = dict()
     if isinstance(backend, Chat):
+        chat_copy = copy.deepcopy(backend)
         out.update(dict(backend="chatlas"))
-        out.update(dict(chat=backend))
+        out.update(dict(chat=chat_copy))
         backend = ""
         model = ""
     if isinstance(backend, Client):
@@ -42,6 +44,8 @@ def llm_map(df, col, msg, pred_name, use, valid_resps="", convert=None):
         pl_type = pl.Int8
         data_type = int
 
+    use = llm_init_use(use, msg)
+
     df = df.with_columns(
         pl.col(col)
         .map_elements(
@@ -61,18 +65,28 @@ def llm_map(df, col, msg, pred_name, use, valid_resps="", convert=None):
 
 
 def llm_loop(x, msg, use, valid_resps="", convert=None):
+    """Apply `llm_call` to each item of the list `x`; TypeError if not a list."""
-    if isinstance(x, list) == False:
+    if not isinstance(x, list):
         raise TypeError("`x` is not a list object")
-    out = list()
-    for row in x:
-        out.append(
-            llm_call(x=row, msg=msg, use=use, valid_resps=valid_resps, convert=convert)
-        )
-    return out
+    # Initialise the backend once, before any row is sent to `llm_call`.
+    use = llm_init_use(use, msg)
+    return [
+        llm_call(x=row, msg=msg, use=use, valid_resps=valid_resps, convert=convert)
+        for row in x
+    ]
 
 
-def llm_call(x, msg, use, valid_resps="", convert=None, data_type=None):
+def llm_init_use(use, msg):
+    """Reset a chatlas chat and set `msg` as its system prompt; return `use`."""
+    if use.get("backend") == "chatlas":
+        session = use.get("chat")
+        session.set_turns([])
+        session.system_prompt = msg
+        use.update(chat=session)
+    return use
+
 
+def llm_call(x, msg, use, valid_resps="", convert=None, data_type=None):
     backend = use.get("backend")
     model = use.get("model")
     call = dict(
@@ -84,15 +98,13 @@ def llm_call(x, msg, use, valid_resps="", convert=None, data_type=None):
     out = ""
     cache = ""
     if use.get("_cache") != "":
-
         hash_call = build_hash(call)
         cache = cache_check(hash_call, use)
     if cache == "":
         if backend == "chatlas":
             chat = use.get("chat")
-            ch = chat.chat(msg[0].get("content") + x, echo="none")
+            ch = chat.chat(x, echo="none")
             out = ch.get_content()
-            chat.set_turns(list())
         if backend == "ollama" or backend == "ollama-client":
             if backend == "ollama":
                 chat_fun = ollama.chat
@@ -109,7 +121,7 @@ def llm_call(x, msg, use, valid_resps="", convert=None, data_type=None):
             if model == "echo":
                 out = x
             if model == "content":
-                out = msg[0]["content"]
+                out = msg
                 return out
     else:
         out = cache
@@ -143,10 +155,7 @@ def valid_output(x):
 
 
 def build_msg(x, msg):
-    out = []
-    for msgs in msg:
-        out.append({"role": msgs["role"], "content": msgs["content"].format(x)})
-    return out
+    return dict(role="user", content=msg + str(x))
 
 
 def build_hash(x):

diff --git a/python/mall/llmvec.py b/python/mall/llmvec.py
@@ -19,10 +19,11 @@ class LLMVec:
     from mall import LLMVec
 
     chat = ChatOllama(model = "llama3.2")
-    
-    llm = LLMVec(chat)    
+
+    llm = LLMVec(chat)
     ```
     """
+
     def __init__(self, backend="", model="", _cache="_mall_cache", **kwargs):
         self._use = llm_use(backend=backend, model=model, _cache=_cache, **kwargs)
 
@@ -49,10 +50,10 @@ def sentiment(
         ```{python}
         llm.sentiment(['I am happy', 'I am sad'])
         ```
-        """    
+        """
         return llm_loop(
             x=x,
-            msg=sentiment(options, additional=additional),
+            msg=sentiment(options, additional=additional, use=self._use),
             use=self._use,
             valid_resps=options,
         )
@@ -77,10 +78,10 @@ def summarize(self, x, max_words=10, additional="") -> list:
         ```{python}
         llm.summarize(['This has been the best TV Ive ever used. Great screen, and sound.'], max_words = 5)
         ```
-        """        
+        """
         return llm_loop(
             x=x,
-            msg=summarize(max_words, additional=additional),
+            msg=summarize(max_words, additional=additional, use=self._use),
             use=self._use,
         )
 
@@ -106,10 +107,10 @@ def translate(self, x, language="", additional="") -> list:
         llm.translate(['This has been the best TV Ive ever used. Great screen, and sound.'], language = 'spanish')
         ```
 
-        """        
+        """
         return llm_loop(
             x=x,
-            msg=translate(language, additional=additional),
+            msg=translate(language, additional=additional, use=self._use),
             use=self._use,
         )
 
@@ -135,10 +136,10 @@ def classify(self, x, labels="", additional="") -> list:
         ```{python}
         llm.classify(['this is important!', 'there is no rush'], ['urgent', 'not urgent'])
         ```
-        """        
+        """
         return llm_loop(
             x=x,
-            msg=classify(labels, additional=additional),
+            msg=classify(labels, additional=additional, use=self._use),
             use=self._use,
             valid_resps=labels,
         )
@@ -164,8 +165,12 @@ def extract(self, x, labels="", additional="") -> list:
         ```{python}
         llm.extract(['bob smith, 123 3rd street'], labels=['name', 'address'])
         ```
-        """        
-        return llm_loop(x=x, msg=extract(labels, additional=additional), use=self._use)
+        """
+        return llm_loop(
+            x=x,
+            msg=extract(labels, additional=additional, use=self._use),
+            use=self._use,
+        )
 
     def custom(self, x, prompt="", valid_resps="") -> list:
         """Provide the full prompt that the LLM will process.
@@ -178,7 +183,7 @@ def custom(self, x, prompt="", valid_resps="") -> list:
         prompt : str
             The prompt to send to the LLM along with the `col`
 
-        """        
+        """
         return llm_loop(x=x, msg=custom(prompt), use=self._use, valid_resps=valid_resps)
 
     def verify(self, x, what="", yes_no=[1, 0], additional="") -> list:
@@ -201,10 +206,10 @@ def verify(self, x, what="", yes_no=[1, 0], additional="") -> list:
         additional : str
             Inserts this text into the prompt sent to the LLM
 
-        """        
+        """
         return llm_loop(
             x=x,
-            msg=verify(what, additional=additional),
+            msg=verify(what, additional=additional, use=self._use),
             use=self._use,
             valid_resps=yes_no,
             convert=dict(yes=yes_no[0], no=yes_no[1]),

diff --git a/python/mall/polars.py b/python/mall/polars.py
@@ -150,7 +150,7 @@ def sentiment(
         df = llm_map(
             df=self._df,
             col=col,
-            msg=sentiment(options, additional=additional),
+            msg=sentiment(options, additional=additional, use=self._use),
             pred_name=pred_name,
             use=self._use,
             valid_resps=options,
@@ -197,7 +197,7 @@ def summarize(
         df = llm_map(
             df=self._df,
             col=col,
-            msg=summarize(max_words, additional=additional),
+            msg=summarize(max_words, additional=additional, use=self._use),
             pred_name=pred_name,
             use=self._use,
         )
@@ -243,7 +243,7 @@ def translate(
         df = llm_map(
             df=self._df,
             col=col,
-            msg=translate(language, additional=additional),
+            msg=translate(language, additional=additional, use=self._use),
             pred_name=pred_name,
             use=self._use,
         )
@@ -295,7 +295,7 @@ def classify(
         df = llm_map(
             df=self._df,
             col=col,
-            msg=classify(labels, additional=additional),
+            msg=classify(labels, additional=additional, use=self._use),
             pred_name=pred_name,
             use=self._use,
             valid_resps=labels,
@@ -379,7 +379,7 @@ def extract(
         df = llm_map(
             df=self._df,
             col=col,
-            msg=extract(lab_vals, additional=additional),
+            msg=extract(lab_vals, additional=additional, use=self._use),
             pred_name=pred_name,
             use=self._use,
         )
@@ -484,7 +484,7 @@ def verify(
         df = llm_map(
             df=self._df,
             col=col,
-            msg=verify(what, additional=additional),
+            msg=verify(what, additional=additional, use=self._use),
             pred_name=pred_name,
             use=self._use,
             valid_resps=yes_no,