@@ -402,16 +402,21 @@ def generate_bert(chat_data, output_path, message_col, batch_size=64):
402402 print (f"Generating RoBERTa sentiments..." )
403403
404404 messages = chat_data [message_col ].tolist ()
405- batch_sentiments_df = pd .DataFrame ()
405+ # batch_sentiments_df = pd.DataFrame()
406+ Path (output_path ).parent .mkdir (parents = True , exist_ok = True )
406407
408+ first = True
407409 for i in tqdm (range (0 , len (messages ), batch_size )):
408410 batch = messages [i :i + batch_size ]
409411 batch_df = get_sentiment (batch )
410- batch_sentiments_df = pd .concat ([batch_sentiments_df , batch_df ], ignore_index = True )
412+ batch_df .to_csv (output_path , mode = 'a' , header = first , index = False )
413+ first = False
414+ # batch_sentiments_df = pd.concat([batch_sentiments_df, batch_df], ignore_index=True)
411415
416+ # batch_sentiments_df = pd.concat(batch_sentiments_lst, ignore_index=True)
412417 # Create directories along the path if they don't exist
413- Path ( output_path ). parent . mkdir ( parents = True , exist_ok = True )
414- batch_sentiments_df .to_csv (output_path , index = False )
418+
419+ # batch_sentiments_df.to_csv(output_path, index=False)
415420
416421def get_sentiment (texts ):
417422 """
@@ -432,7 +437,8 @@ def get_sentiment(texts):
432437 return pd .DataFrame (np .nan , index = texts_series .index , columns = ['positive_bert' , 'negative_bert' , 'neutral_bert' ])
433438
434439 encoded = tokenizer (non_null_non_empty_texts , padding = True , truncation = True , max_length = 512 , return_tensors = 'pt' )
435- output = model_bert (** encoded )
440+ with torch .no_grad ():
441+ output = model_bert (** encoded )
436442
437443 scores = output [0 ].detach ().numpy ()
438444 scores = softmax (scores , axis = 1 )
0 commit comments