Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 28 additions & 16 deletions data/gsm/accuracy.csv
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
flan_gsm.py,2.6615969581749046
mistral_gsm.py,8.188021228203183
mistral_instruct.py,8.41546626231994
mistral_math.py,26.004548900682334
flan_gsm.py,2.687296416938111
mistral_gsm.py,8.188021228203183
mistral_instruct.py,8.41546626231994
mistral_math.py,26.004548900682334
flan_gsm.py,2.631578947368421
mistral_gsm.py,8.188021228203183
mistral_instruct.py,8.41546626231994
mistral_math.py,26.004548900682334
flan_gsm.py,2.577710386656558
mistral_gsm.py,8.188021228203183
mistral_instruct.py,8.41546626231994
mistral_math.py,26.004548900682334
mistral_math.py,42.77777777777778
mistral.py,7.777777777777778
mistral_instruct.py,11.11111111111111
flan.py,3.3333333333333335
mistral_math.py,42.77777777777778
mistral.py,7.777777777777778
mistral_instruct.py,11.11111111111111
flan.py,3.3333333333333335
flan_gsm.py,2.6535253980288096
mistral_gsm.py,7.429871114480667
mistral_instruct.py,8.567096285064443
mistral_math.py,22.062168309325248
flan_gsm.py,1.6679302501895377
mistral_gsm.py,7.202426080363912
mistral_instruct.py,7.505686125852919
mistral_math.py,19.711902956785444
mistral_math.py,33.33333333333333
mistral.py,9.444444444444445
mistral_instruct.py,6.666666666666667
flan.py,1.6666666666666667
mistral_math.py,36.666666666666664
mistral.py,3.888888888888889
mistral_instruct.py,6.666666666666667
flan.py,2.7777777777777777
flan_gsm.py,1.1372251705837757
mistral_gsm.py,5.913570887035633
mistral_instruct.py,7.202426080363912
mistral_math.py,21.37983320697498
8,792 changes: 1,319 additions & 7,473 deletions data/gsm/flan/flan_gsm_response.csv

Large diffs are not rendered by default.

1,320 changes: 1,320 additions & 0 deletions data/gsm/flan/flan_gsm_response_30.csv

Large diffs are not rendered by default.

1,320 changes: 1,320 additions & 0 deletions data/gsm/flan/flan_gsm_response_50.csv

Large diffs are not rendered by default.

8,792 changes: 1,319 additions & 7,473 deletions data/gsm/mistral/mistral_gsm_response.csv

Large diffs are not rendered by default.

1,320 changes: 1,320 additions & 0 deletions data/gsm/mistral/mistral_gsm_response_30.csv

Large diffs are not rendered by default.

1,320 changes: 1,320 additions & 0 deletions data/gsm/mistral/mistral_gsm_response_50.csv

Large diffs are not rendered by default.

8,792 changes: 1,319 additions & 7,473 deletions data/gsm/mistral_instruct/mistral_instruct_gsm_response.csv

Large diffs are not rendered by default.

1,325 changes: 1,325 additions & 0 deletions data/gsm/mistral_instruct/mistral_instruct_gsm_response_30.csv

Large diffs are not rendered by default.

1,330 changes: 1,330 additions & 0 deletions data/gsm/mistral_instruct/mistral_instruct_gsm_response_50.csv

Large diffs are not rendered by default.

29,473 changes: 1,976 additions & 27,497 deletions data/gsm/mistral_math/mistral_math_gsm_response.csv

Large diffs are not rendered by default.

1,990 changes: 1,990 additions & 0 deletions data/gsm/mistral_math/mistral_math_gsm_response_30.csv

Large diffs are not rendered by default.

2,175 changes: 2,175 additions & 0 deletions data/gsm/mistral_math/mistral_math_gsm_response_50.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/multiArith/flan/flan_multiArith_response.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/multiArith/flan/flan_multiArith_response_30.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/multiArith/flan/flan_multiArith_response_50.csv

Large diffs are not rendered by default.

184 changes: 180 additions & 4 deletions data/multiArith/mistral/mistral_multiArith_response.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/multiArith/mistral/mistral_multiArith_response_30.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/multiArith/mistral/mistral_multiArith_response_50.csv

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

241 changes: 241 additions & 0 deletions data/multiArith/mistral_math/mistral_math_multiArith_response.csv

Large diffs are not rendered by default.

219 changes: 219 additions & 0 deletions data/multiArith/mistral_math/mistral_math_multiArith_response_30.csv

Large diffs are not rendered by default.

239 changes: 239 additions & 0 deletions data/multiArith/mistral_math/mistral_math_multiArith_response_50.csv

Large diffs are not rendered by default.

6,141 changes: 6,141 additions & 0 deletions data/noisy_datasets/gsm8k_test_noisy_punct_10.csv

Large diffs are not rendered by default.

6,141 changes: 6,141 additions & 0 deletions data/noisy_datasets/gsm8k_test_noisy_punct_30.csv

Large diffs are not rendered by default.

6,141 changes: 6,141 additions & 0 deletions data/noisy_datasets/gsm8k_test_noisy_punct_50.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/noisy_datasets/multiArith_test_noisy_punct_10.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/noisy_datasets/multiArith_test_noisy_punct_30.csv

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions data/noisy_datasets/multiArith_test_noisy_punct_50.csv

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ accelerate
transformers
pandas
bitsandbytes
jsonformer
jsonformer
protobuf
sentencepiece
setuptools
27 changes: 17 additions & 10 deletions script/gsm/flan_gsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
import random
import sys, os, json

from tqdm import tqdm
from jsonformer import Jsonformer

from config import access_token, DIR_PATH
from utils import get_questions_and_answer_from_multiArith_dataset
from utils import get_noisy_questions_and_answer_from_dataset

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
model = T5ForConditionalGeneration.from_pretrained(
Expand All @@ -25,13 +26,12 @@
"answer": {"type": "string"},
},
}

csv_file = f"{DIR_PATH}/data/multiArith/test_preprocessed.csv"
questions, ground_truths = get_questions_and_answer_from_multiArith_dataset(csv_file)
csv_file = f"{DIR_PATH}/data/noisy_datasets/gsm8k_test_noisy_punct_50.csv"
questions, ground_truths = get_noisy_questions_and_answer_from_dataset(csv_file)


output_file = (
f"{DIR_PATH}/data/multiArith/flan/flan_gsm_response.csv"
f"{DIR_PATH}/data/gsm/flan/flan_gsm_response_50.csv"
)
counter = 0
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
Expand All @@ -48,7 +48,7 @@
json_format = {
"answer": {"<contains the correct numerical answer>"},
}
for question, ground_truth in zip(questions, ground_truths):
for question, ground_truth in tqdm(zip(questions, ground_truths), total=len(questions)):

# prompt = f"""
# [INST]
Expand Down Expand Up @@ -87,15 +87,15 @@
max_number_tokens=1000,
max_array_length=1000,
max_string_token_length=1000,
temperature = 0
temperature=0
)

generated_data = jsonformer()
import pprint

pprint.pprint(prompt)
print("##RESPONSE##")
pprint.pprint(generated_data)
#pprint.pprint(prompt)
#print("##RESPONSE##")
#pprint.pprint(generated_data)

writer.writerow(
{
Expand All @@ -111,3 +111,10 @@


print(f"Questions and answers saved to {output_file}")






# github.com
18 changes: 9 additions & 9 deletions script/gsm/mistral_gsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
import random
import sys, os, json
import math
from tqdm import tqdm

from jsonformer import Jsonformer

from config import access_token, DIR_PATH
from utils import get_questions_and_answer_from_multiArith_dataset
from utils import get_noisy_questions_and_answer_from_dataset

access_token = access_token
model_name = "mistralai/Mistral-7B-v0.1"
Expand Down Expand Up @@ -46,12 +47,11 @@
},
}

csv_file = f"{DIR_PATH}/data/multiArith/test_preprocessed.csv"
questions, ground_truths = get_questions_and_answer_from_multiArith_dataset(csv_file)

csv_file = f"{DIR_PATH}/data/noisy_datasets/gsm8k_test_noisy_punct_50.csv"
questions, ground_truths = get_noisy_questions_and_answer_from_dataset(csv_file)

output_file = (
f"{DIR_PATH}/data/multiArith/mistral/mistral_multiArith_response.csv"
f"{DIR_PATH}/data/gsm/mistral/mistral_gsm_response_50.csv"
)
counter = 0
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
Expand All @@ -68,7 +68,7 @@
json_format = {
"answer": {"<contains the correct numerical answer>"},
}
for question, ground_truth in zip(questions, ground_truths):
for question, ground_truth in tqdm(zip(questions, ground_truths), total=len(questions)):

prompt = f"""
[INST]
Expand Down Expand Up @@ -105,9 +105,9 @@
generated_data = jsonformer()
import pprint

pprint.pprint(prompt)
print("##RESPONSE##")
pprint.pprint(generated_data)
#pprint.pprint(prompt)
#print("##RESPONSE##")
#pprint.pprint(generated_data)

writer.writerow(
{
Expand Down
19 changes: 11 additions & 8 deletions script/gsm/mistral_instruct_gsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
import random
import sys, os, json

from tqdm import tqdm
from jsonformer import Jsonformer

from config import access_token, DIR_PATH
from utils import get_questions_and_answer_from_multiArith_dataset
from utils import get_noisy_questions_and_answer_from_dataset


access_token = access_token
Expand Down Expand Up @@ -46,11 +47,13 @@
},
}

csv_file = f"{DIR_PATH}/data/multiArith/test_preprocessed.csv"
questions, ground_truths = get_questions_and_answer_from_multiArith_dataset(csv_file)
csv_file = f"{DIR_PATH}/data/noisy_datasets/gsm8k_test_noisy_punct_50.csv"
questions, ground_truths = get_noisy_questions_and_answer_from_dataset(csv_file)


output_file = f"{DIR_PATH}/data/multiArith/mistral_instruct/mistral_instruct_gsm_response.csv"


output_file = f"{DIR_PATH}/data/gsm/mistral_instruct/mistral_instruct_gsm_response_50.csv"

counter = 0
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
Expand All @@ -67,7 +70,7 @@
json_format = {
"answer": {},
}
for question, ground_truth in zip(questions, ground_truths):
for question, ground_truth in tqdm(zip(questions, ground_truths), total=len(questions)):

# prompt = f"""
# [INST]
Expand Down Expand Up @@ -112,9 +115,9 @@
generated_data = jsonformer()
import pprint

pprint.pprint(prompt)
print("##RESPONSE##")
pprint.pprint(generated_data)
#pprint.pprint(prompt)
#print("##RESPONSE##")
#pprint.pprint(generated_data)

writer.writerow(
{
Expand Down
19 changes: 11 additions & 8 deletions script/gsm/mistral_math_gsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
import random
import sys, os, json

from tqdm import tqdm
from jsonformer import Jsonformer

from config import access_token, DIR_PATH
from utils import get_questions_and_answer_from_multiArith_dataset
from utils import get_noisy_questions_and_answer_from_dataset


access_token = access_token
Expand Down Expand Up @@ -45,11 +46,13 @@
},
}

csv_file = f"{DIR_PATH}/data/multiArith/test_preprocessed.csv"
questions, ground_truths = get_questions_and_answer_from_multiArith_dataset(csv_file)
csv_file = f"{DIR_PATH}/data/noisy_datasets/gsm8k_test_noisy_punct_50.csv"
questions, ground_truths = get_noisy_questions_and_answer_from_dataset(csv_file)



#TODO: Change to relative path
output_file = f"{DIR_PATH}/data/multiArith/mistral_math/mistral_math_gsm_response.csv"
output_file = f"{DIR_PATH}/data/gsm/mistral_math/mistral_math_gsm_response_50.csv"
counter = 0
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
fieldnames = [
Expand All @@ -65,7 +68,7 @@
json_format = {
"answer": {},
}
for question, ground_truth in zip(questions, ground_truths):
for question, ground_truth in tqdm(zip(questions, ground_truths), total=len(questions)):

prompt = f"""
[INST]
Expand All @@ -89,9 +92,9 @@
generated_data = jsonformer()
import pprint

pprint.pprint(prompt)
print("##RESPONSE##")
pprint.pprint(generated_data)
#pprint.pprint(prompt)
#print("##RESPONSE##")
#pprint.pprint(generated_data)

writer.writerow(
{
Expand Down
24 changes: 12 additions & 12 deletions script/gsm/script_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

from config import access_token, DIR_PATH

# output_files = [
# (f"{DIR_PATH}/data/gsm/flan/flan_gsm_response.csv","flan_gsm"),
# (f"{DIR_PATH}/data/gsm/mistral/mistral_gsm_response.csv","mistral_gsm"),
# (f"{DIR_PATH}/data/gsm/mistral_instruct/mistral_instruct_gsm_response.csv","mistral_instruct"),
# (f"{DIR_PATH}/data/gsm/mistral_math/mistral_math_gsm_response.csv","mistral_math")

# ]

output_files = [
(f"{DIR_PATH}/data/multiArith/mistral_math/mistral_math_multiArith_response.csv","mistral_math"),
(f"{DIR_PATH}/data/multiArith/mistral/mistral_multiArith_response.csv","mistral_gsm"),
(f"{DIR_PATH}/data/multiArith/mistral_instruct/mistral_instruct_multiArith_response.csv","mistral_instruct"),
(f"{DIR_PATH}/data/multiArith/flan/flan_multiArith_response.csv","flan_gsm"),
(f"{DIR_PATH}/data/gsm/flan/flan_gsm_response_50.csv","flan_gsm"),
(f"{DIR_PATH}/data/gsm/mistral/mistral_gsm_response_50.csv","mistral_gsm"),
(f"{DIR_PATH}/data/gsm/mistral_instruct/mistral_instruct_gsm_response_50.csv","mistral_instruct"),
(f"{DIR_PATH}/data/gsm/mistral_math/mistral_math_gsm_response_50.csv","mistral_math")

]

# output_files = [
# (f"{DIR_PATH}/data/multiArith/mistral_math/mistral_math_multiArith_response_50.csv","mistral_math"),
# (f"{DIR_PATH}/data/multiArith/mistral/mistral_multiArith_response_50.csv","mistral"),
# (f"{DIR_PATH}/data/multiArith/mistral_instruct/mistral_instruct_multiArith_response_50.csv","mistral_instruct"),
# (f"{DIR_PATH}/data/multiArith/flan/flan_multiArith_response_50.csv","flan"),
# ]

# Call the calculate_accuracy function for each output file
for output_file in output_files:
print(output_file)
Expand Down
9 changes: 5 additions & 4 deletions script/gsm/script_run.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import subprocess
from config import DIR_PATH

# List of scripts to run sequentially
scripts = [
"python mistral_gsm.py",
"python mistral_instruct_gsm.py",
"python mistral_math_gsm.py",
"python flan_gsm.py"
f"python {DIR_PATH}/script/gsm/mistral_gsm.py",
f"python {DIR_PATH}/script/gsm/mistral_instruct_gsm.py",
f"python {DIR_PATH}/script/gsm/mistral_math_gsm.py",
f"python {DIR_PATH}/script/gsm/flan_gsm.py"
]

# Run each script sequentially
Expand Down
15 changes: 15 additions & 0 deletions script/gsm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,18 @@ def safe_convert_llm_to_int(value):
)

print("Accuracy saved to accuracy.csv.")


def _read_noisy_columns(csv_file_path, answer_column):
    """Read a noisy-question CSV and return its questions and answers.

    Shared implementation for the public loaders below, which differ only
    in the name of the column that holds the ground-truth answer.

    Args:
        csv_file_path: Path (or any source accepted by ``pd.read_csv``) of
            the CSV file to load.
        answer_column: Name of the column holding the ground-truth answers.

    Returns:
        A ``(questions, ground_truths)`` tuple of two lists, taken from the
        ``"noisy_questions"`` column and from ``answer_column`` respectively.

    Raises:
        KeyError: If ``"noisy_questions"`` or ``answer_column`` is not a
            column of the loaded CSV.
    """
    data = pd.read_csv(csv_file_path)
    # Questions are looked up first so a missing-column error is reported
    # in the same order as before the helper was factored out.
    questions = data["noisy_questions"].tolist()
    ground_truths = data[answer_column].tolist()
    return questions, ground_truths


def get_noisy_questions_and_answer_from_dataset(csv_file_path):
    """Return noisy questions and answers from the ``"numeric_answer"`` column.

    Args:
        csv_file_path: CSV file with ``"noisy_questions"`` and
            ``"numeric_answer"`` columns.

    Returns:
        A ``(questions, ground_truths)`` tuple of two lists.
    """
    return _read_noisy_columns(csv_file_path, "numeric_answer")


def get_noisy_questions_and_answer_from_multi_arith_dataset(csv_file_path):
    """Return noisy questions and answers from the ``"answer"`` column.

    Args:
        csv_file_path: CSV file with ``"noisy_questions"`` and ``"answer"``
            columns.

    Returns:
        A ``(questions, ground_truths)`` tuple of two lists.
    """
    return _read_noisy_columns(csv_file_path, "answer")
6 changes: 4 additions & 2 deletions script/multiArith/multiArith_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pandas as pd
import json

from config import DIR_PATH

# Path to the JSON file
json_file_path = "/home/stud/abedinz1/localDisk/nlplab/data/multiArith/questions.json"
json_file_path = f"{DIR_PATH}/data/multiArith/questions.json"

# Read JSON data from the file
with open(json_file_path, 'r') as file:
Expand All @@ -19,7 +21,7 @@
df = pd.DataFrame(data)

# Save to CSV
output_path = "/home/stud/abedinz1/localDisk/nlplab/data/multiArith/test_preprocessed.csv"
output_path = f"{DIR_PATH}/data/multiArith/test_preprocessed.csv"
df.to_csv(output_path, index=False)

# Display the DataFrame
Expand Down
6 changes: 4 additions & 2 deletions script/noise_creation/create_noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from tqdm import tqdm
import pandas as pd

from config import DIR_PATH

tqdm.pandas()

random.seed(0)
Expand Down Expand Up @@ -36,10 +38,10 @@ def insert_punctuation_marks(sentence, punc_ratio):


def main(dataset):
data_df = pd.read_csv(dataset + '/train_preprocessed.csv')
data_df = pd.read_csv(DIR_PATH + '/data/multiArith/test_preprocessed.csv')
for punct in tqdm(PUNC_RATIO):
data_df[f'noisy_questions'] = data_df['question'].progress_apply(lambda x: insert_punctuation_marks(x,punct))
data_df.to_csv(f'../../data/noisy_datasets/gsm8k_noisy_punct_{int(punct*100)}.csv',index=False)
data_df.to_csv(f'{DIR_PATH}/data/noisy_datasets/multiArith_test_noisy_punct_{int(punct*100)}.csv',index=False)
if __name__ == "__main__":
#for dataset in DATASETS:
main(DATASET)