From 12d69332a9d397a7f9910acb61a2220995ede7e9 Mon Sep 17 00:00:00 2001 From: Derek Date: Tue, 5 Apr 2022 13:08:35 -0700 Subject: [PATCH 1/5] Add if not exist condition to copying prompt file commands --- Completions/run_davinci.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Completions/run_davinci.py b/Completions/run_davinci.py index fa526276e..b4a585e32 100644 --- a/Completions/run_davinci.py +++ b/Completions/run_davinci.py @@ -21,6 +21,7 @@ def run_davinci(path, out_dir): print("--------------------------") + # Add loop for sampling TEMPERATURE & N_SOLUTIONS... response = openai.Completion.create( engine=ENGINE, prompt=input_prompt, @@ -49,12 +50,17 @@ def run_davinci(path, out_dir): if(finish_reason == "stop"): if("text" in choice): solution_set.add(choice["text"]) - shutil.copyfile(path, f"{out_dir}/question.txt") + if not os.path.exists(f"{out_dir}/question.txt"): + shutil.copyfile(path, f"{out_dir}/question.txt") prompt_file_folder = os.path.dirname(path) try: - shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json") - shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json") - shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json") + # REVISE TO COPYFILE ONLY IF FILES DNE (FOR ALL THREE) + if not os.path.exists(f"{out_dir}/metadata.json"): + shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json") + if not os.path.exists(f"{out_dir}/solutions.json"): + shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json") + if not os.path.exists(f"{out_dir}/input_output.json"): + shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json") except Exception as e: print(path, e) @@ -67,6 +73,8 @@ def run_davinci(path, out_dir): + # REPLACE out_dir with folder = out_dir + '/' + TEMPERATURE + 'T_' + N_SOLUTIONS + 'k' + # need to check if it exists...if DNE, 
mkdir!!! with open(f'{out_dir}/codex_solutions.json', 'w') as outfile: json.dump(output["solutions"], outfile) From 62d4110f61367fd4be82a0bfa21f3d78950e1c8e Mon Sep 17 00:00:00 2001 From: Derek Date: Tue, 5 Apr 2022 22:39:52 -0700 Subject: [PATCH 2/5] Add loop sampling --- Completions/run_davinci.py | 125 ++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 63 deletions(-) diff --git a/Completions/run_davinci.py b/Completions/run_davinci.py index b4a585e32..cae11eb30 100644 --- a/Completions/run_davinci.py +++ b/Completions/run_davinci.py @@ -4,15 +4,14 @@ import openai import json import shutil +import numpy as np openai.api_key = os.getenv("OPENAI_API_KEY") -TEMPERATURE = 0.5 #T -N_SOLUTIONS = 2 #k ENGINE = "code-davinci-002" -MAX_TOKENS=4096 +MAX_TOKENS=4000 -def run_davinci(path, out_dir): +def run_davinci(path, out_dir, n_itr): with open(path) as f: prompt = f.read() @@ -21,70 +20,70 @@ def run_davinci(path, out_dir): print("--------------------------") - # Add loop for sampling TEMPERATURE & N_SOLUTIONS... 
- response = openai.Completion.create( - engine=ENGINE, - prompt=input_prompt, - temperature=TEMPERATURE, - max_tokens=MAX_TOKENS, - top_p=1, - frequency_penalty=0, - presence_penalty=0, - n=N_SOLUTIONS - ) - - print(response) - - output = {"prompt":input_prompt, "solutions":[]} - solution_set = set() - - for i in range(len(response["choices"])): - # for choice in response["choices"]: - choice = response["choices"][i] - print(i, choice) - - - - finish_reason = choice["finish_reason"] - print(f"REASON {finish_reason}") - if(finish_reason == "stop"): - if("text" in choice): - solution_set.add(choice["text"]) - if not os.path.exists(f"{out_dir}/question.txt"): - shutil.copyfile(path, f"{out_dir}/question.txt") - prompt_file_folder = os.path.dirname(path) - try: - # REVISE TO COPYFILE ONLY IF FILES DNE (FOR ALL THREE) - if not os.path.exists(f"{out_dir}/metadata.json"): - shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json") - if not os.path.exists(f"{out_dir}/solutions.json"): - shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json") - if not os.path.exists(f"{out_dir}/input_output.json"): - shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json") - except Exception as e: - print(path, e) - - # shutil.copyfile(path, f"{out_dir}/solutions.json") - # shutil.copyfile(path, f"{out_dir}/input_output.json") - # with open(f"{out_dir}/gen_code_out_{i}.py", "w") as fp: - # fp.write(choice["text"]) + for itr in n_itr: + TEMPERATURE = np.random.randint(0, 10)/10 + N_SOLUTIONS = np.random.randint(1, 100) + + response = openai.Completion.create( + engine=ENGINE, + prompt=input_prompt, + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + n=N_SOLUTIONS + ) + + print(response) + + output = {"prompt":input_prompt, "solutions":[]} + solution_set = set() + + for i in range(len(response["choices"])): + # for choice in response["choices"]: + 
choice = response["choices"][i] + print(i, choice) + + + finish_reason = choice["finish_reason"] + print(f"REASON {finish_reason}") + if(finish_reason == "stop"): + if("text" in choice): + solution_set.add(choice["text"]) + if not os.path.exists(f"{out_dir}/question.txt"): + shutil.copyfile(path, f"{out_dir}/question.txt") + prompt_file_folder = os.path.dirname(path) + try: + if not os.path.exists(f"{out_dir}/metadata.json"): + shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json") + if not os.path.exists(f"{out_dir}/solutions.json"): + shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json") + if not os.path.exists(f"{out_dir}/input_output.json"): + shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json") + except Exception as e: + print(path, e) + + # shutil.copyfile(path, f"{out_dir}/solutions.json") + # shutil.copyfile(path, f"{out_dir}/input_output.json") + # with open(f"{out_dir}/gen_code_out_{i}.py", "w") as fp: + # fp.write(choice["text"]) + + output["solutions"].extend(solution_set) + + + pref = str(TEMPERATURE) + 'T_' + str(N_SOLUTIONS) + 'k_' + if not os.path.exists(f"{out_dir}/{pref}codex_solutions.json"): + with open(f'{out_dir}/{pref}codex_solutions.json', 'w') as outfile: + json.dump(output["solutions"], outfile) - output["solutions"].extend(solution_set) - - - - # REPLACE out_dir with folder = out_dir + '/' + TEMPERATURE + 'T_' + N_SOLUTIONS + 'k' - # need to check if it exists...if DNE, mkdir!!! 
- with open(f'{out_dir}/codex_solutions.json', 'w') as outfile: - json.dump(output["solutions"], outfile) - - if __name__ == "__main__": #Example: python3 ./test/0179/question.txt path = sys.argv[1] #test/sort-questions.txt_dir/4997/question.txt out_dir = sys.argv[2] #davinci_runs/test/sort-questions.txt_dir + n_itr = sys.argv[3] # number of times want to sample # split_parts = prompt_file_path.split('/') # num = split_parts[2] @@ -95,5 +94,5 @@ def run_davinci(path, out_dir): # os.makedirs(out_dir, exist_ok=True) sys.stdout = open(f'{out_dir}/out.log', 'w') - run_davinci(path, out_dir) - sys.stdout.close() \ No newline at end of file + run_davinci(path, out_dir, n_itr) + sys.stdout.close() From 60edb816ccbc6cabc280d08a0d3e84864dad2ea8 Mon Sep 17 00:00:00 2001 From: Derek Date: Sat, 9 Apr 2022 17:55:24 -0700 Subject: [PATCH 3/5] Update run function in run_edit_module.py --- Edit_Insert/mass_run_edit.sh | 2 +- Edit_Insert/run_edit_module.py | 39 ++++++++++++++++++++++------------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/Edit_Insert/mass_run_edit.sh b/Edit_Insert/mass_run_edit.sh index 4815e8ec2..d2c8ec94c 100755 --- a/Edit_Insert/mass_run_edit.sh +++ b/Edit_Insert/mass_run_edit.sh @@ -2,7 +2,7 @@ input_dir="davinci_runs/test/intro-questions.txt_dir" COUNTER=$(( 0 )) LIMIT=$(( 200 )) -for question in $input_dir/*/codex_solutions.json; do +for question in $input_dir/*/*codex_solutions.json; do echo $question python3 run_edit_module.py $question 2>&1 (( COUNTER++ )) diff --git a/Edit_Insert/run_edit_module.py b/Edit_Insert/run_edit_module.py index 3040473ac..511a5b787 100644 --- a/Edit_Insert/run_edit_module.py +++ b/Edit_Insert/run_edit_module.py @@ -6,6 +6,7 @@ from datetime import datetime import random import time +import numpy as np old_print = print @@ -134,26 +135,35 @@ def save_strings_to_py_file(solution_strings, folder_name="edit_sol_pys"): They are multiple outputs for a single input depending upon k The file has outputs for multiple 
prompts """ -def run(file_name,out_dir="."): +def run(file_name,out_dir=".",n_itr): with open(file_name,"r") as input_fp: data = json.load(input_fp) - output = {"solutions":[]} + # PUT REMAINDER OF FUNCTION IN LOOP... + for itr in n_itr: + TEMPERATURE = np.random.randint(0, 10)/10 + N_SOLUTIONS = np.random.randint(1, 100) - total_output_set = set() - for solution in data: - outputs = run_edit_multiple_op(solution, EDIT_OPERATIONS) - total_output_set.update(outputs) + output = {"solutions":[]} - output["solutions"].extend(total_output_set) + total_output_set = set() + for solution in data: + outputs = run_edit_multiple_op(solution, EDIT_OPERATIONS, TEMPERATURE, N_SOLUTIONS) + total_output_set.update(outputs) - # json_output = json.dumps(output) + output["solutions"].extend(total_output_set) + # json_output = json.dumps(output) - with open(f'{out_dir}/codex_edit_solutions.json', 'w') as outfile: - json.dump(output["solutions"], outfile) - - save_strings_to_py_file(output["solutions"], f"{out_dir}/edit_sol_pys") + + pref = 'edit_' + str(TEMPERATURE) + 'T_' + str(N_SOLUTIONS) + 'k_init_' + if not os.path.exists(f"{out_dir}/{pref}{file_name}"): + return + with open(f'{out_dir}/{pref}{file_name}', 'w') as outfile: + json.dump(output["solutions"], outfile) + + fold_name = pref + file_name[:-4] + "_pys" + save_strings_to_py_file(output["solutions"], f"{out_dir}/{fold_name}") @@ -161,9 +171,10 @@ def run(file_name,out_dir="."): if __name__ == "__main__": - #python3 run_edit_module.py example_output.json + #python3 run_edit_module.py example_output.json 100 input_file_name = sys.argv[1] out_dir = os.path.dirname(input_file_name) + n_itr = sys.argv[2] #The following code is to save the example prompt and outputs """ @@ -181,7 +192,7 @@ def run(file_name,out_dir="."): sys.stdout = open(f'{out_dir}/edit_out.log', 'w') - run(input_file_name,out_dir=out_dir) + run(input_file_name,out_dir=out_dir,n_itr) sys.stdout.close() From e1305bf6d83342045f1acdd570ce86582cc5a71b Mon Sep 17 
00:00:00 2001 From: Derek Date: Sat, 9 Apr 2022 18:29:08 -0700 Subject: [PATCH 4/5] Made corresponding changes to run_edit_multiple_op and run_edit functions to account for automation of temperature and pass_k--perhaps need to automate selection of edit/insert operations --- Edit_Insert/run_edit_module.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/Edit_Insert/run_edit_module.py b/Edit_Insert/run_edit_module.py index 511a5b787..786853f11 100644 --- a/Edit_Insert/run_edit_module.py +++ b/Edit_Insert/run_edit_module.py @@ -29,11 +29,11 @@ def set_api_key_rand(): EDIT_ENGINE = "code-davinci-edit-001" -TEMPERATURE = 0.3 -N_SOLUTIONS = 2 +# TEMPERATURE = 0.3 +# N_SOLUTIONS = 2 # EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax error", "cleanup code"] -EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax errors"] +EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax errors"] # PROBABLY NEED TO AUTOMATE THIS PART AS WELL, BUT NEED TO DISCUSS PROCESS IN MORE DETAIL... """ @@ -44,7 +44,7 @@ def set_api_key_rand(): output_codes: list of strings containing the code after application of operation """ -def run_edit(input_code, operation): +def run_edit(input_code, operation, temp, k): # set_api_key_rand() @@ -52,8 +52,8 @@ def run_edit(input_code, operation): engine= EDIT_ENGINE, input=input_code, instruction=operation, - temperature=TEMPERATURE, - n=N_SOLUTIONS + temperature=temp, + n=k ) time.sleep(2) @@ -92,7 +92,7 @@ def run_edit(input_code, operation): Should we save intermediary states? 
""" -def run_edit_multiple_op(input_code, operations): +def run_edit_multiple_op(input_code, operations, temp, k): states = [input_code] num_operations = 0 @@ -104,7 +104,7 @@ def run_edit_multiple_op(input_code, operations): print(f"size on input set {len(current_input_set)}") for code in current_input_set: - gen_codes = run_edit(code, operation) + gen_codes = run_edit(code, operation, temp, k) print(operation, len(gen_codes), gen_codes) current_output_set.update(gen_codes) @@ -139,10 +139,11 @@ def run(file_name,out_dir=".",n_itr): with open(file_name,"r") as input_fp: data = json.load(input_fp) - # PUT REMAINDER OF FUNCTION IN LOOP... for itr in n_itr: + # can later change values of TEMPERATURE and N_SOLUTIONS TEMPERATURE = np.random.randint(0, 10)/10 N_SOLUTIONS = np.random.randint(1, 100) + # would sample different edit operations here (ultimately put in array format) as well, but need to discuss further on automating selection of edit operations, as mentioned in beginning of file output = {"solutions":[]} @@ -155,14 +156,13 @@ def run(file_name,out_dir=".",n_itr): # json_output = json.dumps(output) - + # File naming format: _codex_solutions.json pref = 'edit_' + str(TEMPERATURE) + 'T_' + str(N_SOLUTIONS) + 'k_init_' if not os.path.exists(f"{out_dir}/{pref}{file_name}"): - return with open(f'{out_dir}/{pref}{file_name}', 'w') as outfile: json.dump(output["solutions"], outfile) - fold_name = pref + file_name[:-4] + "_pys" + fold_name = pref + file_name[:-4] + "_pys" # removes .json extension save_strings_to_py_file(output["solutions"], f"{out_dir}/{fold_name}") @@ -195,9 +195,3 @@ def run(file_name,out_dir=".",n_itr): run(input_file_name,out_dir=out_dir,n_itr) sys.stdout.close() - - - - - - From fada5fc9f13c37721cd55c564657b2653711f3ec Mon Sep 17 00:00:00 2001 From: Derek Date: Sun, 10 Apr 2022 10:54:40 -0700 Subject: [PATCH 5/5] Change file-naming format of output files --- Completions/run_davinci.py | 2 +- Edit_Insert/run_edit_module.py | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/Completions/run_davinci.py b/Completions/run_davinci.py index cae11eb30..4a06ea51d 100644 --- a/Completions/run_davinci.py +++ b/Completions/run_davinci.py @@ -72,7 +72,7 @@ def run_davinci(path, out_dir, n_itr): output["solutions"].extend(solution_set) - pref = str(TEMPERATURE) + 'T_' + str(N_SOLUTIONS) + 'k_' + pref = str(TEMPERATURE) + 'T1_' + str(N_SOLUTIONS) + 'k1_' if not os.path.exists(f"{out_dir}/{pref}codex_solutions.json"): with open(f'{out_dir}/{pref}codex_solutions.json', 'w') as outfile: json.dump(output["solutions"], outfile) diff --git a/Edit_Insert/run_edit_module.py b/Edit_Insert/run_edit_module.py index 786853f11..d7a7ff177 100644 --- a/Edit_Insert/run_edit_module.py +++ b/Edit_Insert/run_edit_module.py @@ -157,7 +157,7 @@ def run(file_name,out_dir=".",n_itr): # json_output = json.dumps(output) # File naming format: _codex_solutions.json - pref = 'edit_' + str(TEMPERATURE) + 'T_' + str(N_SOLUTIONS) + 'k_init_' + pref = str(TEMPERATURE) + 'T2_' + str(N_SOLUTIONS) + 'k2_' if not os.path.exists(f"{out_dir}/{pref}{file_name}"): with open(f'{out_dir}/{pref}{file_name}', 'w') as outfile: json.dump(output["solutions"], outfile)