Reviewer notes (free text ahead of the first "diff --git" header):
- This patch arrived with its line breaks collapsed. Line breaks, Python
  indentation and the position of blank context lines are reconstructed
  (hunk line counts agree with the @@ headers) and must be checked against
  the target files before applying.
- Changes made to added ("+") lines relative to the patch as received:
  * n_itr is converted with int() and looped over with range(); it used to
    be a command-line string that was iterated character by character.
  * run() declares n_itr=1 and is called with n_itr=n_itr; the received
    signature and call were both syntax errors.
  * run_edit_module.py derives output names from
    os.path.basename(file_name) and strips the extension with
    os.path.splitext() instead of file_name[:-4].
  * The n_itr command-line argument is optional (default 1), so
    mass_run_edit.sh, which passes only the input file, keeps working.
- The "index" lines are copied from the received patch and no longer
  describe the post-image blobs.

diff --git a/Completions/run_davinci.py b/Completions/run_davinci.py
index fa526276e..4a06ea51d 100644
--- a/Completions/run_davinci.py
+++ b/Completions/run_davinci.py
@@ -4,15 +4,14 @@ import openai
 import json
 import shutil
 
+import numpy as np
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-TEMPERATURE = 0.5 #T
-N_SOLUTIONS = 2 #k
 ENGINE = "code-davinci-002"
-MAX_TOKENS=4096
+MAX_TOKENS=4000
 
-def run_davinci(path, out_dir):
+def run_davinci(path, out_dir, n_itr):
     with open(path) as f:
         prompt = f.read()
 
@@ -21,62 +20,70 @@ def run_davinci(path, out_dir):
 
     print("--------------------------")
 
-    response = openai.Completion.create(
-        engine=ENGINE,
-        prompt=input_prompt,
-        temperature=TEMPERATURE,
-        max_tokens=MAX_TOKENS,
-        top_p=1,
-        frequency_penalty=0,
-        presence_penalty=0,
-        n=N_SOLUTIONS
-    )
-
-    print(response)
-
-    output = {"prompt":input_prompt, "solutions":[]}
-    solution_set = set()
-
-    for i in range(len(response["choices"])):
-        # for choice in response["choices"]:
-        choice = response["choices"][i]
-        print(i, choice)
-
-
-
-        finish_reason = choice["finish_reason"]
-        print(f"REASON {finish_reason}")
-        if(finish_reason == "stop"):
-            if("text" in choice):
-                solution_set.add(choice["text"])
-                shutil.copyfile(path, f"{out_dir}/question.txt")
-                prompt_file_folder = os.path.dirname(path)
-                try:
-                    shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json")
-                    shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json")
-                    shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json")
-                except Exception as e:
-                    print(path, e)
-
-                # shutil.copyfile(path, f"{out_dir}/solutions.json")
-                # shutil.copyfile(path, f"{out_dir}/input_output.json")
-                # with open(f"{out_dir}/gen_code_out_{i}.py", "w") as fp:
-                #     fp.write(choice["text"])
+    for itr in range(int(n_itr)):
+        TEMPERATURE = np.random.randint(0, 10)/10
+        N_SOLUTIONS = np.random.randint(1, 100)
+
+        response = openai.Completion.create(
+            engine=ENGINE,
+            prompt=input_prompt,
+            temperature=TEMPERATURE,
+            max_tokens=MAX_TOKENS,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0,
+            n=N_SOLUTIONS
+        )
+
+        print(response)
+
+        output = {"prompt":input_prompt, "solutions":[]}
+        solution_set = set()
+
+        for i in range(len(response["choices"])):
+            # for choice in response["choices"]:
+            choice = response["choices"][i]
+            print(i, choice)
+
+
+            finish_reason = choice["finish_reason"]
+            print(f"REASON {finish_reason}")
+            if(finish_reason == "stop"):
+                if("text" in choice):
+                    solution_set.add(choice["text"])
+                    if not os.path.exists(f"{out_dir}/question.txt"):
+                        shutil.copyfile(path, f"{out_dir}/question.txt")
+                    prompt_file_folder = os.path.dirname(path)
+                    try:
+                        if not os.path.exists(f"{out_dir}/metadata.json"):
+                            shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json")
+                        if not os.path.exists(f"{out_dir}/solutions.json"):
+                            shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json")
+                        if not os.path.exists(f"{out_dir}/input_output.json"):
+                            shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json")
+                    except Exception as e:
+                        print(path, e)
+
+                    # shutil.copyfile(path, f"{out_dir}/solutions.json")
+                    # shutil.copyfile(path, f"{out_dir}/input_output.json")
+                    # with open(f"{out_dir}/gen_code_out_{i}.py", "w") as fp:
+                    #     fp.write(choice["text"])
+
+        output["solutions"].extend(solution_set)
+
+
+        pref = str(TEMPERATURE) + 'T1_' + str(N_SOLUTIONS) + 'k1_'
+        if not os.path.exists(f"{out_dir}/{pref}codex_solutions.json"):
+            with open(f'{out_dir}/{pref}codex_solutions.json', 'w') as outfile:
+                json.dump(output["solutions"], outfile)
 
-    output["solutions"].extend(solution_set)
-
-
-
-    with open(f'{out_dir}/codex_solutions.json', 'w') as outfile:
-        json.dump(output["solutions"], outfile)
-
-
 
 
 if __name__ == "__main__":
     #Example: python3 ./test/0179/question.txt
     path = sys.argv[1] #test/sort-questions.txt_dir/4997/question.txt
     out_dir = sys.argv[2] #davinci_runs/test/sort-questions.txt_dir
+    n_itr = int(sys.argv[3]) if len(sys.argv) > 3 else 1 # number of times to sample (default 1)
 
     # split_parts = prompt_file_path.split('/')
     # num = split_parts[2]
@@ -87,5 +94,5 @@ def run_davinci(path, out_dir):
     # os.makedirs(out_dir, exist_ok=True)
 
     sys.stdout = open(f'{out_dir}/out.log', 'w')
-    run_davinci(path, out_dir)
-    sys.stdout.close()
\ No newline at end of file
+    run_davinci(path, out_dir, n_itr)
+    sys.stdout.close()
diff --git a/Edit_Insert/mass_run_edit.sh b/Edit_Insert/mass_run_edit.sh
index 4815e8ec2..d2c8ec94c 100755
--- a/Edit_Insert/mass_run_edit.sh
+++ b/Edit_Insert/mass_run_edit.sh
@@ -2,7 +2,7 @@ input_dir="davinci_runs/test/intro-questions.txt_dir"
 COUNTER=$(( 0 ))
 LIMIT=$(( 200 ))
 
-for question in $input_dir/*/codex_solutions.json; do
+for question in $input_dir/*/*codex_solutions.json; do
     echo $question
     python3 run_edit_module.py $question 2>&1
     (( COUNTER++ ))
diff --git a/Edit_Insert/run_edit_module.py b/Edit_Insert/run_edit_module.py
index 3040473ac..d7a7ff177 100644
--- a/Edit_Insert/run_edit_module.py
+++ b/Edit_Insert/run_edit_module.py
@@ -6,6 +6,7 @@ from datetime import datetime
 import random
 import time
 
+import numpy as np
 
 old_print = print
 
@@ -28,11 +29,11 @@ def set_api_key_rand():
 
 
 EDIT_ENGINE = "code-davinci-edit-001"
-TEMPERATURE = 0.3
-N_SOLUTIONS = 2
+# TEMPERATURE = 0.3
+# N_SOLUTIONS = 2
 
 # EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax error", "cleanup code"]
-EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax errors"]
+EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax errors"] # PROBABLY NEED TO AUTOMATE THIS PART AS WELL, BUT NEED TO DISCUSS PROCESS IN MORE DETAIL...
 
 
 """
@@ -43,7 +44,7 @@ def set_api_key_rand():
 
 output_codes: list of strings containing the code after application of operation
 """
-def run_edit(input_code, operation):
+def run_edit(input_code, operation, temp, k):
 
     # set_api_key_rand()
 
@@ -51,8 +52,8 @@ def run_edit(input_code, operation):
         engine= EDIT_ENGINE,
         input=input_code,
         instruction=operation,
-        temperature=TEMPERATURE,
-        n=N_SOLUTIONS
+        temperature=temp,
+        n=k
     )
 
     time.sleep(2)
@@ -91,7 +92,7 @@ def run_edit(input_code, operation):
 
 Should we save intermediary states?
 """
-def run_edit_multiple_op(input_code, operations):
+def run_edit_multiple_op(input_code, operations, temp, k):
     states = [input_code]
     num_operations = 0
 
@@ -103,7 +104,7 @@ def run_edit_multiple_op(input_code, operations):
         print(f"size on input set {len(current_input_set)}")
 
         for code in current_input_set:
-            gen_codes = run_edit(code, operation)
+            gen_codes = run_edit(code, operation, temp, k)
             print(operation, len(gen_codes), gen_codes)
             current_output_set.update(gen_codes)
 
@@ -134,26 +135,35 @@ def save_strings_to_py_file(solution_strings, folder_name="edit_sol_pys"):
 They are multiple outputs for a single input depending upon k
 The file has outputs for multiple prompts
 """
-def run(file_name,out_dir="."):
+def run(file_name,out_dir=".",n_itr=1):
     with open(file_name,"r") as input_fp:
         data = json.load(input_fp)
 
-    output = {"solutions":[]}
+    for itr in range(int(n_itr)):
+        # can later change values of TEMPERATURE and N_SOLUTIONS
+        TEMPERATURE = np.random.randint(0, 10)/10
+        N_SOLUTIONS = np.random.randint(1, 100)
+        # would sample different edit operations here (ultimately put in array format) as well, but need to discuss further on automating selection of edit operations, as mentioned in beginning of file
 
-    total_output_set = set()
-    for solution in data:
-        outputs = run_edit_multiple_op(solution, EDIT_OPERATIONS)
-        total_output_set.update(outputs)
+        output = {"solutions":[]}
 
-    output["solutions"].extend(total_output_set)
+        total_output_set = set()
+        for solution in data:
+            outputs = run_edit_multiple_op(solution, EDIT_OPERATIONS, TEMPERATURE, N_SOLUTIONS)
+            total_output_set.update(outputs)
 
-    # json_output = json.dumps(output)
+        output["solutions"].extend(total_output_set)
+        # json_output = json.dumps(output)
 
 
-    with open(f'{out_dir}/codex_edit_solutions.json', 'w') as outfile:
-        json.dump(output["solutions"], outfile)
-
-    save_strings_to_py_file(output["solutions"], f"{out_dir}/edit_sol_pys")
+        # File naming format: <temperature>T2_<n>k2_<input file name>
+        pref = str(TEMPERATURE) + 'T2_' + str(N_SOLUTIONS) + 'k2_'
+        if not os.path.exists(f"{out_dir}/{pref}{os.path.basename(file_name)}"):
+            with open(f'{out_dir}/{pref}{os.path.basename(file_name)}', 'w') as outfile:
+                json.dump(output["solutions"], outfile)
+
+        fold_name = pref + os.path.splitext(os.path.basename(file_name))[0] + "_pys" # drops the directory part and the .json extension
+        save_strings_to_py_file(output["solutions"], f"{out_dir}/{fold_name}")
 
 
 
@@ -161,9 +171,10 @@ def run(file_name,out_dir="."):
 
 
 if __name__ == "__main__":
-    #python3 run_edit_module.py example_output.json
+    #python3 run_edit_module.py example_output.json 100
     input_file_name = sys.argv[1]
     out_dir = os.path.dirname(input_file_name)
+    n_itr = int(sys.argv[2]) if len(sys.argv) > 2 else 1 # number of sampling rounds (default 1)
 
     #The following code is to save the example prompt and outputs
     """
@@ -181,12 +192,6 @@ def run(file_name,out_dir="."):
 
 
     sys.stdout = open(f'{out_dir}/edit_out.log', 'w')
-    run(input_file_name,out_dir=out_dir)
+    run(input_file_name,out_dir=out_dir,n_itr=n_itr)
     sys.stdout.close()
 
-
-
-
-
-
-