From 8b46a11aa13abdfdbfc16c989760d96a4f4d34ab Mon Sep 17 00:00:00 2001 From: finnless Date: Sat, 15 Apr 2023 15:47:32 -0700 Subject: [PATCH 1/4] pdfreader written & feature for reading started --- app.py | 17 +++++++++++++++++ pdfreader.py | 14 ++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 pdfreader.py diff --git a/app.py b/app.py index cf96bea..8ae361d 100644 --- a/app.py +++ b/app.py @@ -2,6 +2,17 @@ import streamlit as st import pandas as pd +import sys +# Add the directory containing mypdf.py to sys.path +sys.path.append('/Users/finn/desktop/waam/pdfreader.py') +# Now you should be able to import mypdf module + +import pdfreader as pdf_read + +pdf_read.pdf_reader() + + + # Setting page title and header st.set_page_config(page_icon=":bulb:", page_title="WAAM-GPT") st.markdown("

💡WAAM-GPT

homework help
", unsafe_allow_html=True) @@ -81,6 +92,12 @@ def generate_response(prompt): st.session_state['messages'].append({"role": "user", "content": prompt}) + + # Call the PDF reading functionality from mypdf module + pdf_data = pdf_read.pdf_reader(prompt) + # Process the PDF data and generate a response + response = pdf_data + completion = openai.ChatCompletion.create( model=model, messages=st.session_state['messages'] diff --git a/pdfreader.py b/pdfreader.py new file mode 100644 index 0000000..081043f --- /dev/null +++ b/pdfreader.py @@ -0,0 +1,14 @@ +def pdf_reader(myPdf): + from langchain.document_loaders import PyPDFLoader + + loader = PyPDFLoader(myPDF) + pages = loader.load_and_split() + pages[0] + + from langchain.vectorstores import FAISS + from langchain.embeddings.openai import OpenAIEmbeddings + + faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings()) + docs = faiss_index.similarity_search("How will the community be engaged?", k=2) + for doc in docs: + print(str(doc.metadata["page"]) + ":", doc.page_content) \ No newline at end of file From 0bc519d8773fc2f3171f9910ec0cdd56891bb53f Mon Sep 17 00:00:00 2001 From: finnless Date: Sat, 15 Apr 2023 16:57:01 -0700 Subject: [PATCH 2/4] made pdf work with character restrictions --- app.py | 38 ++++++++++++++++++++++++-------------- pdfreader.py | 2 ++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/app.py b/app.py index 8ae361d..a128071 100644 --- a/app.py +++ b/app.py @@ -1,15 +1,22 @@ import openai import streamlit as st import pandas as pd +import PyPDF2 -import sys -# Add the directory containing mypdf.py to sys.path -sys.path.append('/Users/finn/desktop/waam/pdfreader.py') -# Now you should be able to import mypdf module +def pdf_reader(file): + """Reads text from a PDF file""" + try: + # Read the uploaded file using PyPDF2 + pdf = PyPDF2.PdfReader(file) + text = '' + for page in range(len(pdf.pages)): + text += pdf.pages[page].extract_text() -import pdfreader as pdf_read - 
-pdf_read.pdf_reader() + return text + except Exception as e: + # Handle any exceptions that may occur + print("Error reading PDF file:", e) + return None @@ -18,8 +25,8 @@ st.markdown("

💡WAAM-GPT

homework help
", unsafe_allow_html=True) # Set org ID and API key -openai.organization = st.secrets["openai_org"] -openai.api_key = st.secrets["openai_key"] +openai.organization = "org-g7sHaD0F2nWbXgynAv8nbmXI" +openai.api_key = "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" system_prompt = "You are a waam, a helpful large language model STEM tutor created during the 2023 5C Hackathon. You help users learn quantitative skills by guiding them through concepts and practice problems step by step instead of immediately giving away the final answer. Never give a student the direct answer. Always use markdown for your responses. Always render equations using LaTeX." @@ -92,11 +99,6 @@ def generate_response(prompt): st.session_state['messages'].append({"role": "user", "content": prompt}) - - # Call the PDF reading functionality from mypdf module - pdf_data = pdf_read.pdf_reader(prompt) - # Process the PDF data and generate a response - response = pdf_data completion = openai.ChatCompletion.create( model=model, @@ -125,7 +127,15 @@ def generate_response(prompt): user_input = st.text_area("", placeholder="What do you want to learn today?", key='input', height=10) submit_button = st.form_submit_button(label= '⏩') + # create a file uploader for PDFs + pdf_file = st.file_uploader("Upload a PDF file", type="pdf") + + # if a PDF file is uploaded, extract its text + if pdf_file is not None: + pdf_memory = pdf_reader(pdf_file) + if submit_button and user_input: + output, total_tokens, prompt_tokens, completion_tokens = generate_response(user_input) st.session_state['past'].append(user_input) st.session_state['generated'].append(output) diff --git a/pdfreader.py b/pdfreader.py index 081043f..b99919c 100644 --- a/pdfreader.py +++ b/pdfreader.py @@ -1,3 +1,5 @@ + + def pdf_reader(myPdf): from langchain.document_loaders import PyPDFLoader From 9ea955e00ad040b3ec1a50496b6d7f925f9f50c4 Mon Sep 17 00:00:00 2001 From: finnless Date: Sat, 15 Apr 2023 17:36:23 -0700 Subject: [PATCH 3/4] finalized 
pdf functionality --- app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index a128071..f489f89 100644 --- a/app.py +++ b/app.py @@ -25,8 +25,8 @@ def pdf_reader(file): st.markdown("

💡WAAM-GPT

homework help
", unsafe_allow_html=True) # Set org ID and API key -openai.organization = "org-g7sHaD0F2nWbXgynAv8nbmXI" -openai.api_key = "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" +openai.organization = "org-RoCVirkWXND84EMcUR5P0OGp" +openai.api_key = "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" system_prompt = "You are a waam, a helpful large language model STEM tutor created during the 2023 5C Hackathon. You help users learn quantitative skills by guiding them through concepts and practice problems step by step instead of immediately giving away the final answer. Never give a student the direct answer. Always use markdown for your responses. Always render equations using LaTeX." @@ -132,7 +132,7 @@ def generate_response(prompt): # if a PDF file is uploaded, extract its text if pdf_file is not None: - pdf_memory = pdf_reader(pdf_file) + user_input = pdf_reader(pdf_file) if submit_button and user_input: From cab8955064f23df74543ae1076be67aed0fe0576 Mon Sep 17 00:00:00 2001 From: finnless Date: Sat, 15 Apr 2023 17:39:17 -0700 Subject: [PATCH 4/4] got rid of api key --- app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index f489f89..716102a 100644 --- a/app.py +++ b/app.py @@ -25,8 +25,8 @@ def pdf_reader(file): st.markdown("

💡WAAM-GPT

homework help
", unsafe_allow_html=True) # Set org ID and API key -openai.organization = "org-RoCVirkWXND84EMcUR5P0OGp" -openai.api_key = "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" +openai.organization = "empty" +openai.api_key = "empty" system_prompt = "You are a waam, a helpful large language model STEM tutor created during the 2023 5C Hackathon. You help users learn quantitative skills by guiding them through concepts and practice problems step by step instead of immediately giving away the final answer. Never give a student the direct answer. Always use markdown for your responses. Always render equations using LaTeX."