diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6769e21
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/README.md b/README.md
index e1423aa..96b194c 100644
--- a/README.md
+++ b/README.md
@@ -7,3 +7,11 @@ Harnessing cutting-edge artificial intelligence, QuickTrace accelerates the inve
 providing journalists with lightning-fast access to critical information.
 Maximize efficiency, uncover the truth, and elevate your investigations with QuickTrace—the
 trusted companion of every journalist committed to impactful and comprehensive reporting.
+
+## Connecting to Google Drive
+Enable the Google Drive API by following the instructions [here](https://developers.google.com/drive/api/quickstart/python), then save the resulting `credentials.json` file in the top-level directory of this repository.
+
+To download all files from your Google Drive account so they can be uploaded to QuickTrace,
+run `python google_drive.py`.
+
+To search for files of a specific type, pass a MIME type to `search_filetype`, e.g. `python -c "from google_drive import search_filetype; search_filetype('application/pdf')"`.
diff --git a/app.py b/app.py
index 7021c1c..61d888e 100644
--- a/app.py
+++ b/app.py
@@ -7,6 +7,7 @@
 from langchain.chat_models import ChatOpenAI
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.vectorstores import FAISS
+from pathlib import Path
 
 from audio_utils import convert_audio_to_text
 from file_knowledge import FileKnowledge
@@ -66,11 +67,11 @@ def initialize_sidebar(session):
     with st.sidebar:
         show_all_konwledge = st.button("Show all knowledge", key="show_all_konwledge")
         with st.expander("Upload files"):
-            process_files("pdf", get_splitter(), session)
-            process_files("m4a", get_splitter(), session)
+            process_files(get_splitter(), session)
+
 
         st.header("Journalist toolbox")
-        st.write("Upload your PDF file or audio file")
+        st.write("Upload your PDF, audio, text, or CSV files")
         st.write("Then ask a question and get an answer")
         st.write("You can also download the text of the uploaded files")
         st.divider()
@@ -86,9 +87,10 @@ def get_splitter():
         length_function=len,
     )
 
-def process_files(file_type, splitter, session):
-    files = st.file_uploader(f"Upload your {file_type} file", type=[file_type], accept_multiple_files=True)
+def process_files(splitter, session):
+    files = st.file_uploader("Upload your files!", accept_multiple_files=True)
     for file in files:
+        file_type = Path(file.name).suffix.lstrip('.').lower()
         if file.name not in st.session_state["knowledge"].keys():
             file_knowledge = FileKnowledge(name=file.name, file=file, filetype=file_type, splitter=splitter)
             session[file.name] = file_knowledge
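With `process_files` now accepting every upload, the file's type is inferred from its name rather than from a per-type uploader. A minimal sketch of that routing assumption follows; the `detect_file_type` helper is illustrative only and not part of the diff:

```python
from pathlib import Path

def detect_file_type(filename: str) -> str:
    # same idea as the new process_files(): extension without the leading dot, lower-cased
    return Path(filename).suffix.lstrip('.').lower()

# values FileKnowledge will receive as `filetype`
assert detect_file_type("interview.m4a") == "m4a"
assert detect_file_type("Report.PDF") == "pdf"
assert detect_file_type("notes") == ""  # no extension: FileKnowledge will reject it
```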
diff --git a/file_knowledge.py b/file_knowledge.py
index 3abf7fb..b836bb8 100644
--- a/file_knowledge.py
+++ b/file_knowledge.py
@@ -2,6 +2,7 @@ import tempfile
 
 from dataclasses import dataclass, field
 from typing import Any, List, TypeVar
+from io import StringIO
 
 from langchain.text_splitter import CharacterTextSplitter
 from PyPDF2 import PdfReader
@@ -9,6 +10,7 @@
 from audio_utils import convert_audio_to_text
 
 UploadedFile = TypeVar('UploadedFile', bound=Any)
+SUPPORTED_FILE_TYPES = ['pdf', 'csv', 'txt', 'html', 'm4a', 'eml', 'msg', 'mbox']
 
 
 @dataclass
@@ -23,6 +25,7 @@ class FileKnowledge:
     def __post_init__(self):
         self.content = self.extract_text()
         self.chunks = self.splitter.split_text(self.content)
+
 
     @property
     def content(self):
@@ -42,6 +45,7 @@
     def chunks(self, value):
         self._chunks = value
         self.save_to_session_state()
+
 
     def save_to_session_state(self):
         st.session_state.knowledge[self.name] = self
@@ -50,8 +54,14 @@ def extract_text(self):
             return self.extract_text_from_pdf()
         elif self.filetype == 'm4a':
             return self.extract_text_from_audio()
+        elif self.filetype == 'txt':
+            return self.extract_text_generic()
+        elif self.filetype == 'csv':
+            return self.extract_text_generic()
+
         else:
-            raise ValueError(f'Unsupported filetype: {self.filetype}')
+            if self.filetype not in SUPPORTED_FILE_TYPES:
+                raise ValueError(f'Unsupported filetype: {self.filetype}')
 
     def extract_text_from_pdf(self):
         # Add your code here to extract text from a PDF file
@@ -60,7 +70,11 @@ def extract_text_from_pdf(self):
         for page in pdf_reader.pages:
             text += page.extract_text()
         return text
-    
+
+    def extract_text_generic(self):
+        stringio = StringIO(self.file.getvalue().decode("utf-8"))
+        return stringio.read()
+
     def extract_text_from_audio(self):
         with tempfile.NamedTemporaryFile(delete=False, suffix=".m4a") as tmp:
             tmp.write(self.file.read())
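One gap worth noting: `SUPPORTED_FILE_TYPES` declares 'html', 'eml', 'msg' and 'mbox', but `extract_text` has no branch for them, so those uploads fall through the `else` and return `None`. If decoding them as plain text is acceptable, the routing could close that gap along the lines below; this is a sketch under that assumption, not part of the diff, and `extract_text_fallback` / `TEXT_LIKE_TYPES` are hypothetical names:

```python
from io import StringIO

# hypothetical grouping of supported types that have no dedicated extractor
TEXT_LIKE_TYPES = {'csv', 'txt', 'html', 'eml', 'msg', 'mbox'}

def extract_text_fallback(filetype: str, raw: bytes) -> str:
    # decoding mirrors FileKnowledge.extract_text_generic(): treat the upload as UTF-8 text
    if filetype not in TEXT_LIKE_TYPES:
        raise ValueError(f'No generic extraction for filetype: {filetype}')
    return StringIO(raw.decode("utf-8")).read()

print(extract_text_fallback('csv', b"name,source\nLena,court records\n"))
```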
diff --git a/google_drive.py b/google_drive.py
new file mode 100644
index 0000000..d8171ba
--- /dev/null
+++ b/google_drive.py
@@ -0,0 +1,192 @@
+# Taken from https://www.thepythoncode.com/article/using-google-drive--api-in-python
+from __future__ import print_function
+
+import re
+import pickle
+import os
+from googleapiclient.discovery import build
+from google_auth_oauthlib.flow import InstalledAppFlow
+from google.auth.transport.requests import Request
+from tabulate import tabulate
+import requests
+from tqdm import tqdm
+
+import os.path
+from googleapiclient.http import MediaFileUpload
+# If modifying these scopes, delete the file token.pickle.
+SCOPES = ['https://www.googleapis.com/auth/drive.metadata',
+          'https://www.googleapis.com/auth/drive',
+          'https://www.googleapis.com/auth/drive.file'
+          ]
+
+def get_gdrive_service():
+    creds = None
+    # The file token.pickle stores the user's access and refresh tokens, and is
+    # created automatically when the authorization flow completes for the first
+    # time.
+    if os.path.exists('token.pickle'):
+        with open('token.pickle', 'rb') as token:
+            creds = pickle.load(token)
+    # If there are no (valid) credentials available, let the user log in.
+    if not creds or not creds.valid:
+        if creds and creds.expired and creds.refresh_token:
+            creds.refresh(Request())
+        else:
+            flow = InstalledAppFlow.from_client_secrets_file(
+                'credentials.json', SCOPES)
+            creds = flow.run_local_server(port=0)
+        # Save the credentials for the next run
+        with open('token.pickle', 'wb') as token:
+            pickle.dump(creds, token)
+    # return Google Drive API service
+    return build('drive', 'v3', credentials=creds)
+
+def download_all_files(n: int = 50):
+    """Lists the first n files the user has access to
+    and downloads each of them from Google Drive.
+    """
+    service = get_gdrive_service()
+    # Call the Drive v3 API
+    results = service.files().list(
+        pageSize=n, fields="nextPageToken, files(id, name, mimeType, size, parents, modifiedTime)").execute()
+    # get the results
+    items = results.get('files', [])
+    list_files(items)
+    for file in items:
+        download_file_from_google_drive(file['id'], file['name'])
+
+
+def search_filetype(filetype: str = "text/plain"):
+    # filetype is a MIME type, e.g. "text/plain" or "application/pdf"
+    # authenticate Google Drive API
+    service = get_gdrive_service()
+    # search for files that have the given MIME type
+    search_result = search(service, query=f"mimeType='{filetype}'")
+    # convert to table to print well
+    table = tabulate(search_result, headers=["ID", "Name", "Type"])
+    print(table)
+
+
+def list_files(items):
+    """given items returned by Google Drive API, prints them in a tabular way"""
+    rows = []
+    if not items:
+        # empty drive
+        print('No files found.')
+    else:
+        for item in items:
+            # get the File ID
+            id = item["id"]
+            # get the name of file
+            name = item["name"]
+            try:
+                # parent directory ID
+                parents = item["parents"]
+            except KeyError:
+                # has no parents
+                parents = "N/A"
+            try:
+                # get the size in nice bytes format (KB, MB, etc.)
+                size = get_size_format(int(item["size"]))
+            except KeyError:
+                # not a file, may be a folder
+                size = "N/A"
+            # get the Google Drive type of file
+            mime_type = item["mimeType"]
+            # get last modified date time
+            modified_time = item["modifiedTime"]
+            # append everything to the list
+            rows.append((id, name, parents, size, mime_type, modified_time))
+        print("Files:")
+        # convert to a human readable table
+        table = tabulate(rows, headers=["ID", "Name", "Parents", "Size", "Type", "Modified Time"])
+        # print the table
+        print(table)
+    return rows
+
+def get_size_format(b, factor=1024, suffix="B"):
+    """
+    Scale a byte count to a human-readable format,
+    e.g.:
+        1253656 => '1.20MB'
+        1253656678 => '1.17GB'
+    """
+    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
+        if b < factor:
+            return f"{b:.2f}{unit}{suffix}"
+        b /= factor
+    return f"{b:.2f}Y{suffix}"
+
+def search(service, query):
+    # search for the file
+    result = []
+    page_token = None
+    while True:
+        response = service.files().list(q=query,
+                                        spaces="drive",
+                                        fields="nextPageToken, files(id, name, mimeType)",
+                                        pageToken=page_token).execute()
+        # iterate over filtered files
+        for file in response.get("files", []):
+            result.append((file["id"], file["name"], file["mimeType"]))
+        page_token = response.get('nextPageToken', None)
+        if not page_token:
+            # no more files
+            break
+    return result
+
+def download(filename):
+    service = get_gdrive_service()
+    # the name of the file you want to download from Google Drive
+    # search for the file by name
+    search_result = search(service, query=f"name='{filename}'")
+    # get the GDrive ID of the file
+    file_id = search_result[0][0]
+    # make it shareable
+    service.permissions().create(body={"role": "reader", "type": "anyone"}, fileId=file_id).execute()
+    # download file
+    download_file_from_google_drive(file_id, filename)
+
+def download_file_from_google_drive(id, destination):
+    def get_confirm_token(response):
+        for key, value in response.cookies.items():
+            if key.startswith('download_warning'):
+                return value
+        return None
+
+    def save_response_content(response, destination):
+        CHUNK_SIZE = 32768
+        # get the file size from Content-length response header
+        file_size = int(response.headers.get("Content-Length", 0))
+        # extract Content disposition from response headers
+        content_disposition = response.headers.get("content-disposition")
+        # parse filename
+        filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
+        print("[+] File size:", file_size)
+        print("[+] File name:", filename)
+        progress = tqdm(response.iter_content(CHUNK_SIZE), f"Downloading {filename}", total=file_size, unit="Byte", unit_scale=True, unit_divisor=1024)
+        with open(destination, "wb") as f:
+            for chunk in progress:
+                if chunk: # filter out keep-alive new chunks
+                    f.write(chunk)
+                    # update the progress bar
+                    progress.update(len(chunk))
+        progress.close()
+
+    # base URL for download
+    URL = "https://docs.google.com/uc?export=download"
+    # init a HTTP session
+    session = requests.Session()
+    # make a request
+    response = session.get(URL, params={'id': id}, stream=True)
+    print("[+] Downloading", response.url)
+    # get confirmation token
+    token = get_confirm_token(response)
+    if token:
+        params = {'id': id, 'confirm': token}
+        response = session.get(URL, params=params, stream=True)
+    # download to disk
+    save_response_content(response, destination)
+
+if __name__ == '__main__':
+    download_all_files()
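For context on how the helpers above compose: `search()` passes its `query` argument straight through as the Drive API's `q` filter, which is why `search_filetype` can filter by MIME type while `download` filters by exact name. A hedged usage sketch follows; it assumes `credentials.json` is in place and the one-time OAuth consent flow has been completed, and the file name used is made up:

```python
from google_drive import get_gdrive_service, search, search_filetype, download

service = get_gdrive_service()   # opens a browser for OAuth consent on first run

# Drive query language: filter by MIME type ...
pdfs = search(service, query="mimeType='application/pdf'")
# ... or by exact file name (hypothetical name)
notes = search(service, query="name='meeting-notes.txt'")

print(pdfs[:3])                  # each hit is an (id, name, mimeType) tuple

search_filetype("text/plain")    # prints a table of matching files
download("meeting-notes.txt")    # looks the file up by name and saves it locally
```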
diff --git a/requirements.txt b/requirements.txt
index c6af25d..39f8b2b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,10 @@ openai==0.27.6
 tiktoken==0.4.0
 faiss_cpu==1.7.4
 pydub==0.25.1
-ffmpeg-python==0.2.0
\ No newline at end of file
+ffmpeg-python==0.2.0
+google-api-python-client==2.88.0
+google-auth-httplib2==0.1.0
+google-auth-oauthlib==1.0.0
+tabulate==0.9.0
+requests==2.31.0
+tqdm==4.65.0
\ No newline at end of file
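Finally, a quick way to confirm the new dependencies and credentials are wired up before touching the Streamlit app is to list a handful of Drive files with the same service object the module uses. A small sketch, assuming `pip install -r requirements.txt` has been run and `credentials.json` exists in the repository root:

```python
from google_drive import get_gdrive_service, list_files

service = get_gdrive_service()
results = service.files().list(
    pageSize=5,
    fields="files(id, name, mimeType, size, parents, modifiedTime)",
).execute()
list_files(results.get("files", []))   # prints an ID / Name / Size / Type table
```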