From dab36e55f64ef0489ebd48c3e9aecdfb6bc12a0c Mon Sep 17 00:00:00 2001 From: pookam90 Date: Wed, 13 Nov 2024 16:54:05 +0530 Subject: [PATCH 1/5] adding sample for Langchain SQL Vector Store --- Langchain-SQL-RAG/.env.sample | 14 + Langchain-SQL-RAG/.gitignore | 21 + Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb | 617 ++++++++++++++++++++++ Langchain-SQL-RAG/readme.md | 55 ++ 4 files changed, 707 insertions(+) create mode 100644 Langchain-SQL-RAG/.env.sample create mode 100644 Langchain-SQL-RAG/.gitignore create mode 100644 Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb create mode 100644 Langchain-SQL-RAG/readme.md diff --git a/Langchain-SQL-RAG/.env.sample b/Langchain-SQL-RAG/.env.sample new file mode 100644 index 0000000..8fbc2a9 --- /dev/null +++ b/Langchain-SQL-RAG/.env.sample @@ -0,0 +1,14 @@ + +# Azure OpenAI Service details +AZURE_ENDPOINT="https://.openai.azure.com/" +AZURE_DEPLOYMENT_EMBEDDING_NAME=" 100 else doc.page_content for doc in response[\"context\"]],\r\n", + " }\r\n", + "\r\n", + "\r\n", + " # Create a DataFrame\r\n", + " df = pd.DataFrame(data)\r\n", + "\r\n", + " # Print the table\r\n", + " print(\"\\nSources:\")\r\n", + " print(df.to_markdown(index=False))\r\n", + "" + ], + "metadata": { + "language": "python", + "azdata_cell_guid": "73e93a6c-9acd-4697-9046-085743f11939" + }, + "outputs": [], + "execution_count": 54 + }, + { + "cell_type": "code", + "source": [ + "# Define the user query\r\n", + "user_query = \"How did Harry feel when he first learnt that he was a Wizard?\"\r\n", + "\r\n", + "# Call the function to get the answer and sources\r\n", + "get_answer_and_sources(user_query)" + ], + "metadata": { + "language": "python", + "azdata_cell_guid": "1792a440-3b48-4f94-a358-690c0e052579" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Answer: When Harry first learned that he was a wizard, he felt quite sure there had been a horrible mistake. 
He struggled to believe it because he had spent his life being bullied and mistreated by the Dursleys. If he was really a wizard, he wondered why he hadn't been able to use magic to defend himself. This disbelief and surprise were evident when he gasped, “I’m a what?”\n\nSources:\n| Doc ID | Content |\n|:--------------------------------------------|:------------------------------------------------------|\n| 01 Harry Potter and the Sorcerers Stone.txt | Harry was wondering what a wizard did once he’d fi... |\n| 01 Harry Potter and the Sorcerers Stone.txt | Harry realized his mouth was open and closed it qu... |\n| 01 Harry Potter and the Sorcerers Stone.txt | “Most of us reckon he’s still out there somewhere ... |\n| 01 Harry Potter and the Sorcerers Stone.txt | “Ah, go boil yer heads, both of yeh,” said Hagrid.... |\n" + } + ], + "execution_count": 56 + }, + { + "cell_type": "code", + "source": [ + "# Define the user query\r\n", + "user_query = \"Did Harry have a pet? What was it\"\r\n", + "\r\n", + "# Call the function to get the answer and sources\r\n", + "get_answer_and_sources(user_query)" + ], + "metadata": { + "language": "python", + "azdata_cell_guid": "81bb6f25-2a5f-494b-a389-1ab4ba6308ae" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Answer: Yes, Harry had a pet owl named Hedwig. He decided to call her Hedwig after finding the name in a book titled *A History of Magic*.\n\nSources:\n| Doc ID | Content |\n|:--------------------------------------------|:------------------------------------------------------|\n| 01 Harry Potter and the Sorcerers Stone.txt | Harry sank down next to the bowl of peas. “What di... |\n| 01 Harry Potter and the Sorcerers Stone.txt | Harry kept to his room, with his new owl for compa... |\n| 01 Harry Potter and the Sorcerers Stone.txt | As the snake slid swiftly past him, Harry could ha... |\n| 01 Harry Potter and the Sorcerers Stone.txt | Ron reached inside his jacket and pulled out a fat... 
|\n" + } + ], + "execution_count": 57 + }, + { + "cell_type": "markdown", + "source": [ + "## Use Case 2 : Generate fan fiction based on user prompts:\n", + "\n", + "Potterheads are known for their creativity and passion for the series. With this they can craft their own stories based on user prompt given , explore new adventures, and even create alternate endings. Whether it's imagining a new duel between Harry and Voldemort or crafting a personalized Hogwarts bedtime story for you kiddo, the possibilities are endless.\n", + "\n", + "The fan fiction function uses the embeddings in the vector store to generate new stories :\n", + "\n", + "- **Retrieving Relevant Passages**: When a user provides a prompt for a fan fiction story, the function first retrieves relevant passages from the SQL vector store. The vector store contains embeddings of the text from the Harry Potter books, which allows it to find passages that are contextually similar to the user's prompt.\n", + "\n", + "- **Formatting the Retrieved Passages**: The retrieved passages are then formatted into a coherent context. This involves combining the text from the retrieved passages into a single string that can be used as input for the language model.\n", + "\n", + "- **Generating the Story:** The formatted context, along with the user's prompt, is fed into a language model GPT4o to generate the fan fiction story. The language model uses the context to ensure that the generated story is relevant and coherent, incorporating elements from the retrieved passages." 
+ ], + "metadata": { + "azdata_cell_guid": "6e1af32d-90c8-4c85-b869-8ccf3be300f1" + }, + "attachments": {} + }, + { + "cell_type": "code", + "source": [ + "# Define the function to perform the RAG chain invocation and create the DataFrame\r\n", + "def generate_fan_fiction(user_query: str):\r\n", + "    # Perform similarity search with scores\r\n", + "    docs_with_score: List[Tuple[Document, float]] = vector_store.similarity_search_with_score(\r\n", + "        user_query, \r\n", + "        k=10  \r\n", + "    )\r\n", + "\r\n", + "    # Extract the context from the top results\r\n", + "    context = \"\\n\".join([doc.page_content for doc, score in docs_with_score])\r\n", + "\r\n", + "    # Define the system prompt\r\n", + "    system_prompt = (\r\n", + "        \"You are an assistant for generating fan fiction bedtime stories for Harry Potter series fans. \"\r\n", + "        \"Use the following pieces of retrieved context to create a story based on the prompt. \"\r\n", + "        \"Be creative and engaging. \"\r\n", + "        \"Limit the story to 15 sentences.\"\r\n", + "        \"\\n\\n\"\r\n", + "        \"{context}\"\r\n", + "    )\r\n", + "\r\n", + "    # Create the prompt template\r\n", + "    prompt = ChatPromptTemplate.from_messages(\r\n", + "        [\r\n", + "            (\"system\", system_prompt),\r\n", + "            (\"human\", \"{input}\"),\r\n", + "        ]\r\n", + "    )\r\n", + "\r\n", + "    # Create the retriever and chains\r\n", + "    retriever = vector_store.as_retriever()\r\n", + "    story_generation_chain = create_stuff_documents_chain(llm, prompt)\r\n", + "    rag_chain = create_retrieval_chain(retriever, story_generation_chain)\r\n", + "\r\n", + "    # Define the input\r\n", + "    input_data = {\"input\": user_query}\r\n", + "\r\n", + "    # Invoke the RAG chain\r\n", + "    response = rag_chain.invoke(input_data)\r\n", + "\r\n", + "    # Print the generated story\r\n", + "    print(\"Generated Story:\", response[\"answer\"])\r\n", + "    print(\"-\" * 80)\r\n", + "\r\n", + "    # Prepare the data for the table\r\n", + "    data = {\r\n", + "        \"Doc ID\": [doc.metadata.get(\"source\", 
\"N/A\").split('/')[-1] for doc, score in docs_with_score],\r\n", + " \"Score\": [f\"{score:.2f}\" for doc, score in docs_with_score],\r\n", + " \"Content\": [doc.page_content[:50] + \"...\" if len(doc.page_content) > 100 else doc.page_content for doc, score in docs_with_score],\r\n", + " }\r\n", + "\r\n", + " # Create a DataFrame\r\n", + " df = pd.DataFrame(data)\r\n", + "\r\n", + " # Print the table\r\n", + " print(\"\\nSources for Inspiration:\")\r\n", + " print(df.to_markdown(index=False))" + ], + "metadata": { + "language": "python", + "azdata_cell_guid": "72f643a1-da30-4cf3-b87f-762b58bfa825" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "# Define the user query\r\n", + "user_query = \"Write a short story about how Harry meets a boy called Davide Mauri on Hogwarts express on their first day. Davide also tells Harry & the others about Native Vector Support feature Azure SQL DB going Public preview in the Muggle world\"\r\n", + "\r\n", + "# Call the function to generate the fan fiction story\r\n", + "generate_fan_fiction(user_query)" + ], + "metadata": { + "language": "python", + "azdata_cell_guid": "37d284dc-1ea2-4e18-a286-59032b6a9576" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "Thus Combining the Q&A system with the fan fiction generator offers a unique and immersive reading experience. If users come across a puzzling moment in the books, they can ask the Q&A system for clarification. If they're inspired by a particular scene, they can use the fan fiction generator to expand on it and create their own version of events. This interactive approach makes reading more engaging and enjoyable." 
+ ], + "metadata": { + "azdata_cell_guid": "8e737c80-4ea9-4122-af0a-fa482edb4aae" + }, + "attachments": {} + } + ] +} \ No newline at end of file diff --git a/Langchain-SQL-RAG/readme.md b/Langchain-SQL-RAG/readme.md new file mode 100644 index 0000000..0deb402 --- /dev/null +++ b/Langchain-SQL-RAG/readme.md @@ -0,0 +1,55 @@ +# Building AI-powered apps on Azure SQL Database using LLMs and LangChain + +Azure SQL Database now supports native vector search capabilities, bringing the power of vector search operations directly to your SQL databases. You can read the full announcement of the public preview [here](https:\devblogs.microsoft.com\azure-sql\exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database\) + +We are also thrilled to announce the release of [langchain-sqlserver](https:\pypi.org\project\langchain-sqlserver\) version 0.1.1. You can use this package to manage Langchain vectorstores in SQL Server. This new release brings enhanced capabilities by parsing both ODBC connection strings and SQLAlchemy format connection strings, making it easier than ever to integrate with Azure SQL DB + +In this step-by-step tutorial, we will show you how to add generative AI features to your own applications with just a few lines of code using Azure SQL DB, [LangChain](https:\pypi.org\project\langchain-sqlserver\), and LLMs. + +## Dataset + +The Harry Potter series, written by J.K. Rowling, is a globally beloved collection of seven books that follow the journey of a young wizard, Harry Potter, and his friends as they battle the dark forces led by the evil Voldemort. Its captivating plot, rich characters, and imaginative world have made it one of the most famous and cherished series in literary history.  + +This Sample dataset from [Kaggle](https:\www.kaggle.com\datasets\shubhammaindola\harry-potter-books) contains 7 .txt files of 7 books of Harry Potter. For this demo we will only be using the first book - Harry Potter and the Sorcerer's Stone. 
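Since `langchain-sqlserver` accepts both ODBC and SQLAlchemy-format connection strings, it helps to see how the two relate. The sketch below uses only the standard library; the server, database, and credential values are placeholders, and the driver name assumes ODBC Driver 18 is installed:

```python
from urllib.parse import quote_plus

# A raw ODBC connection string (all values are placeholders).
odbc_str = (
    "Driver={ODBC Driver 18 for SQL Server};"
    "Server=tcp:<your-server>.database.windows.net,1433;"
    "Database=<your-database>;Uid=<user>;Pwd=<password>;"
    "Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;"
)

# The equivalent SQLAlchemy-style URL wraps the URL-encoded ODBC string
# in the `odbc_connect` query parameter.
sqlalchemy_url = "mssql+pyodbc:///?odbc_connect=" + quote_plus(odbc_str)
print(sqlalchemy_url[:60])
```

Either form can then be passed where the sample expects a connection string.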
+ +In this notebook, we will showcase two exciting use cases: +1. A sample Python application that can understand and respond to human language queries about the data stored in your Azure SQL Database. This **Q&A system** leverages the power of SQL Vector Store & LangChain to provide accurate and context-rich answers from the Harry Potter Book. +1. Next, we will push the creative limits of the application by teaching it to generate new AI-driven **Harry Potter fan fiction** based on our existing dataset of Harry Potter books. This feature is sure to delight Potterheads, allowing them to explore new adventures and create their own magical stories. + +## Prerequisites + +- **Azure Subscription**: [Create one for free](https:\azure.microsoft.com\free\cognitive-services?azure-portal=true) + +- **Azure SQL Database**: [Set up your database for free](https:\learn.microsoft.com\azure\azure-sql\database\free-offer?view=azuresql) + +- **Azure OpenAI Access**: Apply for access in the desired Azure subscription at [https://aka.ms/oai/access](https:\aka.ms\oai\access) + +- **Azure OpenAI Resource**: Deploy an embeddings model (e.g., `text-embedding-small` or `text-embedding-ada-002`) and a `GPT-4.0` model for chat completion. Refer to the [resource deployment guide](https:\learn.microsoft.com\azure\ai-services\openai\how-to\create-resource) + +- **Azure Blob Storage** Deploy a Azure [Blob Storage Account](https:\learn.microsoft.com\azure\storage\blobs\storage-quickstart-blobs-portal) to upload your dataset + +- **Python**: Version 3.7.1 or later from Python.org. (Sample has been tested with Python 3.11) + +- **Python Libraries**: Install the required libraries from the requirements.txt + +- **Jupyter Notebooks**: Use within [Azure Data Studio](https:\learn.microsoft.com\en-us\azure-data-studio\notebooks\notebooks-guidance) or Visual Studio Code . + + +## Getting Started + +1. 
**Model Deployment**: Deploy an embeddings model (`text-embedding-small` or `text-embedding-ada-002`) and a `GPT-4` model for chat completion. Note the two deployment names for use in the `.env` file +![Deployed OpenAI Models](..\Assets\modeldeployment.png) +2. **Connection String**: Find your Azure SQL DB connection string in the Azure portal under your database settings. +3. **Configuration**: Populate the `.env` file with your SQL Server connection details, Azure OpenAI key and endpoint, API version, and model deployment names +You can retrieve the Azure OpenAI _endpoint_ and _key_: +![Azure OpenAI Endpoint and Key](..\Assets\endpoint.png) +4. **Upload dataset** In your [Blob Storage Account](https:\learn.microsoft.com\en-us\azure\storage\blobs\storage-quickstart-blobs-portal) create a container and upload the .txt file using the steps [here](https:\learn.microsoft.com\azure\storage\blobs\storage-quickstart-blobs-portal) +## Running the Notebook +To [execute the notebook](https:\learn.microsoft.com\azure-data-studio\notebooks\notebooks-python-kernel), connect to your Azure SQL database using Azure Data Studio, which can be downloaded [here](https:\azure.microsoft.com\products\data-studio) \ No newline at end of file From c65651da8eec59bcac14d1eee529926a2e6983dd Mon Sep 17 00:00:00 2001 From: pookam90 Date: Wed, 13 Nov 2024 16:57:03 +0530 Subject: [PATCH 2/5] adding sample for Langchain SQL Vector Store --- Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb b/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb index c25845c..b2cde75 100644 --- a/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb +++ b/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb @@ -176,8 +176,8 @@ "from langchain.text_splitter import RecursiveCharacterTextSplitter\r\n", "\r\n", "# Define your connection string and blob details\r\n", - "conn_str = 
\"DefaultEndpointsProtocol=https;AccountName=pkblob;AccountKey=WtQWvdDAKMggEVpAE6+qPpKT5qlsBUPSICtwsNtH4JpEhJV6hixBZD5m26BPqjQKUlv6SapX8G8P+AStJ5MSRw==;EndpointSuffix=core.windows.net\"\r\n", - "container_name = \"pookamcontainer\"\r\n", + "conn_str = \"DefaultEndpointsProtocol=https;AccountName=;AccountKey=;EndpointSuffix=core.windows.net\"\r\n", + "container_name = \"YourContainerName\"\r\n", "blob_name = \"01 Harry Potter and the Sorcerers Stone.txt\"\r\n", "\r\n", "# Create an instance of AzureBlobStorageFileLoader\r\n", From aaba734a6bdb18ede0d583ffe45d96d7943d389b Mon Sep 17 00:00:00 2001 From: pookam90 Date: Wed, 13 Nov 2024 17:05:00 +0530 Subject: [PATCH 3/5] Adding Sample for Langchain + SQL Vector Store --- Langchain-SQL-RAG/.env.sample | 5 +++++ Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb | 11 +++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Langchain-SQL-RAG/.env.sample b/Langchain-SQL-RAG/.env.sample index 8fbc2a9..7edde75 100644 --- a/Langchain-SQL-RAG/.env.sample +++ b/Langchain-SQL-RAG/.env.sample @@ -6,6 +6,11 @@ AZURE_DEPLOYMENT_CHATCOMPLETION_NAME="" AZURE_API_VERSION="2023-05-15" AZURE_API_KEY=";AccountKey=;EndpointSuffix=core.windows.net\"\r\n", - "container_name = \"YourContainerName\"\r\n", - "blob_name = \"01 Harry Potter and the Sorcerers Stone.txt\"\r\n", + "# Load environment variables from a .env file\r\n", + "load_dotenv()\r\n", + "\r\n", + "# Get the connection string and container name from the environment variables\r\n", + "conn_str = os.getenv(\"AZURE_CONN_STR\")\r\n", + "container_name = os.getenv(\"AZURE_CONTAINER_NAME\")\r\n", + "blob_name = \"01 Harry Potter and the Sorcerers Stone.txt\" #Name of the .txt file\r\n", "\r\n", "# Create an instance of AzureBlobStorageFileLoader\r\n", "loader = AzureBlobStorageFileLoader(conn_str=conn_str, container=container_name, blob_name=blob_name)\r\n", From e8ee55c773b4671c79a588201e2762cf556fe796 Mon Sep 17 00:00:00 2001 From: pookam90 Date: Wed, 13 Nov 2024 18:04:08 
+0530 Subject: [PATCH 4/5] "adding notebook for Langchain + SQLDB" --- Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb | 58 ++++++++++++----------- Langchain-SQL-RAG/readme.md | 24 +++++----- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb b/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb index 82ad4aa..10c5613 100644 --- a/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb +++ b/Langchain-SQL-RAG/Langchain-SQL-RAG.ipynb @@ -26,17 +26,17 @@ "source": [ "# Building AI-powered apps on Azure SQL Database using LLMs and LangChain\n", "\n", - "Azure SQL Database now supports native vector search capabilities, bringing the power of vector search operations directly to your SQL databases. You can read the full announcement of the public preview [here](https:\\devblogs.microsoft.com\\azure-sql\\exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database\\)\n", + "Azure SQL Database now supports native vector search capabilities, bringing the power of vector search operations directly to your SQL databases. You can read the full announcement of the public preview [here](https://devblogs.microsoft.com/azure-sql/exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database)\n", "\n", - "We are also thrilled to announce the release of [langchain-sqlserver](https:\\pypi.org\\project\\langchain-sqlserver\\) version 0.1.1. You can use this package to manage Langchain vectorstores in SQL Server. This new release brings enhanced capabilities by parsing both ODBC connection strings and SQLAlchemy format connection strings, making it easier than ever to integrate with Azure SQL DB\n", + "We are also thrilled to announce the release of [langchain-sqlserver](https://pypi.org/project/langchain-sqlserver) version 0.1.1. You can use this package to manage Langchain vectorstores in SQL Server. 
This new release brings enhanced capabilities by parsing both ODBC connection strings and SQLAlchemy format connection strings, making it easier than ever to integrate with Azure SQL DB.\n", "\n", - "In this step-by-step tutorial, we will show you how to add generative AI features to your own applications with just a few lines of code using Azure SQL DB, [LangChain](https:\\pypi.org\\project\\langchain-sqlserver\\), and LLMs.\n", + "In this step-by-step tutorial, we will show you how to add generative AI features to your own applications with just a few lines of code using Azure SQL DB, [LangChain](https://pypi.org/project/langchain-sqlserver), and LLMs.\n", "\n", "## Dataset\n", "\n", "The Harry Potter series, written by J.K. Rowling, is a globally beloved collection of seven books that follow the journey of a young wizard, Harry Potter, and his friends as they battle the dark forces led by the evil Voldemort. Its captivating plot, rich characters, and imaginative world have made it one of the most famous and cherished series in literary history. \n", "\n", - "This Sample dataset from [Kaggle](https:\\www.kaggle.com\\datasets\\shubhammaindola\\harry-potter-books) contains 7 .txt files of 7 books of Harry Potter. For this demo we will only be using the first book - Harry Potter and the Sorcerer's Stone.\n", + "This sample dataset from [Kaggle](https://www.kaggle.com/datasets/shubhammaindola/harry-potter-books) contains 7 .txt files, one for each of the 7 Harry Potter books. For this demo we will only be using the first book - Harry Potter and the Sorcerer's Stone.\n", "\n", "In this notebook, we will showcase two exciting use cases:\n", "1. A sample Python application that can understand and respond to human language queries about the data stored in your Azure SQL Database. 
This **Q&A system** leverages the power of SQL Vector Store & LangChain to provide accurate and context-rich answers from the Harry Potter Book.\n", @@ -44,21 +44,21 @@ "\n", "## Prerequisites\n", "\n", - "- **Azure Subscription**: [Create one for free](https:\\azure.microsoft.com\\free\\cognitive-services?azure-portal=true)\n", + "- **Azure Subscription**: [Create one for free](https://azure.microsoft.com/free/cognitive-services?azure-portal=true)\n", " \n", - "- **Azure SQL Database**: [Set up your database for free](https:\\learn.microsoft.com\\azure\\azure-sql\\database\\free-offer?view=azuresql)\n", + "- **Azure SQL Database**: [Set up your database for free](https://learn.microsoft.com/azure/azure-sql/database/free-offer?view=azuresql)\n", " \n", - "- **Azure OpenAI Access**: Apply for access in the desired Azure subscription at [https://aka.ms/oai/access](https:\\aka.ms\\oai\\access)\n", + "- **Azure OpenAI Access**: Apply for access in the desired Azure subscription at [https://aka.ms/oai/access](https://aka.ms/oai/access)\n", " \n", - "- **Azure OpenAI Resource**: Deploy an embeddings model (e.g., `text-embedding-small` or `text-embedding-ada-002`) and a `GPT-4.0` model for chat completion. Refer to the [resource deployment guide](https:\\learn.microsoft.com\\azure\\ai-services\\openai\\how-to\\create-resource) \n", + "- **Azure OpenAI Resource**: Deploy an embeddings model (e.g., `text-embedding-small` or `text-embedding-ada-002`) and a `GPT-4` model for chat completion. 
Refer to the [resource deployment guide](https://learn.microsoft.com/azure/ai-services/openai/how-to/create-resource) \n", "\n", - "- **Azure Blob Storage** Deploy a Azure [Blob Storage Account](https:\\learn.microsoft.com\\azure\\storage\\blobs\\storage-quickstart-blobs-portal) to upload your dataset\n", + "- **Azure Blob Storage** Deploy an Azure [Blob Storage Account](https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-portal) to upload your dataset\n", " \n", "- **Python**: Version 3.7.1 or later from Python.org. (Sample has been tested with Python 3.11)\n", " \n", "- **Python Libraries**: Install the required libraries from the requirements.txt\n", " \n", - "- **Jupyter Notebooks**: Use within [Azure Data Studio](https:\\learn.microsoft.com\\en-us\\azure-data-studio\\notebooks\\notebooks-guidance) or Visual Studio Code .\n", + "- **Jupyter Notebooks**: Use within [Azure Data Studio](https://learn.microsoft.com/azure-data-studio/notebooks/notebooks-guidance) or Visual Studio Code.\n", " \n", "\n", "## Getting Started\n", @@ -74,11 +74,11 @@ "\n", "![Azure OpenAI Endpoint and Key](..\\Assets\\endpoint.png)\n", "\n", - "4. **Upload dataset** In your [Blob Storage Account](https:\\learn.microsoft.com\\en-us\\azure\\storage\\blobs\\storage-quickstart-blobs-portal) create a container and upload the .txt file using the steps [here](https:\\learn.microsoft.com\\azure\\storage\\blobs\\storage-quickstart-blobs-portal)\n", + "4. 
**Upload dataset** In your [Blob Storage Account](https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-portal) create a container and upload the .txt file using the steps [here](https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-portal)\n", "\n", "## Running the Notebook\n", "\n", - "To [execute the notebook](https:\\learn.microsoft.com\\azure-data-studio\\notebooks\\notebooks-python-kernel), connect to your Azure SQL database using Azure Data Studio, which can be downloaded [here](https:\\azure.microsoft.com\\products\\data-studio)" + "To [execute the notebook](https://learn.microsoft.com/azure-data-studio/notebooks/notebooks-python-kernel), connect to your Azure SQL database using Azure Data Studio, which can be downloaded [here](https://azure.microsoft.com/products/data-studio)" ], "metadata": { "language": "python", @@ -113,15 +113,15 @@ }, "outputs": [ { - "output_type": "execute_result", - "execution_count": 49, "data": { "text/plain": "True" }, - "metadata": {} + "metadata": {}, + "execution_count": 2, + "output_type": "execute_result" } ], - "execution_count": 49 + "execution_count": 2 }, { "cell_type": "markdown", @@ -170,7 +170,6 @@ { "cell_type": "code", "source": [ - "import magic\r\n", "from langchain.document_loaders import AzureBlobStorageFileLoader\r\n", "from langchain_core.documents import Document\r\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\r\n", @@ -181,7 +180,7 @@ "# Get the connection string and container name from the environment variables\r\n", "conn_str = os.getenv(\"AZURE_CONN_STR\")\r\n", "container_name = os.getenv(\"AZURE_CONTAINER_NAME\")\r\n", - "blob_name = \"01 Harry Potter and the Sorcerers Stone.txt\" #Name of the .txt file\r\n", + "blob_name = \"01 Harry Potter and the Sorcerers Stone.txt\" # Name of the .txt file\r\n", "\r\n", "# Create an instance of AzureBlobStorageFileLoader\r\n", "loader = AzureBlobStorageFileLoader(conn_str=conn_str, 
container=container_name, blob_name=blob_name)\r\n", @@ -194,8 +193,7 @@ "split_documents = text_splitter.split_documents(documents)\r\n", "\r\n", "# Print the number of split documents\r\n", - "print(f\"Number of split documents: {len(split_documents)}\")\r\n", - "" + "print(f\"Number of split documents: {len(split_documents)}\")" ], "metadata": { "azdata_cell_guid": "53569555-bc01-417a-9234-962f26a6d7d8", @@ -204,12 +202,12 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", - "text": "Number of split documents: 572\n" + "text": "Number of split documents: 572\n", + "output_type": "stream" } ], - "execution_count": 50 + "execution_count": 7 }, { "cell_type": "markdown", @@ -375,7 +373,7 @@ "\n", "The LangChain Vector store simplifies building sophisticated Q&A systems by enabling efficient **similarity searches** to find the top 10 relevant documents based on the user's query. The retriever is created from the **vector\\_store,** and the question-answer chain is built using the **create\\_stuff\\_documents\\_chain** function. A prompt template is crafted using the **ChatPromptTemplate** class, ensuring structured and context-rich responses. Often in Q&A applications it's important to show users the sources that were used to generate the answer. 
LangChain's built-in **create\\_retrieval\\_chain** will propagate retrieved source documents to the output under the \"**context**\" key:\n", "\n", - "Read more about Langchain RAG tutorials & the terminologies mentioned above [here](https:\\python.langchain.com\\docs\\tutorials\\rag\\)" + "Read more about Langchain RAG tutorials & the terminologies mentioned above [here](https://python.langchain.com/docs/tutorials/rag)" ], "metadata": { "language": "python", @@ -588,7 +586,7 @@ "azdata_cell_guid": "72f643a1-da30-4cf3-b87f-762b58bfa825" }, "outputs": [], - "execution_count": null + "execution_count": 2 }, { "cell_type": "code", @@ -603,8 +601,14 @@ "language": "python", "azdata_cell_guid": "37d284dc-1ea2-4e18-a286-59032b6a9576" }, - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "text": "Generated Story: Harry boarded the Hogwarts Express with a mix of excitement and nerves fluttering in his chest. He wandered through the train's narrow corridors, looking for an empty compartment. Most were already filled with chattering students, but finally, he found one with only a single boy inside.\n\n“Mind if I join you?” Harry asked, peeking in.\n\nThe boy looked up from a thick book with a smile. “Not at all. I’m Davide. Davide Mauri.”\n\n“I’m Harry. Harry Potter.”\n\nDavide’s eyes widened for a moment, but he quickly recovered. “Nice to meet you, Harry. I’ve read about you. Quite the story.”\n\nHarry shrugged, feeling a bit self-conscious. “Yeah, it’s...a lot. What are you reading?”\n\nDavide’s face lit up. “Oh, it’s a bit technical. You see, my parents are Muggles, and they work in technology. There’s this new feature in Azure SQL Database called Native Vector Support that’s gone public preview. It’s amazing!”\n\nHarry blinked, trying to keep up. “What’s that?”\n\n“It’s a way to store, index, and query high-dimensional data efficiently,” Davide explained, his enthusiasm infectious. 
“Perfect for machine learning, AI, similarity search, and recommendation systems. Imagine having a spell that helps you find exactly what you need among thousands of options, instantly!”\n\nRon, who had just entered the compartment munching on a chocolate frog, raised an eyebrow. “Sounds complicated.”\n\n“It is, but it’s fascinating!” Davide continued. “In the Muggle world, it’s a huge step forward. My parents were so excited, I couldn’t help but read up on it.”\n\nHarry grinned. “Sounds like you’re already a bit of a wizard in your own right, Davide.”\n\nAs the train chugged along, the three boys chatted about magic, Muggle technology, and the adventures that awaited them at Hogwarts. Harry felt a warmth spread through him, knowing he had already made a new friend who could bridge both his worlds.\n--------------------------------------------------------------------------------\n\nSources for Inspiration:\n| Doc ID | Score | Content |\n|:--------------------------------------------|--------:|:------------------------------------------------------|\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.58 | Hagrid leaned across the table. Behind the wild be... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.58 | Harry was wondering what a wizard did once he’d fi... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.58 | Harry was rather quiet as he ate the ice cream Hag... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.59 | Harry kept to his room, with his new owl for compa... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.59 | At this moment the boat bumped gently into the har... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.6 | Harry was strongly reminded of Dudley. |\n| | | |\n| | | “Have you ... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.6 | Hardly anyone had seen Harry play because Wood had... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.6 | “What’s your Quidditch team?” Ron asked. |\n| | | |\n| | | “Er — I ... 
|\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.6 | But from that moment on, Hermione Granger became t... |\n| 01 Harry Potter and the Sorcerers Stone.txt | 0.61 | “Er — Uncle Vernon?” |\n| | | |\n| | | Uncle Vernon grunted to show... |\n", + "output_type": "stream" + } + ], + "execution_count": 14 }, { "cell_type": "markdown", diff --git a/Langchain-SQL-RAG/readme.md b/Langchain-SQL-RAG/readme.md index 0deb402..6421432 100644 --- a/Langchain-SQL-RAG/readme.md +++ b/Langchain-SQL-RAG/readme.md @@ -1,16 +1,16 @@ # Building AI-powered apps on Azure SQL Database using LLMs and LangChain -Azure SQL Database now supports native vector search capabilities, bringing the power of vector search operations directly to your SQL databases. You can read the full announcement of the public preview [here](https:\devblogs.microsoft.com\azure-sql\exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database\) +Azure SQL Database now supports native vector search capabilities, bringing the power of vector search operations directly to your SQL databases. You can read the full announcement of the public preview [here](https:/devblogs.microsoft.com/azure-sql/exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database) -We are also thrilled to announce the release of [langchain-sqlserver](https:\pypi.org\project\langchain-sqlserver\) version 0.1.1. You can use this package to manage Langchain vectorstores in SQL Server. This new release brings enhanced capabilities by parsing both ODBC connection strings and SQLAlchemy format connection strings, making it easier than ever to integrate with Azure SQL DB +We are also thrilled to announce the release of [langchain-sqlserver](https:/pypi.org/project/langchain-sqlserver) version 0.1.1. You can use this package to manage Langchain vectorstores in SQL Server. 
This new release brings enhanced capabilities by parsing both ODBC connection strings and SQLAlchemy format connection strings, making it easier than ever to integrate with Azure SQL DB. -In this step-by-step tutorial, we will show you how to add generative AI features to your own applications with just a few lines of code using Azure SQL DB, [LangChain](https:\pypi.org\project\langchain-sqlserver\), and LLMs. +In this step-by-step tutorial, we will show you how to add generative AI features to your own applications with just a few lines of code using Azure SQL DB, [LangChain](https://pypi.org/project/langchain-sqlserver/), and LLMs. ## Dataset The Harry Potter series, written by J.K. Rowling, is a globally beloved collection of seven books that follow the journey of a young wizard, Harry Potter, and his friends as they battle the dark forces led by the evil Voldemort. Its captivating plot, rich characters, and imaginative world have made it one of the most famous and cherished series in literary history. -This Sample dataset from [Kaggle](https:\www.kaggle.com\datasets\shubhammaindola\harry-potter-books) contains 7 .txt files of 7 books of Harry Potter. For this demo we will only be using the first book - Harry Potter and the Sorcerer's Stone. +This sample dataset from [Kaggle](https://www.kaggle.com/datasets/shubhammaindola/harry-potter-books) contains 7 .txt files, one for each of the 7 Harry Potter books. For this demo we will only be using the first book, Harry Potter and the Sorcerer's Stone. In this notebook, we will showcase two exciting use cases: 1. A sample Python application that can understand and respond to human language queries about the data stored in your Azure SQL Database. This **Q&A system** leverages the power of SQL Vector Store & LangChain to provide accurate and context-rich answers from the Harry Potter book. 
@@ -18,21 +18,21 @@ In this notebook, we will showcase two exciting use cases: ## Prerequisites -- **Azure Subscription**: [Create one for free](https:\azure.microsoft.com\free\cognitive-services?azure-portal=true) +- **Azure Subscription**: [Create one for free](https://azure.microsoft.com/free/cognitive-services?azure-portal=true) -- **Azure SQL Database**: [Set up your database for free](https:\learn.microsoft.com\azure\azure-sql\database\free-offer?view=azuresql) +- **Azure SQL Database**: [Set up your database for free](https://learn.microsoft.com/azure/azure-sql/database/free-offer?view=azuresql) -- **Azure OpenAI Access**: Apply for access in the desired Azure subscription at [https://aka.ms/oai/access](https:\aka.ms\oai\access) +- **Azure OpenAI Access**: Apply for access in the desired Azure subscription at [https://aka.ms/oai/access](https://aka.ms/oai/access) -- **Azure OpenAI Resource**: Deploy an embeddings model (e.g., `text-embedding-small` or `text-embedding-ada-002`) and a `GPT-4.0` model for chat completion. Refer to the [resource deployment guide](https:\learn.microsoft.com\azure\ai-services\openai\how-to\create-resource) +- **Azure OpenAI Resource**: Deploy an embeddings model (e.g., `text-embedding-3-small` or `text-embedding-ada-002`) and a `GPT-4` model for chat completion. Refer to the [resource deployment guide](https://learn.microsoft.com/azure/ai-services/openai/how-to/create-resource) -- **Azure Blob Storage** Deploy a Azure [Blob Storage Account](https:\learn.microsoft.com\azure\storage\blobs\storage-quickstart-blobs-portal) to upload your dataset +- **Azure Blob Storage**: Deploy an Azure [Blob Storage Account](https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-portal) to upload your dataset. - **Python**: Version 3.7.1 or later from Python.org. 
(Sample has been tested with Python 3.11) - **Python Libraries**: Install the required libraries from the requirements.txt -- **Jupyter Notebooks**: Use within [Azure Data Studio](https:\learn.microsoft.com\en-us\azure-data-studio\notebooks\notebooks-guidance) or Visual Studio Code . +- **Jupyter Notebooks**: Use within [Azure Data Studio](https://learn.microsoft.com/azure-data-studio/notebooks/notebooks-guidance) or Visual Studio Code. ## Getting Started @@ -48,8 +48,8 @@ You can retrieve the Azure OpenAI _endpoint_ and _key_: ![Azure OpenAI Endpoint and Key](../Assets/endpoint.png) -4. **Upload dataset** In your [Blob Storage Account](https:\learn.microsoft.com\en-us\azure\storage\blobs\storage-quickstart-blobs-portal) create a container and upload the .txt file using the steps [here](https:\learn.microsoft.com\azure\storage\blobs\storage-quickstart-blobs-portal) +4. **Upload dataset**: In your [Blob Storage Account](https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-portal), create a container and upload the .txt file using the steps [here](https://learn.microsoft.com/azure/storage/blobs/storage-quickstart-blobs-portal). ## Running the Notebook -To [execute the notebook](https:\learn.microsoft.com\azure-data-studio\notebooks\notebooks-python-kernel), connect to your Azure SQL database using Azure Data Studio, which can be downloaded [here](https:\azure.microsoft.com\products\data-studio) \ No newline at end of file +To [execute the notebook](https://learn.microsoft.com/azure-data-studio/notebooks/notebooks-python-kernel), connect to your Azure SQL database using Azure Data Studio, which can be downloaded [here](https://azure.microsoft.com/products/data-studio). \ No newline at end of file From 55242d1925e60e5c39bd0d25fcd8211fb8c545f9 Mon Sep 17 00:00:00 2001 From: utsav verma Date: Fri, 27 Dec 2024 15:41:33 +0530 Subject: [PATCH 5/5] When inserting a value into the vector data type, the parameter needs to be cast to NVARCHAR(MAX). 
Otherwise it is treated as NTEXT and throws a conversion error. --- Hybrid-Search/hybrid_search.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Hybrid-Search/hybrid_search.py b/Hybrid-Search/hybrid_search.py index d1aae94..1fb432d 100644 --- a/Hybrid-Search/hybrid_search.py +++ b/Hybrid-Search/hybrid_search.py @@ -35,7 +35,8 @@ for id, (content, embedding) in enumerate(zip(sentences, embeddings)): cursor.execute(f""" - INSERT INTO dbo.documents (id, content, embedding) VALUES (?, ?, CAST(? AS VECTOR(384))); + INSERT INTO dbo.documents (id, content, embedding) + VALUES (?, ?, CAST(CAST(? AS NVARCHAR(MAX)) AS VECTOR(384))); """, id, content, @@ -59,7 +60,7 @@ results = cursor.execute(f""" DECLARE @k INT = ?; DECLARE @q NVARCHAR(1000) = ?; - DECLARE @v VECTOR(384) = CAST(? AS VECTOR(384)); + DECLARE @v VECTOR(384) = CAST(CAST(? AS NVARCHAR(MAX)) AS VECTOR(384)); WITH keyword_search AS ( SELECT TOP(@k) id,
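
The double CAST in the patch above works because the vector parameter is sent to SQL Server as a JSON-style array string, which the outer `CAST(... AS VECTOR(n))` then parses; forcing the parameter through `NVARCHAR(MAX)` avoids the NTEXT binding that caused the conversion error. A minimal sketch of the parameter preparation (the `vector_param` helper name and the commented pyodbc call are illustrative, not part of the patch):

```python
import json

def vector_param(embedding):
    """Serialize a list of floats into the JSON-array string that
    CAST(CAST(? AS NVARCHAR(MAX)) AS VECTOR(n)) expects as its parameter."""
    return json.dumps(embedding)

# Hypothetical usage with a connected pyodbc cursor:
# cursor.execute(
#     """
#     INSERT INTO dbo.documents (id, content, embedding)
#     VALUES (?, ?, CAST(CAST(? AS NVARCHAR(MAX)) AS VECTOR(384)));
#     """,
#     doc_id, content, vector_param(embedding),
# )

print(vector_param([0.1, 0.2, 0.3]))  # → [0.1, 0.2, 0.3]
```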