From acf87f4527be7da2d109b21f59af57ca609e37c9 Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Thu, 12 Dec 2024 14:04:17 -0800
Subject: [PATCH 1/9] added initial folder structure

---
 samples/notebooks/anaconda_webinar/README.md  |  2 ++
 .../anaconda_webinar_notebook.ipynb           | 34 +++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 samples/notebooks/anaconda_webinar/README.md
 create mode 100644 samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb

diff --git a/samples/notebooks/anaconda_webinar/README.md b/samples/notebooks/anaconda_webinar/README.md
new file mode 100644
index 00000000..f15ffdf1
--- /dev/null
+++ b/samples/notebooks/anaconda_webinar/README.md
@@ -0,0 +1,2 @@
+# Title
+Placeholder title
diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
new file mode 100644
index 00000000..e1dfb1c6
--- /dev/null
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -0,0 +1,34 @@
+{
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Streamlit Notebook",
+   "name": "streamlit"
+  }
+ },
+ "nbformat_minor": 5,
+ "nbformat": 4,
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "3775908f-ca36-4846-8f38-5adca39217f2",
+   "metadata": {
+    "language": "python",
+    "name": "cell1"
+   },
+   "source": "# Import python packages\nimport streamlit as st\nimport pandas as pd\n\n# We can also use Snowpark for our analyses!\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9",
+   "metadata": {
+    "language": "sql",
+    "name": "cell2"
+   },
+   "source": "select\n    o_custkey as id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by all",
+   "execution_count": null,
+   "outputs": []
+  }
+ ]
+}
\ No newline at end of file

From 1f03ffb2cf66880f986dd7cad2a1e4d3b85e8641 Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Thu, 12 Dec 2024 16:23:34 -0800
Subject: [PATCH 2/9] Update anaconda_webinar_notebook.ipynb

---
 .../anaconda_webinar_notebook.ipynb           | 139 +++++++++++++++++-
 1 file changed, 132 insertions(+), 7 deletions(-)

diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
index e1dfb1c6..747841c0 100644
--- a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -13,22 +13,147 @@
    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
    "metadata": {
     "language": "python",
-    "name": "cell1"
+    "name": "cell1",
+    "collapsed": false,
+    "resultHeight": 0
    },
-   "source": "# Import python packages\nimport streamlit as st\nimport pandas as pd\n\n# We can also use Snowpark for our analyses!\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n",
+   "source": "# Import python packages\nimport streamlit as st\nimport pandas as pd\nimport os\n# We can also use Snowpark for our analyses!\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n",
    "execution_count": null,
    "outputs": []
   },
+  {
+   "cell_type": "markdown",
+   "id": "8ae58f97-bb31-4290-b2dd-2416f3c2ce15",
+   "metadata": {
+    "name": "cell9",
+    "collapsed": false,
+    "resultHeight": 74
+   },
+   "source": "# Growth Accounting"
+  },
   {
    "cell_type": "code",
-   "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9",
+   "id": "435baefb-25ff-42a1-b4f8-236a98b4afac",
    "metadata": {
     "language": "sql",
-    "name": "cell2"
+    "name": "cell3",
+    "collapsed": false,
+    "resultHeight": 510
    },
-   "source": "select\n    o_custkey as id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by all",
-   "execution_count": null,
-   "outputs": []
+   "outputs": [],
+   "source": "select\n    o_custkey as id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by all\norder by id, order_year",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "20f1dd62-d796-4190-b34a-89a16fea1819",
+   "metadata": {
+    "language": "python",
+    "name": "cell10",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "df = cell3.to_pandas()\n\n#pivot data to add row for each id:year with no revenue\nresult = df.pivot_table(\n    index='ID',\n    columns='ORDER_YEAR', \n    values='TOTAL',\n    fill_value=0\n).reset_index().melt(\n    id_vars='ID',\n    var_name='ORDER_YEAR',\n    value_name='TOTAL'\n)\n\n# save the dataframe as table for SQL querying \ndf = session.create_dataframe(result)\ndf.write.mode(\"overwrite\").save_as_table(\"df\", table_type=\"temporary\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "52ae5a36-e143-4ebb-b884-e17750b0c77f",
+   "metadata": {
+    "language": "sql",
+    "name": "cell7",
+    "collapsed": false,
+    "resultHeight": 426
+   },
+   "outputs": [],
+   "source": "select * from df\norder by id, order_year\nlimit 10",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "11971c03-53a7-4429-870a-4b51bbef7aca",
+   "metadata": {
+    "language": "sql",
+    "name": "cell6",
+    "collapsed": false,
+    "resultHeight": 510
+   },
+   "outputs": [],
+   "source": "with windowed as (\n    \n    select\n        *,\n        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n    from df\n\n)\n\nselect *,\n  case\n    when total = previous_year_total and total > 0 then 'retained'\n    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n    when total = 0 and previous_year_total > 0 then 'churned'\n    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n  else 'irrelevant' end as category,\n  case category\n    when 'retained' then 0\n    when 'new' then total\n    when 'churned' then (-1 * previous_year_total)\n    when 'expanded' then total - previous_year_total\n    when 'contracted' then (-1 * (previous_year_total - total))\n    when 'resurrected' then total\n  else 0 end as net_change\nfrom windowed\norder by id, order_year",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "13f099e5-4265-438d-ab46-b3315bfc1f1d",
+   "metadata": {
+    "language": "sql",
+    "name": "cell4",
+    "collapsed": false,
+    "resultHeight": 438
+   },
+   "outputs": [],
+   "source": "select\n    date_part(year, order_year) as order_year,\n    category,\n    round(sum(total)) as total,\n    round(sum(net_change)) as net_change\nfrom {{ cell6 }}\ngroup by all",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "735da8fc-91c0-4604-8041-1437208a1f01",
+   "metadata": {
+    "language": "python",
+    "name": "cell2",
+    "collapsed": false,
+    "resultHeight": 772
+   },
+   "outputs": [],
+   "source": "# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(cell4, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "06f083eb-ae70-42ad-af0d-261138126bed",
+   "metadata": {
+    "language": "python",
+    "name": "cell5",
+    "collapsed": false,
+    "resultHeight": 96
+   },
+   "outputs": [],
+   "source": "df = cell6.to_pandas()\nbutton_csv = df.to_csv().encode(\"utf-8\")\nst.download_button(label=\"Download\", data=button_csv, file_name=\"growth_accounting.csv\", mime=\"text/csv\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db63ea18-13d4-43a4-a29c-a734db89e796",
+   "metadata": {
+    "name": "cell8",
+    "collapsed": false,
+    "resultHeight": 74
+   },
+   "source": "# Forecasting"
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1d9d5e85-1ad1-422d-9859-20025e4b8561",
+   "metadata": {
+    "name": "cell11",
+    "collapsed": false,
+    "resultHeight": 74
+   },
+   "source": "# API Enrichment"
+  },
+  {
+   "cell_type": "code",
+   "id": "9bd53742-511c-4cf9-9e28-02bdbcaca463",
+   "metadata": {
+    "language": "python",
+    "name": "cell13",
+    "collapsed": false,
+    "resultHeight": 6166
+   },
+   "outputs": [],
+   "source": "import requests\n\ndef get_wiki_extract(title):\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n    \n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n    \n    # Make the request\n    response = requests.get(url, params=params)\n    \n    # Check if request was successful\n    if response.status_code == 200:\n        data = response.json()\n        # Navigate through the JSON response to get the extract\n        pages = data[\"query\"][\"pages\"]\n        # Get the first (and only) page's extract\n        page = list(pages.values())[0]\n        return page.get(\"extract\", \"No extract available\")\n    else:\n        return f\"Error: {response.status_code}\"\n\ncat_breeds = [\n    'Abyssinian_cat',\n    'Aegean_cat',\n    'American_Bobtail',\n    'American_Curl',\n    'American_Ringtail',\n    'American_Shorthair',\n    'American_Wirehair',\n    'Arabian_Mau',\n    'Asian_cat',\n    'Asian_Semi-longhair',\n    'Australian_Mist',\n    'Balinese_cat',\n    'Bambino_cat',\n    'Bengal_cat',\n    'Birman',\n    'Bombay_cat',\n    'Brazilian_Shorthair',\n    'British_Longhair',\n    'British_Shorthair',\n    'Burmese_cat',\n    'Burmilla',\n    'California_Spangled',\n    'Chantilly-Tiffany',\n    'Chartreux',\n    'Chausie',\n    'Colorpoint_Shorthair',\n    'Cornish_Rex',\n    'Cymric_cat',\n    'Cyprus_cat',\n    'Devon_Rex',\n    'Donskoy_cat',\n    'Dragon_Li',\n    'Egyptian_Mau',\n    'European_Shorthair',\n    'Exotic_Shorthair',\n    'Foldex_cat',\n    'German_Rex',\n    'Havana_Brown',\n    'Highlander_cat',\n    'Himalayan_cat',\n    'Japanese_Bobtail',\n    'Javanese_cat',\n    'Kanaani_cat',\n    'Khao_Manee',\n    'Kinkalow',\n    'Korat',\n    
'Korean_Bobtail',\n    'Kurilian_Bobtail',\n    'Lambkin_cat',\n    'LaPerm',\n    'Lykoi',\n    'Maine_Coon',\n    'Manx_cat',\n    'Mekong_Bobtail',\n    'Minskin',\n    'Minuet_cat',\n    'Munchkin_cat',\n    'Nebelung',\n    'Neva_Masquerade',\n    'Norwegian_Forest_cat',\n    'Ocicat',\n    'Ojos_Azules',\n    'Oriental_bicolour',\n    'Oriental_Longhair',\n    'Oriental_Shorthair',\n    'Persian_cat',\n    'Traditional_Persian',\n    'Peterbald',\n    'Pixie-bob',\n    'Ragamuffin_cat',\n    'Ragdoll',\n    'Raas_cat',\n    'Russian_Blue',\n    'Savannah_cat',\n    'Scottish_Fold',\n    'Selkirk_Rex',\n    'Serengeti_cat',\n    'Siamese_cat',\n    'Siberian_cat',\n    'Singapura_cat',\n    'Snowshoe_cat',\n    'Sokoke',\n    'Somali_cat',\n    'Sphynx_cat',\n    'Suphalak',\n    'Thai_cat',\n    'Tonkinese_cat',\n    'Toybob',\n    'Toyger',\n    'Turkish_Angora',\n    'Turkish_Van',\n    'Van_cat',\n    'Ukrainian_Levkoy',\n    'York_Chocolate'\n]\ncsv_list = []\n\nfor cat in cat_breeds:\n    print(cat)\n    extract = get_wiki_extract(cat)\n    print(extract)\n    csv_list.append((cat, extract))\n\n# Convert to dataframe and save\ndf = pd.DataFrame(csv_list, columns=['breed', 'description'])\ndf.to_csv('cat_breeds.csv', index=False, encoding='utf-8')",
+   "execution_count": null
   }
  ]
 }
\ No newline at end of file

From 3bde76158ec24becfa2d9c134756b88fa74ff209 Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Wed, 18 Dec 2024 11:32:40 -0800
Subject: [PATCH 3/9] Update anaconda_webinar_notebook.ipynb

---
 .../anaconda_webinar_notebook.ipynb           | 187 +++++++++++++++++-
 1 file changed, 177 insertions(+), 10 deletions(-)

diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
index 747841c0..5a39a4bc 100644
--- a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -17,7 +17,7 @@
     "collapsed": false,
     "resultHeight": 0
    },
-   "source": "# Import python packages\nimport streamlit as st\nimport pandas as pd\nimport os\n# We can also use Snowpark for our analyses!\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n",
+   "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()",
    "execution_count": null,
    "outputs": []
   },
@@ -29,7 +29,7 @@
     "collapsed": false,
     "resultHeight": 74
    },
-   "source": "# Growth Accounting"
+   "source": "# Growth Accounting\n"
   },
   {
    "cell_type": "code",
@@ -44,6 +44,19 @@
    "source": "select\n    o_custkey as id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by all\norder by id, order_year",
    "execution_count": null
   },
+  {
+   "cell_type": "code",
+   "id": "61f451db-8ff2-4d83-b9be-6c1a77365446",
+   "metadata": {
+    "language": "python",
+    "name": "cell12",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "import pandas as pd",
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "id": "20f1dd62-d796-4190-b34a-89a16fea1819",
@@ -106,7 +119,7 @@
     "resultHeight": 772
    },
    "outputs": [],
-   "source": "# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(cell4, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
+   "source": "import streamlit as st\n# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(cell4, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
    "execution_count": null
   },
   {
@@ -130,17 +143,118 @@
     "collapsed": false,
     "resultHeight": 74
    },
-   "source": "# Forecasting"
+   "source": "# Forecasting\n"
+  },
+  {
+   "cell_type": "code",
+   "id": "2a9b9481-4d24-4f6c-9b53-4f50add6458e",
+   "metadata": {
+    "language": "sql",
+    "name": "cell14",
+    "collapsed": false,
+    "resultHeight": 438
+   },
+   "outputs": [],
+   "source": "select\n    date_trunc(day, o_orderdate) as order_date,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by 1\norder by order_date asc",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "9d5d7b4a-43cc-4c62-844e-a1954c312cbf",
+   "metadata": {
+    "language": "python",
+    "name": "cell15",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "from prophet import Prophet\nfrom prophet.plot import plot_plotly, plot_components_plotly",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "87ca009b-4da8-46c2-a86c-9cad46fac89f",
+   "metadata": {
+    "language": "python",
+    "name": "cell17",
+    "collapsed": false,
+    "resultHeight": 150
+   },
+   "outputs": [],
+   "source": "df = cell14.to_pandas()\ndf = df.rename(columns={'ORDER_DATE': 'ds', 'TOTAL': 'y'})\nprint(df.head())",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "4efeff4d-da4b-4c1d-b3d5-a892bb2a2bc5",
+   "metadata": {
+    "language": "python",
+    "name": "cell19",
+    "collapsed": false,
+    "resultHeight": 372
+   },
+   "outputs": [],
+   "source": "st.line_chart(df, x='ds', y='y')",
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
-   "id": "1d9d5e85-1ad1-422d-9859-20025e4b8561",
+   "id": "cbffd526-a4b0-405b-9718-6c5c2f8f6144",
    "metadata": {
-    "name": "cell11",
+    "name": "cell21",
+    "collapsed": false,
+    "resultHeight": 120
+   },
+   "source": "Waiting on role permission to write UDFs for Prophet library to run properly. Until then, code cell below will return \n```<class 'Exception'> Failed with error [Errno 1] Operation not permitted: '/usr/lib/python_udf/d212b0f949a4a60cf75395f561f7016ea978bad39b2e60eee12ece87d118e861/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin'```"
+  },
+  {
+   "cell_type": "code",
+   "id": "9d2c4877-5815-4f49-a53d-816b38de4eb6",
+   "metadata": {
+    "language": "python",
+    "name": "cell26",
+    "collapsed": false,
+    "resultHeight": 95
+   },
+   "outputs": [],
+   "source": "m = Prophet()\ntry:\n    m.fit(df)\nexcept Exception as err:\n    print(Exception, err)",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "ce582f14-9490-4a54-8fe0-bbfc8b56f61f",
+   "metadata": {
+    "language": "python",
+    "name": "cell23",
+    "collapsed": false,
+    "resultHeight": 1126
+   },
+   "outputs": [],
+   "source": "future = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)\n#fig2 = m.plot_components(forecast)",
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5dc1abf7-b9ea-4fe4-88ae-109342f6dc05",
+   "metadata": {
+    "name": "cell25",
     "collapsed": false,
     "resultHeight": 74
    },
-   "source": "# API Enrichment"
+   "source": "# Customer Segmentation"
+  },
+  {
+   "cell_type": "code",
+   "id": "939a7d50-2679-46ee-a43b-b7d03b627d61",
+   "metadata": {
+    "language": "sql",
+    "name": "cell16",
+    "collapsed": false,
+    "resultHeight": 426
+   },
+   "outputs": [],
+   "source": "select *\nfrom ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\nlimit 10",
+   "execution_count": null
   },
   {
    "cell_type": "code",
@@ -149,11 +263,64 @@
     "language": "python",
     "name": "cell13",
     "collapsed": false,
-    "resultHeight": 6166
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "import requests\n\ndef get_wiki_extract(title):\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n    \n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n    \n    # Make the request\n    response = requests.get(url, params=params)\n    \n    # Check if request was successful\n    if response.status_code == 200:\n        data = response.json()\n        # Navigate through the JSON response to get the extract\n        pages = data[\"query\"][\"pages\"]\n        # Get the first (and only) page's extract\n        page = list(pages.values())[0]\n        return page.get(\"extract\", \"No extract available\")\n    else:\n        return f\"Error: {response.status_code}\"",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "0557102d-3584-469a-9fdc-be53fd0a249b",
+   "metadata": {
+    "language": "python",
+    "name": "cell22",
+    "collapsed": false,
+    "resultHeight": 60
+   },
+   "outputs": [],
+   "source": "df = cell16.to_pandas()\ncompany_names = df['NAME'].tolist()\ncsv_list = []\n\nprint(\"extracting descriptions\")\n\nfor name in company_names:\n    try:\n        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n        #print(f'extracted description of {name} from Wikipedia')\n    except Exception as e:\n        #print(f\"Error getting Wikipedia extract for {name}: {str(e)}\")\n        extract = \"None available\"\n        \n    csv_list.append((name, extract))\n\nprint(\"finished extracting descriptions\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "e979ca68-494a-46d4-a92d-d106d52980fb",
+   "metadata": {
+    "language": "python",
+    "name": "cell18",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "# save the dataframe as table for SQL querying \ndf = pd.DataFrame(csv_list, columns=['name', 'description'])\ndf = session.create_dataframe(df)\ndf.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "3f5d40d9-ca69-4137-affa-905caef97c29",
+   "metadata": {
+    "language": "sql",
+    "name": "cell20",
+    "resultHeight": 426,
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": "select \"name\", \"description\" from prospects limit 10",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "51396730-f96a-476b-bb12-d7cac8c02576",
+   "metadata": {
+    "language": "sql",
+    "name": "cell24",
+    "codeCollapsed": false,
+    "collapsed": false,
+    "resultHeight": 135
    },
    "outputs": [],
-   "source": "import requests\n\ndef get_wiki_extract(title):\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n    \n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n    \n    # Make the request\n    response = requests.get(url, params=params)\n    \n    # Check if request was successful\n    if response.status_code == 200:\n        data = response.json()\n        # Navigate through the JSON response to get the extract\n        pages = data[\"query\"][\"pages\"]\n        # Get the first (and only) page's extract\n        page = list(pages.values())[0]\n        return page.get(\"extract\", \"No extract available\")\n    else:\n        return f\"Error: {response.status_code}\"\n\ncat_breeds = [\n    'Abyssinian_cat',\n    'Aegean_cat',\n    'American_Bobtail',\n    'American_Curl',\n    'American_Ringtail',\n    'American_Shorthair',\n    'American_Wirehair',\n    'Arabian_Mau',\n    'Asian_cat',\n    'Asian_Semi-longhair',\n    'Australian_Mist',\n    'Balinese_cat',\n    'Bambino_cat',\n    'Bengal_cat',\n    'Birman',\n    'Bombay_cat',\n    'Brazilian_Shorthair',\n    'British_Longhair',\n    'British_Shorthair',\n    'Burmese_cat',\n    'Burmilla',\n    'California_Spangled',\n    'Chantilly-Tiffany',\n    'Chartreux',\n    'Chausie',\n    'Colorpoint_Shorthair',\n    'Cornish_Rex',\n    'Cymric_cat',\n    'Cyprus_cat',\n    'Devon_Rex',\n    'Donskoy_cat',\n    'Dragon_Li',\n    'Egyptian_Mau',\n    'European_Shorthair',\n    'Exotic_Shorthair',\n    'Foldex_cat',\n    'German_Rex',\n    'Havana_Brown',\n    'Highlander_cat',\n    'Himalayan_cat',\n    'Japanese_Bobtail',\n    'Javanese_cat',\n    'Kanaani_cat',\n    'Khao_Manee',\n    'Kinkalow',\n    'Korat',\n    
'Korean_Bobtail',\n    'Kurilian_Bobtail',\n    'Lambkin_cat',\n    'LaPerm',\n    'Lykoi',\n    'Maine_Coon',\n    'Manx_cat',\n    'Mekong_Bobtail',\n    'Minskin',\n    'Minuet_cat',\n    'Munchkin_cat',\n    'Nebelung',\n    'Neva_Masquerade',\n    'Norwegian_Forest_cat',\n    'Ocicat',\n    'Ojos_Azules',\n    'Oriental_bicolour',\n    'Oriental_Longhair',\n    'Oriental_Shorthair',\n    'Persian_cat',\n    'Traditional_Persian',\n    'Peterbald',\n    'Pixie-bob',\n    'Ragamuffin_cat',\n    'Ragdoll',\n    'Raas_cat',\n    'Russian_Blue',\n    'Savannah_cat',\n    'Scottish_Fold',\n    'Selkirk_Rex',\n    'Serengeti_cat',\n    'Siamese_cat',\n    'Siberian_cat',\n    'Singapura_cat',\n    'Snowshoe_cat',\n    'Sokoke',\n    'Somali_cat',\n    'Sphynx_cat',\n    'Suphalak',\n    'Thai_cat',\n    'Tonkinese_cat',\n    'Toybob',\n    'Toyger',\n    'Turkish_Angora',\n    'Turkish_Van',\n    'Van_cat',\n    'Ukrainian_Levkoy',\n    'York_Chocolate'\n]\ncsv_list = []\n\nfor cat in cat_breeds:\n    print(cat)\n    extract = get_wiki_extract(cat)\n    print(extract)\n    csv_list.append((cat, extract))\n\n# Convert to dataframe and save\ndf = pd.DataFrame(csv_list, columns=['breed', 'description'])\ndf.to_csv('cat_breeds.csv', index=False, encoding='utf-8')",
+   "source": "select \n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['excellent', 'average', 'poor'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in purchasing manufacturing equipment based on this description.'\n        }\n    ):label::STRING as hair_type\nfrom prospects\n-- other class. ideas: industry, main product, region",
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}

From 5f5e01559d1cdc754fd8b8f7806911e3becc5bee Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Wed, 18 Dec 2024 16:35:03 -0800
Subject: [PATCH 4/9] Update anaconda_webinar_notebook.ipynb

added error/warning suppression and working cortex.classify query
---
 .../anaconda_webinar/anaconda_webinar_notebook.ipynb | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
index 5a39a4bc..0b3e7c08 100644
--- a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -17,7 +17,7 @@
     "collapsed": false,
     "resultHeight": 0
    },
-   "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()",
+   "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n\nimport logging\nlogging.getLogger(\"cmdstanpy\").setLevel(logging.WARNING)\nimport warnings\nwarnings.filterwarnings('ignore', category=FutureWarning)",
    "execution_count": null,
    "outputs": []
   },
@@ -90,7 +90,7 @@
     "language": "sql",
     "name": "cell6",
     "collapsed": false,
-    "resultHeight": 510
+    "resultHeight": 159
    },
    "outputs": [],
    "source": "with windowed as (\n    \n    select\n        *,\n        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n    from df\n\n)\n\nselect *,\n  case\n    when total = previous_year_total and total > 0 then 'retained'\n    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n    when total = 0 and previous_year_total > 0 then 'churned'\n    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n  else 'irrelevant' end as category,\n  case category\n    when 'retained' then 0\n    when 'new' then total\n    when 'churned' then (-1 * previous_year_total)\n    when 'expanded' then total - previous_year_total\n    when 'contracted' then (-1 * (previous_year_total - total))\n    when 'resurrected' then total\n  else 0 end as net_change\nfrom windowed\norder by id, order_year",
@@ -214,7 +214,7 @@
     "language": "python",
     "name": "cell26",
     "collapsed": false,
-    "resultHeight": 95
+    "resultHeight": 0
    },
    "outputs": [],
    "source": "m = Prophet()\ntry:\n    m.fit(df)\nexcept Exception as err:\n    print(Exception, err)",
@@ -227,7 +227,7 @@
     "language": "python",
     "name": "cell23",
     "collapsed": false,
-    "resultHeight": 1126
+    "resultHeight": 885
    },
    "outputs": [],
    "source": "future = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)\n#fig2 = m.plot_components(forecast)",
@@ -316,10 +316,10 @@
     "name": "cell24",
     "codeCollapsed": false,
     "collapsed": false,
-    "resultHeight": 135
+    "resultHeight": 391
    },
    "outputs": [],
-   "source": "select \n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['excellent', 'average', 'poor'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in purchasing manufacturing equipment based on this description.'\n        }\n    ):label::STRING as hair_type\nfrom prospects\n-- other class. ideas: industry, main product, region",
+   "source": "select \n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['extremely likely', 'somewhat likely', 'unlikely'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in attending a webinar showcasing the GTM utility of Snowflake Notebooks and Anaconda Python Packages.'\n        }\n    ):label::STRING as persona_likelihood,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['healthcare', 'finance', 'retail', 'technology', 'communication', 'other'],\n        {\n            'task_description': 'Return the most likely industry of the company based on this description.'\n        }\n    ):label::STRING as industry,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['California', 'South', 'Northeast', 'Midatlantic', 'Midwest', 'Pacific Northwest', 'Outside the US'],\n        {\n            'task_description': 'Return the most likely region the company is headquartered in based on this description.'\n        }\n    ):label::STRING as region\nfrom prospects\nwhere \"description\" is not null and \"description\" != ''\nlimit 10\n-- other class. ideas: industry, main product, region",
    "execution_count": null
   }
  ]

From 892d00714dd82319a4d3ecfb61291dc9349d53c2 Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Thu, 2 Jan 2025 13:54:37 -0600
Subject: [PATCH 5/9] Create synthetic_data_generation.py

---
 .../synthetic_data_generation.py              | 208 ++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100644 samples/notebooks/anaconda_webinar/synthetic_data_generation.py

diff --git a/samples/notebooks/anaconda_webinar/synthetic_data_generation.py b/samples/notebooks/anaconda_webinar/synthetic_data_generation.py
new file mode 100644
index 00000000..9f638bff
--- /dev/null
+++ b/samples/notebooks/anaconda_webinar/synthetic_data_generation.py
@@ -0,0 +1,208 @@
+import numpy as np
+from datetime import datetime, timedelta
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
+
+class OrderGenerator:
+    def __init__(
+        self,
+        # Basic parameters
+        start_date='1992-01-01',
+        end_date='1998-12-31',
+        target_daily_total=100_000_000,
+        target_daily_orders=500,
+        
+        # Trend parameters
+        annual_growth_rate=0.15,        # 15% annual growth
+        order_value_growth_rate=0.05,   # 5% annual growth in order values
+        
+        # Seasonal parameters
+        holiday_peak_day=350,           # Peak shopping day (Dec 16)
+        holiday_effect_magnitude=1.0,   # Strength of holiday effect
+        seasonal_baseline=0.8,          # Minimum seasonal multiplier
+        seasonal_spread=1000,           # Controls how spread out the holiday effect is
+        
+        # Weekly parameters
+        weekend_dip=0.85,              # Weekend order multiplier
+        weekday_boost=1.1,             # Weekday order multiplier
+        
+        # Value distribution parameters
+        pareto_shape=2.0,              # Shape parameter for order values
+        min_value_factor=0.3,          # Minimum order value as fraction of average
+        value_noise_stddev=0.15,       # Standard deviation for order value noise
+        
+        # Random seed for reproducibility
+        random_seed=None
+    ):
+        self.start_date = pd.to_datetime(start_date)
+        self.end_date = pd.to_datetime(end_date)
+        self.target_daily_total = target_daily_total
+        self.target_daily_orders = target_daily_orders
+        
+        # Store all other parameters
+        self.annual_growth_rate = annual_growth_rate
+        self.order_value_growth_rate = order_value_growth_rate
+        self.holiday_peak_day = holiday_peak_day
+        self.holiday_effect_magnitude = holiday_effect_magnitude
+        self.seasonal_baseline = seasonal_baseline
+        self.seasonal_spread = seasonal_spread
+        self.weekend_dip = weekend_dip
+        self.weekday_boost = weekday_boost
+        self.pareto_shape = pareto_shape
+        self.min_value_factor = min_value_factor
+        self.value_noise_stddev = value_noise_stddev
+        
+        # Derived parameters
+        self.avg_order_value = target_daily_total / target_daily_orders
+        self.min_order_value = self.avg_order_value * self.min_value_factor
+        
+        if random_seed is not None:
+            np.random.seed(random_seed)
+    
+    def seasonal_effect(self, day_of_year):
+        """Stronger effect during holiday season"""
+        holiday_effect = np.exp(
+            -((day_of_year - self.holiday_peak_day) ** 2) / 
+            self.seasonal_spread
+        ) * self.holiday_effect_magnitude
+        return np.maximum(self.seasonal_baseline + holiday_effect, 0)
+    
+    def weekly_effect(self, day_of_week):
+        """Weekend dips in orders"""
+        return self.weekend_dip if day_of_week in [5, 6] else self.weekday_boost
+    
+    def trend_effect(self, years_passed):
+        """Long-term growth trend"""
+        return np.power(1 + self.annual_growth_rate, years_passed)
+    
+    def generate_order_value(self, years_passed):
+        """Generate order values following a Pareto distribution"""
+        u = np.random.random()
+        value = self.min_order_value / np.power(1 - u, 1/self.pareto_shape)
+        value = value * np.power(1 + self.order_value_growth_rate, years_passed)
+        noise = np.random.normal(1, self.value_noise_stddev)
+        return round(value * noise)
+    
+    def generate_clerk(self):
+        """Generate clerk IDs matching TPCH format"""
+        clerk_id = np.random.randint(1000)
+        return f"Clerk#{clerk_id:09d}"
+    
+    def generate_customer(self, num_customers=1500):
+        """Generate customer IDs matching TPCH format"""
+        return f"Customer#{np.random.randint(num_customers):09d}"
+    
+    def generate_orders(self):
+        """Generate supplementary orders with realistic patterns"""
+        orders = []
+        current_date = self.start_date
+        
+        while current_date <= self.end_date:
+            day_of_year = current_date.dayofyear
+            years_passed = (current_date - self.start_date).days / 365
+            
+            seasonal = self.seasonal_effect(day_of_year)
+            weekly = self.weekly_effect(current_date.weekday())
+            trend = self.trend_effect(years_passed)
+            
+            target_orders = round(
+                self.target_daily_orders * 
+                seasonal * weekly * trend
+            )
+            
+            for _ in range(target_orders):
+                order = {
+                    'o_orderdate': current_date,
+                    'o_totalprice': self.generate_order_value(years_passed),
+                    'o_orderstatus': 'O',
+                    'o_clerk': self.generate_clerk(),
+                    'o_custkey': self.generate_customer()
+                }
+                orders.append(order)
+            
+            current_date += timedelta(days=1)
+        
+        df = pd.DataFrame(orders)
+        df = df.sort_values('o_orderdate')
+        df['o_orderkey'] = range(len(df))
+        df['o_orderkey'] = df['o_orderkey'] + 1_500_000  # Offset to avoid conflicts
+        
+        return df
+
+def generate_and_save_orders(filename, **generator_params):
+    """Generate orders and save to CSV"""
+    generator = OrderGenerator(**generator_params)
+    df = generator.generate_orders()
+    df.to_csv(filename, index=False)
+    print(f"Orders saved to {filename}")
+    return df
+
+def plot_daily_patterns(filename, figsize=(15, 8), plot_style='compressed'):
+    """Load orders from CSV and plot daily revenue totals and order counts (plot_style is currently unused)"""
+    df = pd.read_csv(filename)
+    df['o_orderdate'] = pd.to_datetime(df['o_orderdate'])
+    
+    daily_summary = df.groupby('o_orderdate').agg({
+        'o_orderkey': 'count',
+        'o_totalprice': 'sum'
+    }).reset_index()
+    
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize)
+    
+    # Plot daily totals
+    ax1.plot(daily_summary['o_orderdate'], daily_summary['o_totalprice'], 
+            color='blue', linewidth=0.5)
+    ax1.set_title('Daily Order Totals')
+    ax1.set_ylabel('Daily Total ($)')
+    ax1.grid(True, alpha=0.3)
+    
+    # Set x-axis ticks to show years and months
+    ax1.xaxis.set_major_locator(YearLocator())
+    ax1.xaxis.set_minor_locator(MonthLocator())
+    ax1.xaxis.set_major_formatter(DateFormatter('%Y'))
+    ax1.yaxis.set_major_formatter(lambda x, p: f'${x/1e6:.1f}M')
+    
+    # Plot daily order counts
+    ax2.plot(daily_summary['o_orderdate'], daily_summary['o_orderkey'], 
+            color='green', linewidth=0.5)
+    ax2.set_title('Daily Order Count')
+    ax2.set_ylabel('Number of Orders')
+    ax2.grid(True, alpha=0.3)
+    
+    ax2.xaxis.set_major_locator(YearLocator())
+    ax2.xaxis.set_minor_locator(MonthLocator())
+    ax2.xaxis.set_major_formatter(DateFormatter('%Y'))
+    
+    for ax in [ax1, ax2]:
+        plt.setp(ax.get_xticklabels(), rotation=45)
+    
+    plt.tight_layout()
+    
+    # Print summary statistics
+    print("\nSummary Statistics:")
+    print(f"Date Range: {daily_summary['o_orderdate'].min().date()} to {daily_summary['o_orderdate'].max().date()}")
+    print(f"Average daily orders: {daily_summary['o_orderkey'].mean():.0f}")
+    print(f"Average daily total: ${daily_summary['o_totalprice'].mean():,.2f}")
+    
+    return fig
+
+if __name__ == "__main__":
+    # Example: generate roughly 6.5 years of data (1992-01-01 to 1998-08-02) with pronounced patterns
+    params = {
+        'start_date': '1992-01-01',
+        'end_date': '1998-08-02',
+        'target_daily_total': 100_000_000,
+        'target_daily_orders': 500,
+        'holiday_effect_magnitude': 1.2,
+        'weekend_dip': 0.8,
+        'annual_growth_rate': 0.15,
+        'value_noise_stddev': 0.15
+    }
+    
+    # Generate and save orders
+    generate_and_save_orders('supplementary_orders.csv', **params)
+    
+    # Create visualization
+    fig = plot_daily_patterns('supplementary_orders.csv')
+    plt.show()

From 40b48afbe969e9caf9fab06004862094a33e7dec Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Mon, 6 Jan 2025 19:05:37 -0600
Subject: [PATCH 6/9] Update anaconda_webinar_notebook.ipynb

---
 .../anaconda_webinar_notebook.ipynb           | 241 ++++++++----------
 1 file changed, 111 insertions(+), 130 deletions(-)

diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
index 0b3e7c08..f471f90f 100644
--- a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -13,231 +13,226 @@
    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
    "metadata": {
     "language": "python",
-    "name": "cell1",
-    "collapsed": false,
-    "resultHeight": 0
+    "name": "session_creation",
+    "resultHeight": 0,
+    "collapsed": false
    },
    "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n\nimport logging\nlogging.getLogger(\"cmdstanpy\").setLevel(logging.WARNING)\nimport warnings\nwarnings.filterwarnings('ignore', category=FutureWarning)",
    "execution_count": null,
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "id": "d776341f-464d-4a9b-8c98-ac8e05286559",
+   "metadata": {
+    "language": "sql",
+    "name": "orders_sample",
+    "resultHeight": 426,
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": "select\n    o_custkey,\n    o_orderdate,\n    o_totalprice\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\nlimit 10",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "23297335-ae53-477e-af45-1355957bc24e",
+   "metadata": {
+    "language": "python",
+    "name": "generate_synthetic_data",
+    "resultHeight": 60,
+    "collapsed": false,
+    "codeCollapsed": true
+   },
+   "outputs": [],
+   "source": "import numpy as np\nfrom datetime import datetime, timedelta\nimport pandas as pd\n\nclass OrderGenerator:\n    def __init__(\n        self,\n        # Basic parameters\n        start_date='1992-01-01',\n        end_date='1998-12-31',\n        target_daily_total=100_000_000,\n        target_daily_orders=500,\n        \n        # Trend parameters\n        annual_growth_rate=0.15,        # 15% annual growth\n        order_value_growth_rate=0.05,   # 5% annual growth in order values\n        \n        # Seasonal parameters\n        holiday_peak_day=350,           # Peak shopping day (Dec 16)\n        holiday_effect_magnitude=1.0,   # Strength of holiday effect\n        seasonal_baseline=0.8,          # Minimum seasonal multiplier\n        seasonal_spread=1000,           # Controls how spread out the holiday effect is\n        \n        # Weekly parameters\n        weekend_dip=0.85,              # Weekend order multiplier\n        weekday_boost=1.1,             # Weekday order multiplier\n        \n        # Value distribution parameters\n        pareto_shape=2.0,              # Shape parameter for order values\n        min_value_factor=0.3,          # Minimum order value as fraction of average\n        value_noise_stddev=0.15,       # Standard deviation for order value noise\n        \n        # Random seed for reproducibility\n        random_seed=None\n    ):\n        self.start_date = pd.to_datetime(start_date)\n        self.end_date = pd.to_datetime(end_date)\n        self.target_daily_total = target_daily_total\n        self.target_daily_orders = target_daily_orders\n        \n        # Store all other parameters\n        self.annual_growth_rate = annual_growth_rate\n        self.order_value_growth_rate = order_value_growth_rate\n        self.holiday_peak_day = holiday_peak_day\n        self.holiday_effect_magnitude = holiday_effect_magnitude\n        self.seasonal_baseline = seasonal_baseline\n        self.seasonal_spread = seasonal_spread\n        
self.weekend_dip = weekend_dip\n        self.weekday_boost = weekday_boost\n        self.pareto_shape = pareto_shape\n        self.min_value_factor = min_value_factor\n        self.value_noise_stddev = value_noise_stddev\n        \n        # Derived parameters\n        self.avg_order_value = target_daily_total / target_daily_orders\n        self.min_order_value = self.avg_order_value * self.min_value_factor\n        \n        if random_seed is not None:\n            np.random.seed(random_seed)\n    \n    def seasonal_effect(self, day_of_year):\n        \"\"\"Stronger effect during holiday season\"\"\"\n        holiday_effect = np.exp(\n            -((day_of_year - self.holiday_peak_day) ** 2) / \n            self.seasonal_spread\n        ) * self.holiday_effect_magnitude\n        return np.maximum(self.seasonal_baseline + holiday_effect, 0)\n    \n    def weekly_effect(self, day_of_week):\n        \"\"\"Weekend dips in orders\"\"\"\n        return self.weekend_dip if day_of_week in [5, 6] else self.weekday_boost\n    \n    def trend_effect(self, years_passed):\n        \"\"\"Long-term growth trend\"\"\"\n        return np.power(1 + self.annual_growth_rate, years_passed)\n    \n    def generate_order_value(self, years_passed):\n        \"\"\"Generate order values following a Pareto distribution\"\"\"\n        u = np.random.random()\n        value = self.min_order_value / np.power(1 - u, 1/self.pareto_shape)\n        value = value * np.power(1 + self.order_value_growth_rate, years_passed)\n        noise = np.random.normal(1, self.value_noise_stddev)\n        return round(value * noise)\n    \n    def generate_clerk(self):\n        \"\"\"Generate clerk IDs matching TPCH format\"\"\"\n        clerk_id = np.random.randint(1000)\n        return f\"Clerk#{clerk_id:09d}\"\n    \n    def generate_customer(self, num_customers=149999):\n        \"\"\"Generate customer IDs matching TPCH format\"\"\"\n        return np.random.randint(num_customers)\n    \n    def 
generate_orders(self):\n        \"\"\"Generate supplementary orders with realistic patterns\"\"\"\n        orders = []\n        current_date = self.start_date\n        \n        while current_date <= self.end_date:\n            day_of_year = current_date.dayofyear\n            years_passed = (current_date - self.start_date).days / 365\n            \n            seasonal = self.seasonal_effect(day_of_year)\n            weekly = self.weekly_effect(current_date.weekday())\n            trend = self.trend_effect(years_passed)\n            \n            target_orders = round(\n                self.target_daily_orders * \n                seasonal * weekly * trend\n            )\n            \n            for _ in range(target_orders):\n                order = {\n                    'o_orderdate': current_date,\n                    'o_totalprice': self.generate_order_value(years_passed),\n                    'o_orderstatus': 'O',\n                    'o_clerk': self.generate_clerk(),\n                    'o_custkey': self.generate_customer()\n                }\n                orders.append(order)\n            \n            current_date += timedelta(days=1)\n        \n        df = pd.DataFrame(orders)\n        df = df.sort_values('o_orderdate')\n        df['o_orderkey'] = range(len(df))\n        df['o_orderkey'] = df['o_orderkey'] + 1_500_000  # Offset to avoid conflicts\n        \n        return df\n\ndef generate_and_save_synthetic_data():\n    \"\"\"Generate orders and save to CSV\"\"\"\n    # Example: Generate 2 years of data with pronounced patterns\n    params = {\n        'start_date': '1992-01-01',\n        'end_date': '1998-08-02',\n        'target_daily_total': 100_000_000,\n        'target_daily_orders': 500,\n        'holiday_effect_magnitude': 1.2,\n        'weekend_dip': 0.8,\n        'annual_growth_rate': 0.15,\n        'value_noise_stddev': 0.15\n    }\n    \n    generator = OrderGenerator(**params)\n    df = generator.generate_orders()\n    #save the 
synthetic data to a temporary table\n    filename = 'synthetic_orders'\n    df.to_csv(filename + '.csv', index=False)\n    print(f\"Orders saved to CSV {filename}.csv\")\n    csv_df = pd.read_csv(filename + '.csv')\n    csv_df['o_orderdate'] = pd.to_datetime(df['o_orderdate'])\n    table_df = session.create_dataframe(csv_df)\n    table_df.write.mode(\"overwrite\").save_as_table(filename, table_type=\"temporary\")\n    print(f\"Order saved to temporary table {filename}\")\n    return\n\n# Generate and save orders\ngenerate_and_save_synthetic_data()",
+   "execution_count": null
+  },
   {
    "cell_type": "markdown",
-   "id": "8ae58f97-bb31-4290-b2dd-2416f3c2ce15",
+   "id": "ca0f2f8f-33ae-4934-9064-f44a3e5ef5c9",
    "metadata": {
-    "name": "cell9",
+    "name": "growth_accounting_intro",
     "collapsed": false,
     "resultHeight": 74
    },
-   "source": "# Growth Accounting\n"
+   "source": "# Growth Accounting"
   },
   {
    "cell_type": "code",
-   "id": "435baefb-25ff-42a1-b4f8-236a98b4afac",
+   "id": "b10ebdb4-78f3-49f3-ab81-529b0afd662d",
    "metadata": {
     "language": "sql",
-    "name": "cell3",
+    "name": "orders",
+    "resultHeight": 510,
     "collapsed": false,
-    "resultHeight": 510
+    "codeCollapsed": false
    },
    "outputs": [],
-   "source": "select\n    o_custkey as id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by all\norder by id, order_year",
+   "source": "with synthetic as (\n\n    select\n        \"o_custkey\" as id,\n        to_date(\"o_orderdate\") as o_orderdate,\n        CAST(\"o_totalprice\" AS NUMERIC) as o_totalprice\n    from synthetic_orders\n    --SAMPLE (1000000 rows)\n\n),\n\noriginal as (\n    \n    select\n        o_custkey as id,\n        o_orderdate,\n        o_totalprice\n    from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\n    --SAMPLE (1000000 rows)\n\n)\n\nselect * from synthetic\nunion all \nselect * from original",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "61f451db-8ff2-4d83-b9be-6c1a77365446",
+   "id": "b933a301-0086-4682-9a6b-c0d430f62f87",
    "metadata": {
-    "language": "python",
-    "name": "cell12",
-    "collapsed": false,
-    "resultHeight": 0
+    "language": "sql",
+    "name": "annual_customer_orders",
+    "resultHeight": 510
    },
    "outputs": [],
-   "source": "import pandas as pd",
+   "source": "select\n    id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom {{ orders }}\ngroup by all\norder by id, order_year",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "20f1dd62-d796-4190-b34a-89a16fea1819",
+   "id": "a789790e-47be-4b57-94a1-53832336abb1",
    "metadata": {
     "language": "python",
-    "name": "cell10",
-    "collapsed": false,
+    "name": "add_rows_for_years_without_sales",
     "resultHeight": 0
    },
    "outputs": [],
-   "source": "df = cell3.to_pandas()\n\n#pivot data to add row for each id:year with no revenue\nresult = df.pivot_table(\n    index='ID',\n    columns='ORDER_YEAR', \n    values='TOTAL',\n    fill_value=0\n).reset_index().melt(\n    id_vars='ID',\n    var_name='ORDER_YEAR',\n    value_name='TOTAL'\n)\n\n# save the dataframe as table for SQL querying \ndf = session.create_dataframe(result)\ndf.write.mode(\"overwrite\").save_as_table(\"df\", table_type=\"temporary\")",
+   "source": "annual_customer_orders_df = annual_customer_orders.to_pandas()\n\n#pivot data to add row for each id:year with no revenue\nresult = annual_customer_orders_df.pivot_table(\n    index='ID',\n    columns='ORDER_YEAR', \n    values='TOTAL',\n    fill_value=0\n).reset_index().melt(\n    id_vars='ID',\n    var_name='ORDER_YEAR',\n    value_name='TOTAL'\n)\n\n# save the dataframe as table for SQL querying \ndf = session.create_dataframe(result)\ndf.write.mode(\"overwrite\").save_as_table(\"annual_customer_orders\", table_type=\"temporary\")",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "52ae5a36-e143-4ebb-b884-e17750b0c77f",
+   "id": "70c25d11-94cb-40f0-985a-89e8d8839d8e",
    "metadata": {
     "language": "sql",
-    "name": "cell7",
-    "collapsed": false,
+    "name": "sample_annual_customer_orders",
     "resultHeight": 426
    },
    "outputs": [],
-   "source": "select * from df\norder by id, order_year\nlimit 10",
+   "source": "select * from annual_customer_orders\norder by id, order_year\nlimit 10",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "11971c03-53a7-4429-870a-4b51bbef7aca",
+   "id": "d092b952-57aa-4076-b1cd-575279473bab",
    "metadata": {
     "language": "sql",
-    "name": "cell6",
-    "collapsed": false,
-    "resultHeight": 159
+    "name": "labeled_annual_customer_orders",
+    "resultHeight": 510
    },
    "outputs": [],
-   "source": "with windowed as (\n    \n    select\n        *,\n        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n    from df\n\n)\n\nselect *,\n  case\n    when total = previous_year_total and total > 0 then 'retained'\n    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n    when total = 0 and previous_year_total > 0 then 'churned'\n    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n  else 'irrelevant' end as category,\n  case category\n    when 'retained' then 0\n    when 'new' then total\n    when 'churned' then (-1 * previous_year_total)\n    when 'expanded' then total - previous_year_total\n    when 'contracted' then (-1 * (previous_year_total - total))\n    when 'resurrected' then total\n  else 0 end as net_change\nfrom windowed\norder by id, order_year",
+   "source": "with windowed as (\n    \n    select\n        *,\n        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n    from annual_customer_orders\n\n)\n\nselect *,\n  case\n    when total = previous_year_total and total > 0 then 'retained'\n    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n    when total = 0 and previous_year_total > 0 then 'churned'\n    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n  else 'irrelevant' end as category,\n  case category\n    when 'retained' then 0\n    when 'new' then total\n    when 'churned' then (-1 * previous_year_total)\n    when 'expanded' then total - previous_year_total\n    when 'contracted' then (-1 * (previous_year_total - total))\n    when 'resurrected' then total\n  else 0 end as net_change\nfrom windowed\norder by id, order_year",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "13f099e5-4265-438d-ab46-b3315bfc1f1d",
+   "id": "4fa6afc9-934a-40fb-a8ef-f6aedaec3ba0",
    "metadata": {
     "language": "sql",
-    "name": "cell4",
-    "collapsed": false,
+    "name": "annual_growth_labels",
     "resultHeight": 438
    },
    "outputs": [],
-   "source": "select\n    date_part(year, order_year) as order_year,\n    category,\n    round(sum(total)) as total,\n    round(sum(net_change)) as net_change\nfrom {{ cell6 }}\ngroup by all",
+   "source": "select\n    date_part(year, order_year) as order_year,\n    category,\n    round(sum(total)) as total,\n    round(sum(net_change)) as net_change\nfrom {{ labeled_annual_customer_orders }}\ngroup by all",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "735da8fc-91c0-4604-8041-1437208a1f01",
+   "id": "9f67f2b4-9c22-453d-abc0-68e5fbbc2e7f",
    "metadata": {
     "language": "python",
-    "name": "cell2",
-    "collapsed": false,
-    "resultHeight": 772
+    "name": "visualize_growth_framework",
+    "resultHeight": 239
    },
    "outputs": [],
-   "source": "import streamlit as st\n# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(cell4, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
+   "source": "import streamlit as st\n# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(annual_growth_labels, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "06f083eb-ae70-42ad-af0d-261138126bed",
+   "id": "2e2a6a8c-14e5-47f2-997e-fa53600564f2",
    "metadata": {
     "language": "python",
-    "name": "cell5",
-    "collapsed": false,
+    "name": "download_growth_accounting_csv",
     "resultHeight": 96
    },
    "outputs": [],
-   "source": "df = cell6.to_pandas()\nbutton_csv = df.to_csv().encode(\"utf-8\")\nst.download_button(label=\"Download\", data=button_csv, file_name=\"growth_accounting.csv\", mime=\"text/csv\")",
+   "source": "df = labeled_annual_customer_orders.to_pandas()\nbutton_csv = df.to_csv().encode(\"utf-8\")\nst.download_button(label=\"Download\", data=button_csv, file_name=\"growth_accounting.csv\", mime=\"text/csv\")",
    "execution_count": null
   },
   {
    "cell_type": "markdown",
-   "id": "db63ea18-13d4-43a4-a29c-a734db89e796",
+   "id": "fbd5ea2b-6a4f-423e-8e50-ea5d96eb8140",
    "metadata": {
-    "name": "cell8",
+    "name": "forecasting_intro",
     "collapsed": false,
     "resultHeight": 74
    },
-   "source": "# Forecasting\n"
+   "source": "# Forecasting"
   },
   {
    "cell_type": "code",
-   "id": "2a9b9481-4d24-4f6c-9b53-4f50add6458e",
+   "id": "16ec54e1-54cf-468c-a2d9-8bb8bd4abaaa",
    "metadata": {
     "language": "sql",
-    "name": "cell14",
-    "collapsed": false,
-    "resultHeight": 438
-   },
-   "outputs": [],
-   "source": "select\n    date_trunc(day, o_orderdate) as order_date,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by 1\norder by order_date asc",
-   "execution_count": null
-  },
-  {
-   "cell_type": "code",
-   "id": "9d5d7b4a-43cc-4c62-844e-a1954c312cbf",
-   "metadata": {
-    "language": "python",
-    "name": "cell15",
-    "collapsed": false,
-    "resultHeight": 0
+    "name": "daily_order_data",
+    "resultHeight": 438,
+    "collapsed": false
    },
    "outputs": [],
-   "source": "from prophet import Prophet\nfrom prophet.plot import plot_plotly, plot_components_plotly",
+   "source": "select\n    date_trunc(day, o_orderdate) as order_date,\n    sum(o_totalprice) as sum_revenue,\n    count(*) as num_orders\nfrom {{ orders }}\ngroup by 1\norder by order_date asc",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "87ca009b-4da8-46c2-a86c-9cad46fac89f",
+   "id": "e1368eea-3b25-46fd-92d9-d890e07dc61e",
    "metadata": {
     "language": "python",
-    "name": "cell17",
-    "collapsed": false,
-    "resultHeight": 150
+    "name": "prophet_data_preparation",
+    "resultHeight": 372,
+    "collapsed": false
    },
    "outputs": [],
-   "source": "df = cell14.to_pandas()\ndf = df.rename(columns={'ORDER_DATE': 'ds', 'TOTAL': 'y'})\nprint(df.head())",
+   "source": "from prophet import Prophet\nfrom prophet.plot import plot_plotly, plot_components_plotly\n\ndf = daily_order_data.to_pandas()\nprophet_df = df.rename(columns={'ORDER_DATE': 'ds', 'SUM_REVENUE': 'y'})\nst.line_chart(prophet_df, x='ds', y='y')",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "4efeff4d-da4b-4c1d-b3d5-a892bb2a2bc5",
+   "id": "bff69396-4c45-477a-a03a-9c173e9e0a02",
    "metadata": {
     "language": "python",
-    "name": "cell19",
-    "collapsed": false,
-    "resultHeight": 372
+    "name": "project_future_daily_sales",
+    "resultHeight": 41
    },
    "outputs": [],
-   "source": "st.line_chart(df, x='ds', y='y')",
+   "source": "m = Prophet()\ntry:\n    m.fit(prophet_df)\nexcept Exception as err:\n    print(Exception, err)\n\nfuture = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)",
    "execution_count": null
   },
-  {
-   "cell_type": "markdown",
-   "id": "cbffd526-a4b0-405b-9718-6c5c2f8f6144",
-   "metadata": {
-    "name": "cell21",
-    "collapsed": false,
-    "resultHeight": 120
-   },
-   "source": "Waiting on role permission to write UDFs for Prophet library to run properly. Until then, code cell below will return \n```<class 'Exception'> Failed with error [Errno 1] Operation not permitted: '/usr/lib/python_udf/d212b0f949a4a60cf75395f561f7016ea978bad39b2e60eee12ece87d118e861/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin'```"
-  },
   {
    "cell_type": "code",
-   "id": "9d2c4877-5815-4f49-a53d-816b38de4eb6",
+   "id": "3ad6456c-376a-409b-a006-a42bfbb005fa",
    "metadata": {
     "language": "python",
-    "name": "cell26",
-    "collapsed": false,
-    "resultHeight": 0
+    "name": "inspect_forecasting_components",
+    "resultHeight": 41
    },
    "outputs": [],
-   "source": "m = Prophet()\ntry:\n    m.fit(df)\nexcept Exception as err:\n    print(Exception, err)",
+   "source": "fig2 = m.plot_components(forecast)",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "ce582f14-9490-4a54-8fe0-bbfc8b56f61f",
+   "id": "f30b1c81-80bf-4571-b971-84443f55630d",
    "metadata": {
     "language": "python",
-    "name": "cell23",
-    "collapsed": false,
-    "resultHeight": 885
+    "name": "simplify_forecast_visualization",
+    "resultHeight": 372
    },
    "outputs": [],
-   "source": "future = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)\n#fig2 = m.plot_components(forecast)",
+   "source": "df = pd.DataFrame({\n    'ds': forecast['ds'],\n    'y': m.history['y'],\n    # Only show yhat for future dates\n    'yhat': np.where(forecast['ds'] > m.history['ds'].max(), forecast['yhat'], np.nan)\n})\n\nst.line_chart(df, x='ds', y=['y', 'yhat'])",
    "execution_count": null
   },
   {
    "cell_type": "markdown",
-   "id": "5dc1abf7-b9ea-4fe4-88ae-109342f6dc05",
+   "id": "5232d8e1-8ecb-4bb4-94c2-dd7122caaf30",
    "metadata": {
-    "name": "cell25",
+    "name": "customer_segmentation_introduction",
     "collapsed": false,
     "resultHeight": 74
    },
@@ -245,12 +240,12 @@
   },
   {
    "cell_type": "code",
-   "id": "939a7d50-2679-46ee-a43b-b7d03b627d61",
+   "id": "6a901764-40e1-4607-850c-444ad00450ef",
    "metadata": {
     "language": "sql",
-    "name": "cell16",
-    "collapsed": false,
-    "resultHeight": 426
+    "name": "sample_company_data",
+    "resultHeight": 426,
+    "collapsed": false
    },
    "outputs": [],
    "source": "select *\nfrom ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\nlimit 10",
@@ -258,12 +253,12 @@
   },
   {
    "cell_type": "code",
-   "id": "9bd53742-511c-4cf9-9e28-02bdbcaca463",
+   "id": "e7acf161-5e2d-4277-89ea-65f1256358e4",
    "metadata": {
     "language": "python",
-    "name": "cell13",
-    "collapsed": false,
-    "resultHeight": 0
+    "name": "construct_api_request",
+    "resultHeight": 0,
+    "collapsed": false
    },
    "outputs": [],
    "source": "import requests\n\ndef get_wiki_extract(title):\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n    \n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n    \n    # Make the request\n    response = requests.get(url, params=params)\n    \n    # Check if request was successful\n    if response.status_code == 200:\n        data = response.json()\n        # Navigate through the JSON response to get the extract\n        pages = data[\"query\"][\"pages\"]\n        # Get the first (and only) page's extract\n        page = list(pages.values())[0]\n        return page.get(\"extract\", \"No extract available\")\n    else:\n        return f\"Error: {response.status_code}\"",
@@ -271,56 +266,42 @@
   },
   {
    "cell_type": "code",
-   "id": "0557102d-3584-469a-9fdc-be53fd0a249b",
-   "metadata": {
-    "language": "python",
-    "name": "cell22",
-    "collapsed": false,
-    "resultHeight": 60
-   },
-   "outputs": [],
-   "source": "df = cell16.to_pandas()\ncompany_names = df['NAME'].tolist()\ncsv_list = []\n\nprint(\"extracting descriptions\")\n\nfor name in company_names:\n    try:\n        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n        #print(f'extracted description of {name} from Wikipedia')\n    except Exception as e:\n        #print(f\"Error getting Wikipedia extract for {name}: {str(e)}\")\n        extract = \"None available\"\n        \n    csv_list.append((name, extract))\n\nprint(\"finished extracting descriptions\")",
-   "execution_count": null
-  },
-  {
-   "cell_type": "code",
-   "id": "e979ca68-494a-46d4-a92d-d106d52980fb",
+   "id": "94963e7c-8d39-46e5-a035-4838ebb3617e",
    "metadata": {
     "language": "python",
-    "name": "cell18",
-    "collapsed": false,
-    "resultHeight": 0
+    "name": "extraxt_wikipedia_descriptions",
+    "resultHeight": 284,
+    "collapsed": false
    },
    "outputs": [],
-   "source": "# save the dataframe as table for SQL querying \ndf = pd.DataFrame(csv_list, columns=['name', 'description'])\ndf = session.create_dataframe(df)\ndf.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")",
+   "source": "df = sample_company_data.to_pandas()\ncompany_names = df['NAME'].tolist()\ncsv_list = []\n\nprint(\"extracting descriptions\")\n\nfor name in company_names:\n    try:\n        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n        print(f'extracted description of {name} from Wikipedia')\n    except Exception as e:\n        print(f\"Error getting Wikipedia extract for {name}: {str(e)}\")\n        extract = \"None available\"\n        \n    csv_list.append((name, extract))\n\nprint(\"finished extracting descriptions\")\n\n# save the dataframe as table for SQL querying \ndf = pd.DataFrame(csv_list, columns=['name', 'description'])\ndf = session.create_dataframe(df)\ndf.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "3f5d40d9-ca69-4137-affa-905caef97c29",
+   "id": "81c446dc-5c36-42e3-bb0d-985d397af0ca",
    "metadata": {
     "language": "sql",
-    "name": "cell20",
+    "name": "display_wikipedia_descriptions",
     "resultHeight": 426,
     "collapsed": false
    },
    "outputs": [],
-   "source": "select \"name\", \"description\" from prospects limit 10",
+   "source": "select \"name\", \"description\" from prospects",
    "execution_count": null
   },
   {
    "cell_type": "code",
-   "id": "51396730-f96a-476b-bb12-d7cac8c02576",
+   "id": "6b559934-f89d-418e-9a1f-38ef7faa03ad",
    "metadata": {
     "language": "sql",
-    "name": "cell24",
-    "codeCollapsed": false,
-    "collapsed": false,
-    "resultHeight": 391
+    "name": "categorize_descriptions_with_LLM",
+    "resultHeight": 391,
+    "collapsed": false
    },
    "outputs": [],
    "source": "select \n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['extremely likely', 'somewhat likely', 'unlikely'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in attending a webinar showcasing the GTM utility of Snowflake Notebooks and Anaconda Python Packages.'\n        }\n    ):label::STRING as persona_likelihood,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['healthcare', 'finance', 'retail', 'technology', 'communication', 'other'],\n        {\n            'task_description': 'Return the most likely industry of the company based on this description.'\n        }\n    ):label::STRING as industry,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['California', 'South', 'Northeast', 'Midatlantic', 'Midwest', 'Pacific Northwest', 'Outsite the US'],\n        {\n            'task_description': 'Return the most likely region the company is headquartered in based on this description.'\n        }\n    ):label::STRING as region\nfrom prospects\nwhere \"description\" is not null and \"description\" != ''\nlimit 10\n-- other class. ideas: industry, main product, region",
    "execution_count": null
   }
  ]
-}
+}
\ No newline at end of file

From 70bb3499124bd387fc5ca16cfca5938a1e7ee6b6 Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Mon, 27 Jan 2025 16:17:07 -0800
Subject: [PATCH 7/9] Updated Notebook and instructions

---
 samples/notebooks/anaconda_webinar/README.md  |  13 +-
 .../anaconda_webinar_notebook.ipynb           | 608 +++++++++++++++---
 .../synthetic_data_generation.py              | 208 ------
 .../package-lock.yml                          |   4 +
 4 files changed, 535 insertions(+), 298 deletions(-)
 delete mode 100644 samples/notebooks/anaconda_webinar/synthetic_data_generation.py
 create mode 100644 samples/sap_accounts_receivable_dbt/package-lock.yml

diff --git a/samples/notebooks/anaconda_webinar/README.md b/samples/notebooks/anaconda_webinar/README.md
index f15ffdf1..6f36fa4b 100644
--- a/samples/notebooks/anaconda_webinar/README.md
+++ b/samples/notebooks/anaconda_webinar/README.md
@@ -1,2 +1,11 @@
-# Title
-Placeholder title
+# Summary
+This notebook showcases how commonly requested analytics processes can be quickly implemented in Snowflake Notebooks. Those processes, and the key Python packages used in their implementation, are:
+
+1. Growth Accounting (pandas, streamlit)
+2. Forecasting (prophet, streamlit)
+3. Sales Enrichment (requests, Cortex LLM functions)
+
+# Permissions
+
+1. Access to `SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS` is necessary. Instructions are available in the Snowflake Documentation [here](https://docs.snowflake.com/en/user-guide/sample-data-using).
+2. Access to wikipedia.org is necessary. Instructions for setting up an External Access Integration are available [here](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-external-access).
diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
index f471f90f..3056e920 100644
--- a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -1,307 +1,739 @@
 {
- "metadata": {
-  "kernelspec": {
-   "display_name": "Streamlit Notebook",
-   "name": "streamlit"
-  }
- },
- "nbformat_minor": 5,
- "nbformat": 4,
  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "session_creation",
-    "resultHeight": 0,
-    "collapsed": false
+    "resultHeight": 0
    },
-   "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n\nimport logging\nlogging.getLogger(\"cmdstanpy\").setLevel(logging.WARNING)\nimport warnings\nwarnings.filterwarnings('ignore', category=FutureWarning)",
-   "execution_count": null,
-   "outputs": []
+   "outputs": [],
+   "source": [
+    "from snowflake.snowpark.context import get_active_session\n",
+    "session = get_active_session()\n",
+    "\n",
+    "import logging\n",
+    "logging.getLogger(\"cmdstanpy\").setLevel(logging.WARNING)\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore', category=FutureWarning)"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "d776341f-464d-4a9b-8c98-ac8e05286559",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "orders_sample",
-    "resultHeight": 426,
-    "collapsed": false
+    "resultHeight": 426
    },
    "outputs": [],
-   "source": "select\n    o_custkey,\n    o_orderdate,\n    o_totalprice\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\nlimit 10",
-   "execution_count": null
+   "source": [
+    "select\n",
+    "    o_custkey,\n",
+    "    o_orderdate,\n",
+    "    o_totalprice\n",
+    "from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\n",
+    "limit 10"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "23297335-ae53-477e-af45-1355957bc24e",
    "metadata": {
+    "codeCollapsed": true,
+    "collapsed": false,
     "language": "python",
     "name": "generate_synthetic_data",
-    "resultHeight": 60,
-    "collapsed": false,
-    "codeCollapsed": true
+    "resultHeight": 0
    },
    "outputs": [],
-   "source": "import numpy as np\nfrom datetime import datetime, timedelta\nimport pandas as pd\n\nclass OrderGenerator:\n    def __init__(\n        self,\n        # Basic parameters\n        start_date='1992-01-01',\n        end_date='1998-12-31',\n        target_daily_total=100_000_000,\n        target_daily_orders=500,\n        \n        # Trend parameters\n        annual_growth_rate=0.15,        # 15% annual growth\n        order_value_growth_rate=0.05,   # 5% annual growth in order values\n        \n        # Seasonal parameters\n        holiday_peak_day=350,           # Peak shopping day (Dec 16)\n        holiday_effect_magnitude=1.0,   # Strength of holiday effect\n        seasonal_baseline=0.8,          # Minimum seasonal multiplier\n        seasonal_spread=1000,           # Controls how spread out the holiday effect is\n        \n        # Weekly parameters\n        weekend_dip=0.85,              # Weekend order multiplier\n        weekday_boost=1.1,             # Weekday order multiplier\n        \n        # Value distribution parameters\n        pareto_shape=2.0,              # Shape parameter for order values\n        min_value_factor=0.3,          # Minimum order value as fraction of average\n        value_noise_stddev=0.15,       # Standard deviation for order value noise\n        \n        # Random seed for reproducibility\n        random_seed=None\n    ):\n        self.start_date = pd.to_datetime(start_date)\n        self.end_date = pd.to_datetime(end_date)\n        self.target_daily_total = target_daily_total\n        self.target_daily_orders = target_daily_orders\n        \n        # Store all other parameters\n        self.annual_growth_rate = annual_growth_rate\n        self.order_value_growth_rate = order_value_growth_rate\n        self.holiday_peak_day = holiday_peak_day\n        self.holiday_effect_magnitude = holiday_effect_magnitude\n        self.seasonal_baseline = seasonal_baseline\n        self.seasonal_spread = seasonal_spread\n        
self.weekend_dip = weekend_dip\n        self.weekday_boost = weekday_boost\n        self.pareto_shape = pareto_shape\n        self.min_value_factor = min_value_factor\n        self.value_noise_stddev = value_noise_stddev\n        \n        # Derived parameters\n        self.avg_order_value = target_daily_total / target_daily_orders\n        self.min_order_value = self.avg_order_value * self.min_value_factor\n        \n        if random_seed is not None:\n            np.random.seed(random_seed)\n    \n    def seasonal_effect(self, day_of_year):\n        \"\"\"Stronger effect during holiday season\"\"\"\n        holiday_effect = np.exp(\n            -((day_of_year - self.holiday_peak_day) ** 2) / \n            self.seasonal_spread\n        ) * self.holiday_effect_magnitude\n        return np.maximum(self.seasonal_baseline + holiday_effect, 0)\n    \n    def weekly_effect(self, day_of_week):\n        \"\"\"Weekend dips in orders\"\"\"\n        return self.weekend_dip if day_of_week in [5, 6] else self.weekday_boost\n    \n    def trend_effect(self, years_passed):\n        \"\"\"Long-term growth trend\"\"\"\n        return np.power(1 + self.annual_growth_rate, years_passed)\n    \n    def generate_order_value(self, years_passed):\n        \"\"\"Generate order values following a Pareto distribution\"\"\"\n        u = np.random.random()\n        value = self.min_order_value / np.power(1 - u, 1/self.pareto_shape)\n        value = value * np.power(1 + self.order_value_growth_rate, years_passed)\n        noise = np.random.normal(1, self.value_noise_stddev)\n        return round(value * noise)\n    \n    def generate_clerk(self):\n        \"\"\"Generate clerk IDs matching TPCH format\"\"\"\n        clerk_id = np.random.randint(1000)\n        return f\"Clerk#{clerk_id:09d}\"\n    \n    def generate_customer(self, num_customers=149999):\n        \"\"\"Generate customer IDs matching TPCH format\"\"\"\n        return np.random.randint(num_customers)\n    \n    def 
generate_orders(self):\n        \"\"\"Generate supplementary orders with realistic patterns\"\"\"\n        orders = []\n        current_date = self.start_date\n        \n        while current_date <= self.end_date:\n            day_of_year = current_date.dayofyear\n            years_passed = (current_date - self.start_date).days / 365\n            \n            seasonal = self.seasonal_effect(day_of_year)\n            weekly = self.weekly_effect(current_date.weekday())\n            trend = self.trend_effect(years_passed)\n            \n            target_orders = round(\n                self.target_daily_orders * \n                seasonal * weekly * trend\n            )\n            \n            for _ in range(target_orders):\n                order = {\n                    'o_orderdate': current_date,\n                    'o_totalprice': self.generate_order_value(years_passed),\n                    'o_orderstatus': 'O',\n                    'o_clerk': self.generate_clerk(),\n                    'o_custkey': self.generate_customer()\n                }\n                orders.append(order)\n            \n            current_date += timedelta(days=1)\n        \n        df = pd.DataFrame(orders)\n        df = df.sort_values('o_orderdate')\n        df['o_orderkey'] = range(len(df))\n        df['o_orderkey'] = df['o_orderkey'] + 1_500_000  # Offset to avoid conflicts\n        \n        return df\n\ndef generate_and_save_synthetic_data():\n    \"\"\"Generate orders and save to CSV\"\"\"\n    # Example: Generate 2 years of data with pronounced patterns\n    params = {\n        'start_date': '1992-01-01',\n        'end_date': '1998-08-02',\n        'target_daily_total': 100_000_000,\n        'target_daily_orders': 500,\n        'holiday_effect_magnitude': 1.2,\n        'weekend_dip': 0.8,\n        'annual_growth_rate': 0.15,\n        'value_noise_stddev': 0.15\n    }\n    \n    generator = OrderGenerator(**params)\n    df = generator.generate_orders()\n    #save the 
synthetic data to a temporary table\n    filename = 'synthetic_orders'\n    df.to_csv(filename + '.csv', index=False)\n    print(f\"Orders saved to CSV {filename}.csv\")\n    csv_df = pd.read_csv(filename + '.csv')\n    csv_df['o_orderdate'] = pd.to_datetime(df['o_orderdate'])\n    table_df = session.create_dataframe(csv_df)\n    table_df.write.mode(\"overwrite\").save_as_table(filename, table_type=\"temporary\")\n    print(f\"Order saved to temporary table {filename}\")\n    return\n\n# Generate and save orders\ngenerate_and_save_synthetic_data()",
-   "execution_count": null
+   "source": [
+    "import numpy as np\n",
+    "from datetime import datetime, timedelta\n",
+    "import pandas as pd\n",
+    "\n",
+    "class OrderGenerator:\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        # Basic parameters\n",
+    "        start_date='1992-01-01',\n",
+    "        end_date='1998-12-31',\n",
+    "        target_daily_total=100_000_000,\n",
+    "        target_daily_orders=500,\n",
+    "        \n",
+    "        # Trend parameters\n",
+    "        annual_growth_rate=0.15,        # 15% annual growth\n",
+    "        order_value_growth_rate=0.05,   # 5% annual growth in order values\n",
+    "        \n",
+    "        # Seasonal parameters\n",
+    "        holiday_peak_day=350,           # Peak shopping day (Dec 16)\n",
+    "        holiday_effect_magnitude=1.0,   # Strength of holiday effect\n",
+    "        seasonal_baseline=0.8,          # Minimum seasonal multiplier\n",
+    "        seasonal_spread=1000,           # Controls how spread out the holiday effect is\n",
+    "        \n",
+    "        # Weekly parameters\n",
+    "        weekend_dip=0.85,              # Weekend order multiplier\n",
+    "        weekday_boost=1.1,             # Weekday order multiplier\n",
+    "        \n",
+    "        # Value distribution parameters\n",
+    "        pareto_shape=2.0,              # Shape parameter for order values\n",
+    "        min_value_factor=0.3,          # Minimum order value as fraction of average\n",
+    "        value_noise_stddev=0.15,       # Standard deviation for order value noise\n",
+    "        \n",
+    "        # Random seed for reproducibility\n",
+    "        random_seed=None\n",
+    "    ):\n",
+    "        self.start_date = pd.to_datetime(start_date)\n",
+    "        self.end_date = pd.to_datetime(end_date)\n",
+    "        self.target_daily_total = target_daily_total\n",
+    "        self.target_daily_orders = target_daily_orders\n",
+    "        \n",
+    "        # Store all other parameters\n",
+    "        self.annual_growth_rate = annual_growth_rate\n",
+    "        self.order_value_growth_rate = order_value_growth_rate\n",
+    "        self.holiday_peak_day = holiday_peak_day\n",
+    "        self.holiday_effect_magnitude = holiday_effect_magnitude\n",
+    "        self.seasonal_baseline = seasonal_baseline\n",
+    "        self.seasonal_spread = seasonal_spread\n",
+    "        self.weekend_dip = weekend_dip\n",
+    "        self.weekday_boost = weekday_boost\n",
+    "        self.pareto_shape = pareto_shape\n",
+    "        self.min_value_factor = min_value_factor\n",
+    "        self.value_noise_stddev = value_noise_stddev\n",
+    "        \n",
+    "        # Derived parameters\n",
+    "        self.avg_order_value = target_daily_total / target_daily_orders\n",
+    "        self.min_order_value = self.avg_order_value * self.min_value_factor\n",
+    "        \n",
+    "        if random_seed is not None:\n",
+    "            np.random.seed(random_seed)\n",
+    "    \n",
+    "    def seasonal_effect(self, day_of_year):\n",
+    "        \"\"\"Stronger effect during holiday season\"\"\"\n",
+    "        holiday_effect = np.exp(\n",
+    "            -((day_of_year - self.holiday_peak_day) ** 2) / \n",
+    "            self.seasonal_spread\n",
+    "        ) * self.holiday_effect_magnitude\n",
+    "        return np.maximum(self.seasonal_baseline + holiday_effect, 0)\n",
+    "    \n",
+    "    def weekly_effect(self, day_of_week):\n",
+    "        \"\"\"Weekend dips in orders\"\"\"\n",
+    "        return self.weekend_dip if day_of_week in [5, 6] else self.weekday_boost\n",
+    "    \n",
+    "    def trend_effect(self, years_passed):\n",
+    "        \"\"\"Long-term growth trend\"\"\"\n",
+    "        return np.power(1 + self.annual_growth_rate, years_passed)\n",
+    "    \n",
+    "    def generate_order_value(self, years_passed):\n",
+    "        \"\"\"Generate order values following a Pareto distribution\"\"\"\n",
+    "        u = np.random.random()\n",
+    "        value = self.min_order_value / np.power(1 - u, 1/self.pareto_shape)\n",
+    "        value = value * np.power(1 + self.order_value_growth_rate, years_passed)\n",
+    "        noise = np.random.normal(1, self.value_noise_stddev)\n",
+    "        return round(value * noise)\n",
+    "    \n",
+    "    def generate_clerk(self):\n",
+    "        \"\"\"Generate clerk IDs matching TPCH format\"\"\"\n",
+    "        clerk_id = np.random.randint(1000)\n",
+    "        return f\"Clerk#{clerk_id:09d}\"\n",
+    "    \n",
+    "    def generate_customer(self, num_customers=149999):\n",
+    "        \"\"\"Generate customer IDs matching TPCH format\"\"\"\n",
+    "        return np.random.randint(num_customers)\n",
+    "    \n",
+    "    def generate_orders(self):\n",
+    "        \"\"\"Generate supplementary orders with realistic patterns\"\"\"\n",
+    "        orders = []\n",
+    "        current_date = self.start_date\n",
+    "        \n",
+    "        while current_date <= self.end_date:\n",
+    "            day_of_year = current_date.dayofyear\n",
+    "            years_passed = (current_date - self.start_date).days / 365\n",
+    "            \n",
+    "            seasonal = self.seasonal_effect(day_of_year)\n",
+    "            weekly = self.weekly_effect(current_date.weekday())\n",
+    "            trend = self.trend_effect(years_passed)\n",
+    "            \n",
+    "            target_orders = round(\n",
+    "                self.target_daily_orders * \n",
+    "                seasonal * weekly * trend\n",
+    "            )\n",
+    "            \n",
+    "            for _ in range(target_orders):\n",
+    "                order = {\n",
+    "                    'o_orderdate': current_date,\n",
+    "                    'o_totalprice': self.generate_order_value(years_passed),\n",
+    "                    'o_orderstatus': 'O',\n",
+    "                    'o_clerk': self.generate_clerk(),\n",
+    "                    'o_custkey': self.generate_customer()\n",
+    "                }\n",
+    "                orders.append(order)\n",
+    "            \n",
+    "            current_date += timedelta(days=1)\n",
+    "        \n",
+    "        df = pd.DataFrame(orders)\n",
+    "        df = df.sort_values('o_orderdate')\n",
+    "        df['o_orderkey'] = range(len(df))\n",
+    "        df['o_orderkey'] = df['o_orderkey'] + 1_500_000  # Offset to avoid conflicts\n",
+    "        \n",
+    "        return df\n",
+    "\n",
+    "def generate_and_save_synthetic_data():\n",
+    "    \"\"\"Generate orders, save them to CSV, and load them into a temporary table\"\"\"\n",
+    "    # Generate orders from 1992-01-01 through 1998-08-02 with pronounced patterns\n",
+    "    params = {\n",
+    "        'start_date': '1992-01-01',\n",
+    "        'end_date': '1998-08-02',\n",
+    "        'target_daily_total': 100_000_000,\n",
+    "        'target_daily_orders': 500,\n",
+    "        'holiday_effect_magnitude': 1.2,\n",
+    "        'weekend_dip': 0.8,\n",
+    "        'annual_growth_rate': 0.15,\n",
+    "        'value_noise_stddev': 0.15\n",
+    "    }\n",
+    "    \n",
+    "    generator = OrderGenerator(**params)\n",
+    "    df = generator.generate_orders()\n",
+    "    #save the synthetic data to a temporary table\n",
+    "    filename = 'synthetic_orders'\n",
+    "    df.to_csv(filename + '.csv', index=False)\n",
+    "    print(f\"Orders saved to CSV {filename}.csv\")\n",
+    "    csv_df = pd.read_csv(filename + '.csv')\n",
+    "    csv_df['o_orderdate'] = pd.to_datetime(df['o_orderdate'])\n",
+    "    table_df = session.create_dataframe(csv_df)\n",
+    "    table_df.write.mode(\"overwrite\").save_as_table(filename, table_type=\"temporary\")\n",
+    "    print(f\"Order saved to temporary table {filename}\")\n",
+    "    return\n",
+    "\n",
+    "# Generate and save orders\n",
+    "generate_and_save_synthetic_data()"
+   ]
   },
   {
    "cell_type": "markdown",
    "id": "ca0f2f8f-33ae-4934-9064-f44a3e5ef5c9",
    "metadata": {
-    "name": "growth_accounting_intro",
     "collapsed": false,
+    "name": "growth_accounting_intro",
     "resultHeight": 74
    },
-   "source": "# Growth Accounting"
+   "source": [
+    "# Growth Accounting"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "b10ebdb4-78f3-49f3-ab81-529b0afd662d",
    "metadata": {
+    "codeCollapsed": false,
+    "collapsed": false,
     "language": "sql",
     "name": "orders",
-    "resultHeight": 510,
-    "collapsed": false,
-    "codeCollapsed": false
+    "resultHeight": 510
    },
    "outputs": [],
-   "source": "with synthetic as (\n\n    select\n        \"o_custkey\" as id,\n        to_date(\"o_orderdate\") as o_orderdate,\n        CAST(\"o_totalprice\" AS NUMERIC) as o_totalprice\n    from synthetic_orders\n    --SAMPLE (1000000 rows)\n\n),\n\noriginal as (\n    \n    select\n        o_custkey as id,\n        o_orderdate,\n        o_totalprice\n    from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\n    --SAMPLE (1000000 rows)\n\n)\n\nselect * from synthetic\nunion all \nselect * from original",
-   "execution_count": null
+   "source": [
+    "with synthetic as (\n",
+    "\n",
+    "    select\n",
+    "        \"o_custkey\" as id,\n",
+    "        to_date(\"o_orderdate\") as o_orderdate,\n",
+    "        CAST(\"o_totalprice\" AS NUMERIC) as o_totalprice\n",
+    "    from synthetic_orders\n",
+    "    --SAMPLE (1000000 rows)\n",
+    "\n",
+    "),\n",
+    "\n",
+    "\n",
+    "original as (\n",
+    "    \n",
+    "    select\n",
+    "        o_custkey as id,\n",
+    "        o_orderdate,\n",
+    "        o_totalprice\n",
+    "    from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\n",
+    "    --SAMPLE (1000000 rows)\n",
+    "\n",
+    ")\n",
+    "\n",
+    "select * from synthetic\n",
+    "union all \n",
+    "select * from original"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "b933a301-0086-4682-9a6b-c0d430f62f87",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "annual_customer_orders",
     "resultHeight": 510
    },
    "outputs": [],
-   "source": "select\n    id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom {{ orders }}\ngroup by all\norder by id, order_year",
-   "execution_count": null
+   "source": [
+    "select\n",
+    "    id,\n",
+    "    date_trunc(year, o_orderdate) as order_year,\n",
+    "    sum(o_totalprice) as total\n",
+    "from {{ orders }}\n",
+    "group by all\n",
+    "order by id, order_year"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "a789790e-47be-4b57-94a1-53832336abb1",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "add_rows_for_years_without_sales",
     "resultHeight": 0
    },
    "outputs": [],
-   "source": "annual_customer_orders_df = annual_customer_orders.to_pandas()\n\n#pivot data to add row for each id:year with no revenue\nresult = annual_customer_orders_df.pivot_table(\n    index='ID',\n    columns='ORDER_YEAR', \n    values='TOTAL',\n    fill_value=0\n).reset_index().melt(\n    id_vars='ID',\n    var_name='ORDER_YEAR',\n    value_name='TOTAL'\n)\n\n# save the dataframe as table for SQL querying \ndf = session.create_dataframe(result)\ndf.write.mode(\"overwrite\").save_as_table(\"annual_customer_orders\", table_type=\"temporary\")",
-   "execution_count": null
+   "source": [
+    "annual_customer_orders_df = annual_customer_orders.to_pandas()\n",
+    "\n",
+    "#pivot data to add row for each id:year with no revenue\n",
+    "result = annual_customer_orders_df.pivot_table(\n",
+    "    index='ID',\n",
+    "    columns='ORDER_YEAR', \n",
+    "    values='TOTAL',\n",
+    "    fill_value=0\n",
+    ").reset_index().melt(\n",
+    "    id_vars='ID',\n",
+    "    var_name='ORDER_YEAR',\n",
+    "    value_name='TOTAL'\n",
+    ")\n",
+    "\n",
+    "# save the dataframe as table for SQL querying \n",
+    "df = session.create_dataframe(result)\n",
+    "df.write.mode(\"overwrite\").save_as_table(\"annual_customer_orders\", table_type=\"temporary\")"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "70c25d11-94cb-40f0-985a-89e8d8839d8e",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "sample_annual_customer_orders",
-    "resultHeight": 426
+    "resultHeight": 438
    },
    "outputs": [],
-   "source": "select * from annual_customer_orders\norder by id, order_year\nlimit 10",
-   "execution_count": null
+   "source": [
+    "select * from annual_customer_orders\n",
+    "order by id, order_year\n",
+    "limit 25"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "d092b952-57aa-4076-b1cd-575279473bab",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "labeled_annual_customer_orders",
     "resultHeight": 510
    },
    "outputs": [],
-   "source": "with windowed as (\n    \n    select\n        *,\n        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n    from annual_customer_orders\n\n)\n\nselect *,\n  case\n    when total = previous_year_total and total > 0 then 'retained'\n    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n    when total = 0 and previous_year_total > 0 then 'churned'\n    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n  else 'irrelevant' end as category,\n  case category\n    when 'retained' then 0\n    when 'new' then total\n    when 'churned' then (-1 * previous_year_total)\n    when 'expanded' then total - previous_year_total\n    when 'contracted' then (-1 * (previous_year_total - total))\n    when 'resurrected' then total\n  else 0 end as net_change\nfrom windowed\norder by id, order_year",
-   "execution_count": null
+   "source": [
+    "with windowed as (\n",
+    "    \n",
+    "    select\n",
+    "        *,\n",
+    "        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n",
+    "        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n",
+    "    from annual_customer_orders\n",
+    "\n",
+    ")\n",
+    "\n",
+    "select *,\n",
+    "  case\n",
+    "    when total = previous_year_total and total > 0 then 'retained'\n",
+    "    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n",
+    "    when total = 0 and previous_year_total > 0 then 'churned'\n",
+    "    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n",
+    "    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n",
+    "    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n",
+    "  else 'irrelevant' end as category,\n",
+    "  case category\n",
+    "    when 'retained' then 0\n",
+    "    when 'new' then total\n",
+    "    when 'churned' then (-1 * previous_year_total)\n",
+    "    when 'expanded' then total - previous_year_total\n",
+    "    when 'contracted' then (-1 * (previous_year_total - total))\n",
+    "    when 'resurrected' then total\n",
+    "  else 0 end as net_change\n",
+    "from windowed\n",
+    "order by id, order_year"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "4fa6afc9-934a-40fb-a8ef-f6aedaec3ba0",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "annual_growth_labels",
     "resultHeight": 438
    },
    "outputs": [],
-   "source": "select\n    date_part(year, order_year) as order_year,\n    category,\n    round(sum(total)) as total,\n    round(sum(net_change)) as net_change\nfrom {{ labeled_annual_customer_orders }}\ngroup by all",
-   "execution_count": null
+   "source": [
+    "with final as (\n",
+    "\n",
+    "select\n",
+    "    date_part(year, order_year) as order_year,\n",
+    "    category,\n",
+    "    round(sum(total)) as total,\n",
+    "    round(sum(net_change)) as net_change\n",
+    "from {{ labeled_annual_customer_orders }}\n",
+    "group by all\n",
+    "\n",
+    ")\n",
+    "\n",
+    "select * from final\n",
+    "-- exclude first and last years\n",
+    "where order_year not in (1992, 1998)"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "9f67f2b4-9c22-453d-abc0-68e5fbbc2e7f",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "visualize_growth_framework",
-    "resultHeight": 239
+    "resultHeight": 772
    },
    "outputs": [],
-   "source": "import streamlit as st\n# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(annual_growth_labels, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
-   "execution_count": null
+   "source": [
+    "import streamlit as st\n",
+    "st.bar_chart(annual_growth_labels, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "2e2a6a8c-14e5-47f2-997e-fa53600564f2",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "download_growth_accounting_csv",
     "resultHeight": 96
    },
    "outputs": [],
-   "source": "df = labeled_annual_customer_orders.to_pandas()\nbutton_csv = df.to_csv().encode(\"utf-8\")\nst.download_button(label=\"Download\", data=button_csv, file_name=\"growth_accounting.csv\", mime=\"text/csv\")",
-   "execution_count": null
+   "source": [
+    "df = labeled_annual_customer_orders.to_pandas()\n",
+    "button_csv = df.to_csv().encode(\"utf-8\")\n",
+    "st.download_button(label=\"Download\", data=button_csv, file_name=\"growth_accounting.csv\", mime=\"text/csv\")"
+   ]
   },
   {
    "cell_type": "markdown",
    "id": "fbd5ea2b-6a4f-423e-8e50-ea5d96eb8140",
    "metadata": {
-    "name": "forecasting_intro",
     "collapsed": false,
+    "name": "forecasting_intro",
     "resultHeight": 74
    },
-   "source": "# Forecasting"
+   "source": [
+    "# Forecasting"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "16ec54e1-54cf-468c-a2d9-8bb8bd4abaaa",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "daily_order_data",
-    "resultHeight": 438,
-    "collapsed": false
+    "resultHeight": 438
    },
    "outputs": [],
-   "source": "select\n    date_trunc(day, o_orderdate) as order_date,\n    sum(o_totalprice) as sum_revenue,\n    count(*) as num_orders\nfrom {{ orders }}\ngroup by 1\norder by order_date asc",
-   "execution_count": null
+   "source": [
+    "select\n",
+    "    date_trunc(day, o_orderdate) as order_date,\n",
+    "    sum(o_totalprice) as sum_revenue,\n",
+    "    count(*) as num_orders\n",
+    "from {{ orders }}\n",
+    "group by 1\n",
+    "order by order_date asc"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "e1368eea-3b25-46fd-92d9-d890e07dc61e",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "prophet_data_preparation",
-    "resultHeight": 372,
-    "collapsed": false
+    "resultHeight": 372
    },
    "outputs": [],
-   "source": "from prophet import Prophet\nfrom prophet.plot import plot_plotly, plot_components_plotly\n\ndf = daily_order_data.to_pandas()\nprophet_df = df.rename(columns={'ORDER_DATE': 'ds', 'SUM_REVENUE': 'y'})\nst.line_chart(prophet_df, x='ds', y='y')",
-   "execution_count": null
+   "source": [
+    "from prophet import Prophet\n",
+    "from prophet.plot import plot_plotly, plot_components_plotly\n",
+    "\n",
+    "df = daily_order_data.to_pandas()\n",
+    "prophet_df = df.rename(columns={'ORDER_DATE': 'ds', 'SUM_REVENUE': 'y'})\n",
+    "st.line_chart(prophet_df, x='ds', y='y')"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "bff69396-4c45-477a-a03a-9c173e9e0a02",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "project_future_daily_sales",
-    "resultHeight": 41
+    "resultHeight": 981
    },
    "outputs": [],
-   "source": "m = Prophet()\ntry:\n    m.fit(prophet_df)\nexcept Exception as err:\n    print(Exception, err)\n\nfuture = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)",
-   "execution_count": null
+   "source": [
+    "m = Prophet()\n",
+    "try:\n",
+    "    m.fit(prophet_df)\n",
+    "except Exception as err:\n",
+    "    print(Exception, err)\n",
+    "\n",
+    "future = m.make_future_dataframe(periods=365)\n",
+    "forecast = m.predict(future)\n",
+    "fig1 = m.plot(forecast)"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3ad6456c-376a-409b-a006-a42bfbb005fa",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "inspect_forecasting_components",
-    "resultHeight": 41
+    "resultHeight": 1480
    },
    "outputs": [],
-   "source": "fig2 = m.plot_components(forecast)",
-   "execution_count": null
+   "source": [
+    "fig2 = m.plot_components(forecast)"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "f30b1c81-80bf-4571-b971-84443f55630d",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "simplify_forecast_visualization",
     "resultHeight": 372
    },
    "outputs": [],
-   "source": "df = pd.DataFrame({\n    'ds': forecast['ds'],\n    'y': m.history['y'],\n    # Only show yhat for future dates\n    'yhat': np.where(forecast['ds'] > m.history['ds'].max(), forecast['yhat'], np.nan)\n})\n\nst.line_chart(df, x='ds', y=['y', 'yhat'])",
-   "execution_count": null
+   "source": [
+    "df = pd.DataFrame({\n",
+    "    'ds': forecast['ds'],\n",
+    "    'y': m.history['y'],\n",
+    "    # Only show yhat for future dates\n",
+    "    'yhat': np.where(forecast['ds'] > m.history['ds'].max(), forecast['yhat'], np.nan)\n",
+    "})\n",
+    "\n",
+    "st.line_chart(df, x='ds', y=['y', 'yhat'])"
+   ]
   },
   {
    "cell_type": "markdown",
    "id": "5232d8e1-8ecb-4bb4-94c2-dd7122caaf30",
    "metadata": {
-    "name": "customer_segmentation_introduction",
     "collapsed": false,
+    "name": "customer_segmentation_introduction",
     "resultHeight": 74
    },
-   "source": "# Customer Segmentation"
+   "source": [
+    "# Customer Segmentation"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "6a901764-40e1-4607-850c-444ad00450ef",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "sample_company_data",
-    "resultHeight": 426,
-    "collapsed": false
+    "resultHeight": 438
    },
    "outputs": [],
-   "source": "select *\nfrom ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\nlimit 10",
-   "execution_count": null
+   "source": [
+    "select *\n",
+    "from ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\n",
+    "limit 20"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "e7acf161-5e2d-4277-89ea-65f1256358e4",
    "metadata": {
+    "collapsed": false,
     "language": "python",
     "name": "construct_api_request",
-    "resultHeight": 0,
-    "collapsed": false
+    "resultHeight": 0
    },
    "outputs": [],
-   "source": "import requests\n\ndef get_wiki_extract(title):\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n    \n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n    \n    # Make the request\n    response = requests.get(url, params=params)\n    \n    # Check if request was successful\n    if response.status_code == 200:\n        data = response.json()\n        # Navigate through the JSON response to get the extract\n        pages = data[\"query\"][\"pages\"]\n        # Get the first (and only) page's extract\n        page = list(pages.values())[0]\n        return page.get(\"extract\", \"No extract available\")\n    else:\n        return f\"Error: {response.status_code}\"",
-   "execution_count": null
+   "source": [
+    "import requests\n",
+    "\n",
+    "def get_wiki_extract(title):\n",
+    "    # Base URL for Wikipedia's API\n",
+    "    url = \"https://en.wikipedia.org/w/api.php\"\n",
+    "    \n",
+    "    # Parameters for the API request\n",
+    "    params = {\n",
+    "        \"action\": \"query\",\n",
+    "        \"format\": \"json\",\n",
+    "        \"titles\": title,\n",
+    "        \"prop\": \"extracts\",\n",
+    "        \"exintro\": True,  # Only get the intro section\n",
+    "        \"explaintext\": True,  # Get plain text instead of HTML\n",
+    "    }\n",
+    "    \n",
+    "    # Make the request\n",
+    "    response = requests.get(url, params=params)\n",
+    "    \n",
+    "    # Check if request was successful\n",
+    "    if response.status_code == 200:\n",
+    "        data = response.json()\n",
+    "        # Navigate through the JSON response to get the extract\n",
+    "        pages = data[\"query\"][\"pages\"]\n",
+    "        # Get the first (and only) page's extract\n",
+    "        page = list(pages.values())[0]\n",
+    "        return page.get(\"extract\", \"No extract available\")\n",
+    "    else:\n",
+    "        return f\"Error: {response.status_code}\""
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "94963e7c-8d39-46e5-a035-4838ebb3617e",
    "metadata": {
+    "collapsed": false,
     "language": "python",
-    "name": "extraxt_wikipedia_descriptions",
-    "resultHeight": 284,
-    "collapsed": false
+    "name": "extract_wikipedia_descriptions",
+    "resultHeight": 508
    },
    "outputs": [],
-   "source": "df = sample_company_data.to_pandas()\ncompany_names = df['NAME'].tolist()\ncsv_list = []\n\nprint(\"extracting descriptions\")\n\nfor name in company_names:\n    try:\n        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n        print(f'extracted description of {name} from Wikipedia')\n    except Exception as e:\n        print(f\"Error getting Wikipedia extract for {name}: {str(e)}\")\n        extract = \"None available\"\n        \n    csv_list.append((name, extract))\n\nprint(\"finished extracting descriptions\")\n\n# save the dataframe as table for SQL querying \ndf = pd.DataFrame(csv_list, columns=['name', 'description'])\ndf = session.create_dataframe(df)\ndf.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")",
-   "execution_count": null
+   "source": [
+    "df = sample_company_data.to_pandas()\n",
+    "company_names = df['NAME'].tolist()\n",
+    "csv_list = []\n",
+    "\n",
+    "print(\"extracting descriptions\")\n",
+    "\n",
+    "for name in company_names:\n",
+    "    try:\n",
+    "        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n",
+    "        print(f'extracted description of {name} from Wikipedia')\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error getting Wikipedia extract for {name}: {str(e)}\")\n",
+    "        extract = \"None available\"\n",
+    "        \n",
+    "    csv_list.append((name, extract))\n",
+    "\n",
+    "print(\"finished extracting descriptions\")\n",
+    "\n",
+    "# save the dataframe as table for SQL querying \n",
+    "df = pd.DataFrame(csv_list, columns=['name', 'description'])\n",
+    "df = session.create_dataframe(df)\n",
+    "df.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "81c446dc-5c36-42e3-bb0d-985d397af0ca",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "display_wikipedia_descriptions",
-    "resultHeight": 426,
-    "collapsed": false
+    "resultHeight": 438
    },
    "outputs": [],
-   "source": "select \"name\", \"description\" from prospects",
-   "execution_count": null
+   "source": [
+    "select \"name\", \"description\" from prospects"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "6b559934-f89d-418e-9a1f-38ef7faa03ad",
    "metadata": {
+    "collapsed": false,
     "language": "sql",
     "name": "categorize_descriptions_with_LLM",
-    "resultHeight": 391,
-    "collapsed": false
+    "resultHeight": 426
    },
    "outputs": [],
-   "source": "select \n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['extremely likely', 'somewhat likely', 'unlikely'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in attending a webinar showcasing the GTM utility of Snowflake Notebooks and Anaconda Python Packages.'\n        }\n    ):label::STRING as persona_likelihood,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['healthcare', 'finance', 'retail', 'technology', 'communication', 'other'],\n        {\n            'task_description': 'Return the most likely industry of the company based on this description.'\n        }\n    ):label::STRING as industry,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['California', 'South', 'Northeast', 'Midatlantic', 'Midwest', 'Pacific Northwest', 'Outsite the US'],\n        {\n            'task_description': 'Return the most likely region the company is headquartered in based on this description.'\n        }\n    ):label::STRING as region\nfrom prospects\nwhere \"description\" is not null and \"description\" != ''\nlimit 10\n-- other class. ideas: industry, main product, region",
-   "execution_count": null
+   "source": [
+    "select\n",
+    "    \"name\",\n",
+    "    snowflake.cortex.classify_text(\n",
+    "        \"description\",\n",
+    "        ['extremely likely', 'somewhat likely', 'unlikely'],\n",
+    "        {\n",
+    "            'task_description': 'Return the likelihood that this company would be interested in attending a webinar showcasing the GTM utility of Snowflake Notebooks and Anaconda Python Packages.'\n",
+    "        }\n",
+    "    ):label::STRING as persona_likelihood,\n",
+    "    snowflake.cortex.classify_text(\n",
+    "        \"description\",\n",
+    "        ['healthcare', 'finance', 'retail', 'technology', 'communication', 'other'],\n",
+    "        {\n",
+    "            'task_description': 'Return the most likely industry of the company based on this description.'\n",
+    "        }\n",
+    "    ):label::STRING as industry,\n",
+    "    snowflake.cortex.classify_text(\n",
+    "        \"description\",\n",
+    "        ['California', 'South', 'Northeast', 'Midatlantic', 'Midwest', 'Pacific Northwest', 'Outside the US'],\n",
+    "        {\n",
+    "            'task_description': 'Return the most likely region the company is headquartered in based on this description.'\n",
+    "        }\n",
+    "    ):label::STRING as region,\n",
+    "    \"description\"\n",
+    "from prospects\n",
+    "where \"description\" is not null and \"description\" != ''\n",
+    "limit 10\n"
+   ]
   }
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Streamlit Notebook",
+   "name": "streamlit"
+  },
+  "lastEditStatus": {
+   "authorEmail": "wluna@anaconda.com",
+   "authorId": "405715820451",
+   "authorName": "WLUNA",
+   "lastEditTime": 1737744033132,
+   "notebookId": "2jcfdffhscjksh5ccsf7",
+   "sessionId": "e4e4ef1b-68d9-44f4-b7b9-1d472664a700"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/samples/notebooks/anaconda_webinar/synthetic_data_generation.py b/samples/notebooks/anaconda_webinar/synthetic_data_generation.py
deleted file mode 100644
index 9f638bff..00000000
--- a/samples/notebooks/anaconda_webinar/synthetic_data_generation.py
+++ /dev/null
@@ -1,208 +0,0 @@
-import numpy as np
-from datetime import datetime, timedelta
-import pandas as pd
-import matplotlib.pyplot as plt
-from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
-
-class OrderGenerator:
-    def __init__(
-        self,
-        # Basic parameters
-        start_date='1992-01-01',
-        end_date='1998-12-31',
-        target_daily_total=100_000_000,
-        target_daily_orders=500,
-        
-        # Trend parameters
-        annual_growth_rate=0.15,        # 15% annual growth
-        order_value_growth_rate=0.05,   # 5% annual growth in order values
-        
-        # Seasonal parameters
-        holiday_peak_day=350,           # Peak shopping day (Dec 16)
-        holiday_effect_magnitude=1.0,   # Strength of holiday effect
-        seasonal_baseline=0.8,          # Minimum seasonal multiplier
-        seasonal_spread=1000,           # Controls how spread out the holiday effect is
-        
-        # Weekly parameters
-        weekend_dip=0.85,              # Weekend order multiplier
-        weekday_boost=1.1,             # Weekday order multiplier
-        
-        # Value distribution parameters
-        pareto_shape=2.0,              # Shape parameter for order values
-        min_value_factor=0.3,          # Minimum order value as fraction of average
-        value_noise_stddev=0.15,       # Standard deviation for order value noise
-        
-        # Random seed for reproducibility
-        random_seed=None
-    ):
-        self.start_date = pd.to_datetime(start_date)
-        self.end_date = pd.to_datetime(end_date)
-        self.target_daily_total = target_daily_total
-        self.target_daily_orders = target_daily_orders
-        
-        # Store all other parameters
-        self.annual_growth_rate = annual_growth_rate
-        self.order_value_growth_rate = order_value_growth_rate
-        self.holiday_peak_day = holiday_peak_day
-        self.holiday_effect_magnitude = holiday_effect_magnitude
-        self.seasonal_baseline = seasonal_baseline
-        self.seasonal_spread = seasonal_spread
-        self.weekend_dip = weekend_dip
-        self.weekday_boost = weekday_boost
-        self.pareto_shape = pareto_shape
-        self.min_value_factor = min_value_factor
-        self.value_noise_stddev = value_noise_stddev
-        
-        # Derived parameters
-        self.avg_order_value = target_daily_total / target_daily_orders
-        self.min_order_value = self.avg_order_value * self.min_value_factor
-        
-        if random_seed is not None:
-            np.random.seed(random_seed)
-    
-    def seasonal_effect(self, day_of_year):
-        """Stronger effect during holiday season"""
-        holiday_effect = np.exp(
-            -((day_of_year - self.holiday_peak_day) ** 2) / 
-            self.seasonal_spread
-        ) * self.holiday_effect_magnitude
-        return np.maximum(self.seasonal_baseline + holiday_effect, 0)
-    
-    def weekly_effect(self, day_of_week):
-        """Weekend dips in orders"""
-        return self.weekend_dip if day_of_week in [5, 6] else self.weekday_boost
-    
-    def trend_effect(self, years_passed):
-        """Long-term growth trend"""
-        return np.power(1 + self.annual_growth_rate, years_passed)
-    
-    def generate_order_value(self, years_passed):
-        """Generate order values following a Pareto distribution"""
-        u = np.random.random()
-        value = self.min_order_value / np.power(1 - u, 1/self.pareto_shape)
-        value = value * np.power(1 + self.order_value_growth_rate, years_passed)
-        noise = np.random.normal(1, self.value_noise_stddev)
-        return round(value * noise)
-    
-    def generate_clerk(self):
-        """Generate clerk IDs matching TPCH format"""
-        clerk_id = np.random.randint(1000)
-        return f"Clerk#{clerk_id:09d}"
-    
-    def generate_customer(self, num_customers=1500):
-        """Generate customer IDs matching TPCH format"""
-        return f"Customer#{np.random.randint(num_customers):09d}"
-    
-    def generate_orders(self):
-        """Generate supplementary orders with realistic patterns"""
-        orders = []
-        current_date = self.start_date
-        
-        while current_date <= self.end_date:
-            day_of_year = current_date.dayofyear
-            years_passed = (current_date - self.start_date).days / 365
-            
-            seasonal = self.seasonal_effect(day_of_year)
-            weekly = self.weekly_effect(current_date.weekday())
-            trend = self.trend_effect(years_passed)
-            
-            target_orders = round(
-                self.target_daily_orders * 
-                seasonal * weekly * trend
-            )
-            
-            for _ in range(target_orders):
-                order = {
-                    'o_orderdate': current_date,
-                    'o_totalprice': self.generate_order_value(years_passed),
-                    'o_orderstatus': 'O',
-                    'o_clerk': self.generate_clerk(),
-                    'o_custkey': self.generate_customer()
-                }
-                orders.append(order)
-            
-            current_date += timedelta(days=1)
-        
-        df = pd.DataFrame(orders)
-        df = df.sort_values('o_orderdate')
-        df['o_orderkey'] = range(len(df))
-        df['o_orderkey'] = df['o_orderkey'] + 1_500_000  # Offset to avoid conflicts
-        
-        return df
-
-def generate_and_save_orders(filename, **generator_params):
-    """Generate orders and save to CSV"""
-    generator = OrderGenerator(**generator_params)
-    df = generator.generate_orders()
-    df.to_csv(filename, index=False)
-    print(f"Orders saved to {filename}")
-    return df
-
-def plot_daily_patterns(filename, figsize=(15, 8), plot_style='compressed'):
-    """Load orders from CSV and create visualization"""
-    df = pd.read_csv(filename)
-    df['o_orderdate'] = pd.to_datetime(df['o_orderdate'])
-    
-    daily_summary = df.groupby('o_orderdate').agg({
-        'o_orderkey': 'count',
-        'o_totalprice': 'sum'
-    }).reset_index()
-    
-    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize)
-    
-    # Plot daily totals
-    ax1.plot(daily_summary['o_orderdate'], daily_summary['o_totalprice'], 
-            color='blue', linewidth=0.5)
-    ax1.set_title('Daily Order Totals')
-    ax1.set_ylabel('Daily Total ($)')
-    ax1.grid(True, alpha=0.3)
-    
-    # Set x-axis ticks to show years and months
-    ax1.xaxis.set_major_locator(YearLocator())
-    ax1.xaxis.set_minor_locator(MonthLocator())
-    ax1.xaxis.set_major_formatter(DateFormatter('%Y'))
-    ax1.yaxis.set_major_formatter(lambda x, p: f'${x/1e6:.1f}M')
-    
-    # Plot daily order counts
-    ax2.plot(daily_summary['o_orderdate'], daily_summary['o_orderkey'], 
-            color='green', linewidth=0.5)
-    ax2.set_title('Daily Order Count')
-    ax2.set_ylabel('Number of Orders')
-    ax2.grid(True, alpha=0.3)
-    
-    ax2.xaxis.set_major_locator(YearLocator())
-    ax2.xaxis.set_minor_locator(MonthLocator())
-    ax2.xaxis.set_major_formatter(DateFormatter('%Y'))
-    
-    for ax in [ax1, ax2]:
-        plt.setp(ax.get_xticklabels(), rotation=45)
-    
-    plt.tight_layout()
-    
-    # Print summary statistics
-    print("\nSummary Statistics:")
-    print(f"Date Range: {daily_summary['o_orderdate'].min().date()} to {daily_summary['o_orderdate'].max().date()}")
-    print(f"Average daily orders: {daily_summary['o_orderkey'].mean():.0f}")
-    print(f"Average daily total: ${daily_summary['o_totalprice'].mean():,.2f}")
-    
-    return fig
-
-if __name__ == "__main__":
-    # Example: Generate 2 years of data with pronounced patterns
-    params = {
-        'start_date': '1992-01-01',
-        'end_date': '1998-08-02',
-        'target_daily_total': 100_000_000,
-        'target_daily_orders': 500,
-        'holiday_effect_magnitude': 1.2,
-        'weekend_dip': 0.8,
-        'annual_growth_rate': 0.15,
-        'value_noise_stddev': 0.15
-    }
-    
-    # Generate and save orders
-    generate_and_save_orders('supplementary_orders.csv', **params)
-    
-    # Create visualization
-    fig = plot_daily_patterns('supplementary_orders.csv')
-    plt.show()
diff --git a/samples/sap_accounts_receivable_dbt/package-lock.yml b/samples/sap_accounts_receivable_dbt/package-lock.yml
new file mode 100644
index 00000000..7fa7e893
--- /dev/null
+++ b/samples/sap_accounts_receivable_dbt/package-lock.yml
@@ -0,0 +1,4 @@
+packages:
+  - git: https://github.com/dbt-labs/dbt-utils.git
+    revision: 68b4b4dadc20cd5cc2a894bd2ad62aa1b8176dc7
+sha1_hash: 0f4dc0fb373403efb568c23241b42220d924b872

From ffdc1143b194c74d1a8bcea776b4e10629c69aca Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Mon, 27 Jan 2025 16:19:07 -0800
Subject: [PATCH 8/9] Delete package-lock.yml

---
 samples/sap_accounts_receivable_dbt/package-lock.yml | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 samples/sap_accounts_receivable_dbt/package-lock.yml

diff --git a/samples/sap_accounts_receivable_dbt/package-lock.yml b/samples/sap_accounts_receivable_dbt/package-lock.yml
deleted file mode 100644
index 7fa7e893..00000000
--- a/samples/sap_accounts_receivable_dbt/package-lock.yml
+++ /dev/null
@@ -1,4 +0,0 @@
-packages:
-  - git: https://github.com/dbt-labs/dbt-utils.git
-    revision: 68b4b4dadc20cd5cc2a894bd2ad62aa1b8176dc7
-sha1_hash: 0f4dc0fb373403efb568c23241b42220d924b872

From a4279f5e91f9d48476e1beb74c0adf6d68f79348 Mon Sep 17 00:00:00 2001
From: Will Luna <wluna@anaconda.com>
Date: Tue, 18 Feb 2025 16:24:34 -0800
Subject: [PATCH 9/9] Generates company list via SQL

---
 .../anaconda_webinar_notebook.ipynb           | 59 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
index 3056e920..c3e3508d 100644
--- a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
+++ b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -579,9 +579,62 @@
    },
    "outputs": [],
    "source": [
-    "select *\n",
-    "from ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\n",
-    "limit 20"
+    "with companies as (\n",
+    "    select column1 as NAME\n",
+    "    from (values\n",
+    "        ('3M'),\n",
+    "        ('A. O. Smith'),\n",
+    "        ('Abbott Laboratories'),\n",
+    "        ('AbbVie'),\n",
+    "        ('Accenture'),\n",
+    "        ('Adobe Inc.'),\n",
+    "        ('Advanced Micro Devices'),\n",
+    "        ('AES Corporation'),\n",
+    "        ('Aflac'),\n",
+    "        ('Agilent Technologies'),\n",
+    "        ('Air Products'),\n",
+    "        ('Airbnb'),\n",
+    "        ('Akamai Technologies'),\n",
+    "        ('Albemarle Corporation'),\n",
+    "        ('Alexandria Real Estate Equities'),\n",
+    "        ('Align Technology'),\n",
+    "        ('Allegion'),\n",
+    "        ('Alliant Energy'),\n",
+    "        ('Allstate'),\n",
+    "        ('Amazon'),\n",
+    "        ('Amcor'),\n",
+    "        ('Amentum'),\n",
+    "        ('Ameren'),\n",
+    "        ('American Electric Power'),\n",
+    "        ('American Express'),\n",
+    "        ('American International Group'),\n",
+    "        ('American Tower'),\n",
+    "        ('American Water Works'),\n",
+    "        ('Ameriprise Financial'),\n",
+    "        ('Ametek'),\n",
+    "        ('Amgen'),\n",
+    "        ('Amphenol'),\n",
+    "        ('Analog Devices'),\n",
+    "        ('Ansys'),\n",
+    "        ('Aon'),\n",
+    "        ('APA Corporation'),\n",
+    "        ('Apple Inc.'),\n",
+    "        ('Applied Materials'),\n",
+    "        ('Aptiv'),\n",
+    "        ('Arch Capital Group'),\n",
+    "        ('Archer Daniels Midland'),\n",
+    "        ('Arista Networks'),\n",
+    "        ('Arthur J. Gallagher & Co.'),\n",
+    "        ('Assurant'),\n",
+    "        ('AT&T'),\n",
+    "        ('Atmos Energy'),\n",
+    "        ('Autodesk'),\n",
+    "        ('Automatic Data Processing'),\n",
+    "        ('AutoZone'),\n",
+    "        ('AvalonBay Communities')\n",
+    "    )\n",
+    ")\n",
+    "select NAME from companies limit 20"
    ]
   },
   {