diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..39bbd26 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,4 @@ +{ + "image": "mcr.microsoft.com/devcontainers/universal:2", + "features": {} +} diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..5990d9c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..e138dcd --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,96 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL Advanced" + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '20 10 * * 0' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.idea/.github.iml b/.idea/.github.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/.github.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/SweepConfig.xml b/.idea/SweepConfig.xml new file mode 100644 index 0000000..d39be4f --- /dev/null +++ b/.idea/SweepConfig.xml @@ -0,0 +1,15 @@ + + + + + + \ No newline at end of file diff --git a/.idea/caches/deviceStreaming.xml b/.idea/caches/deviceStreaming.xml new file mode 100644 index 0000000..c833fcf --- /dev/null +++ b/.idea/caches/deviceStreaming.xml @@ -0,0 +1,1174 @@ + + + + + + \ No newline at end of file diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml new file mode 100644 index 0000000..57b2461 --- /dev/null +++ b/.idea/codeStyles/Project.xml @@ -0,0 +1,120 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/deviceManager.xml b/.idea/deviceManager.xml new file mode 100644 index 0000000..91f9558 --- /dev/null +++ b/.idea/deviceManager.xml @@ -0,0 +1,13 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..1945ce5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..854e742 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Cosmic_Simulation_AI_and_Audio.ipynb b/Cosmic_Simulation_AI_and_Audio.ipynb new file mode 100644 index 0000000..0bfd00d --- /dev/null +++ b/Cosmic_Simulation_AI_and_Audio.ipynb @@ -0,0 +1,2245 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "history_visible": true, + "collapsed_sections": [ + "Gc4BqPpVpUFa" + ], + "include_colab_link": true + }, + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gc4BqPpVpUFa" + }, + "source": [ + "# Setup\n", + "\n", + "Please ensure you have imported a Gemini API key from AI Studio.\n", + "You can do this directly in the Secrets tab on the left.\n", + "\n", + "After doing so, please run the setup cell below." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "_Gimwn69vh42" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GYSCOYMTpUFe" + }, + "source": [ + "# Generated Code" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "HHKkGIgntjPg" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ], + "metadata": { + "id": "gqlY0hJZ_HtX" + } + }, + { + "cell_type": "code", + "source": [ + "%load_ext cudf.pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Randomly generated dataset of parking violations-\n", + "# Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n", + " \"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Create a date range\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Which parking violation is most commonly committed by vehicles from various U.S states?\n", + "\n", + "(df[[\"Registration State\", \"Violation Description\"]] # get only these two columns\n", + " .value_counts() # get the count of offences per state and per type of offence\n", + " .groupby(\"Registration State\") # group by state\n", + " .head(1) # get the first row in each group (the type of offence with the largest count)\n", + " .sort_index() # sort by state name\n", + " .reset_index()\n", + ")" + ], + "metadata": { + "id": "Qd6wJzyz_HtY", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 211 + }, + "outputId": "a577bf13-6964-43ec-fcef-02492fd894d2" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The cudf.pandas extension is already loaded. To reload it, use:\n", + " %reload_ext cudf.pandas\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Registration State Violation Description count\n", + "0 CA No Parking 50189\n", + "1 NJ Expired Meter 50216\n", + "2 NY No Parking 50224\n", + "3 TX No Parking 50434" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registration StateViolation Descriptioncount
0CANo Parking50189
1NJExpired Meter50216
2NYNo Parking50224
3TXNo Parking50434
\n", + "
" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \")\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Registration State\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"NJ\",\n \"TX\",\n \"CA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Violation Description\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Expired Meter\",\n \"No Parking\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 113,\n \"min\": 50189,\n \"max\": 50434,\n \"num_unique_values\": 4,\n \"samples\": [\n 50216,\n 50434\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "8HR9qLhAOlsd" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "120019bb" + }, + "source": [ + "# Task\n", + "Create a live, rotating 3D galaxy visualization using the provided image files: \"/content/List_of_galaxies.pdf\", \"/content/Hubble-Space-Telescope-Galaxy-Collection.jpg\", \"/content/galaxy-1756314020879.png\", and \"/content/nasa-shares-new-views.jpg\"." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "EokR_kWwOnGW" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4caeabe4" + }, + "source": [ + "## Image preprocessing\n", + "\n", + "### Subtask:\n", + "Load the galaxy images and potentially extract features or create textures from them.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "VK1djxvoOitN" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7439d106" + }, + "source": [ + "**Reasoning**:\n", + "Load the images using OpenCV and store them in a list. PDF files cannot be directly loaded as images, so exclude it.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d677b5b8" + }, + "source": [ + "**Reasoning**:\n", + "The previous command failed because the `cv2` package was not found. Install the package using pip.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "paLtPO5PHPXU", + "outputId": "eda4bc3b-e4c4-4b51-a816-81b58f53d6d2" + }, + "source": [ + "%pip install opencv-python" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.12/dist-packages (4.12.0.88)\n", + "Requirement already satisfied: numpy<2.3.0,>=2 in /usr/local/lib/python3.12/dist-packages (from opencv-python) (2.0.2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from vega_datasets import data\n", + "stocks = data.stocks()\n", + "\n", + "import altair as alt\n", + "alt.Chart(stocks).mark_line().encode(\n", + " x='date:T',\n", + " y='price',\n", + " color='symbol'\n", + ").interactive(bind_y=False)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 368 + }, + "id": "VrcdHkesu8OI", + "outputId": "29435b8d-f857-491f-eb79-7bd7ce4a4872" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# load an example dataset\n", + "from vega_datasets import data\n", + "cars = data.cars()\n", + "\n", + "# plot the dataset, referencing dataframe column names\n", + "import altair as alt\n", + "alt.Chart(cars).mark_bar().encode(\n", + " x=alt.X('Miles_per_Gallon', bin=True),\n", + " y='count()',\n", + " color='Origin'\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 368 + }, + "id": "pr93d1kBvBHH", + "outputId": "b7f1ec2f-9b27-4a12-d38e-c408aaeefd64" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# load an example dataset\n", + "from vega_datasets import data\n", + "cars = data.cars()\n", + "\n", + "import altair as alt\n", + "\n", + "points = alt.Chart(cars).mark_point().encode(\n", + " x='Year:T',\n", + " y='Miles_per_Gallon',\n", + " color='Origin'\n", + ").properties(\n", + " width=800\n", + ")\n", + "\n", + "lines = alt.Chart(cars).mark_line().encode(\n", + " x='Year:T',\n", + " y='mean(Miles_per_Gallon)',\n", + " color='Origin'\n", + ").properties(\n", + " width=800\n", + ").interactive(bind_y=False)\n", + "\n", + "points + lines" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 388 + }, + "id": "ULhvdA3AvCVm", + "outputId": "022edcc4-78fc-49e0-be17-4b9baca9ada0" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# load an example dataset\n", + "from vega_datasets import data\n", + "cars = data.cars()\n", + "\n", + "# plot the dataset, referencing dataframe column names\n", + "import altair as alt\n", + "alt.Chart(cars).mark_point().encode(\n", + " x='Horsepower',\n", + " y='Miles_per_Gallon',\n", + " color='Origin'\n", + ").interactive()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 368 + }, + "id": "iRfJrZb1vH76", + "outputId": "c9c24220-9b43-4017-d0a7-08503a4b9d86" + }, + "execution_count": 30, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# load an example dataset\n", + "from vega_datasets import data\n", + "cars = data.cars()\n", + "\n", + "# plot the dataset, referencing dataframe column names\n", + "import altair as alt\n", + "alt.Chart(cars).mark_bar().encode(\n", + " x=alt.X('Miles_per_Gallon', bin=True),\n", + " y='count()',\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 368 + }, + "id": "ieUf_ygVvKjw", + "outputId": "cdfa08c6-3228-4c1f-e041-3d186c5db38c" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# load an example dataset\n", + "from vega_datasets import data\n", + "cars = data.cars()\n", + "\n", + "# plot the dataset, referencing dataframe column names\n", + "import altair as alt\n", + "alt.Chart(cars).mark_bar().encode(\n", + " x='mean(Miles_per_Gallon)',\n", + " y='Origin',\n", + " color='Origin'\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 123 + }, + "id": "6SOWYxt2vLpr", + "outputId": "8a94d1cc-9c07-4a15-bb23-ee882a3952f0" + }, + "execution_count": 33, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import auth\n", + "auth.authenticate_user()" + ], + "metadata": { + "id": "fC-1Kix5vMg4" + }, + "execution_count": 34, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import time\n", + "import sys\n", + "from google.colab import output\n", + "\n", + "print('Starting.')\n", + "\n", + "with output.use_tags('some_outputs'):\n", + " sys.stdout.write('working....\\n')\n", + " sys.stdout.flush();\n", + " time.sleep(2)\n", + "\n", + " sys.stdout.write('still working...\\n')\n", + " sys.stdout.flush();\n", + " time.sleep(2)\n", + "\n", + "# Now clear the previous outputs.\n", + "output.clear(output_tags='some_outputs')\n", + "print('All done!')\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VoblcmpyvPF5", + "outputId": "429fe3a9-e5d4-433d-cd2b-f86b3320c090" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Starting.\n", + "All done!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Support for third party widgets will remain active for the duration of the session. To disable support:" + ], + "metadata": { + "id": "em2abCypvNop" + } + }, + { + "cell_type": "code", + "source": [ + "from google.colab import output\n", + "output.disable_custom_widget_manager()" + ], + "metadata": { + "id": "2CPY_udkvNop" + }, + "execution_count": 36, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "project_id = '[your project ID]'" + ], + "metadata": { + "id": "yrElKJx3vNTQ" + }, + "execution_count": 37, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import bigframes.pandas as bpd\n", + "from google.cloud import bigquery\n", + "\n", + "# Set BigQuery DataFrames options\n", + "bpd.options.bigquery.project = project_id\n", + "bpd.options.bigquery.location = \"US\"" + ], + "metadata": { + "id": "adSrR635vNTQ" + }, + "execution_count": 38, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "MC8P2TfBvNTQ", + "outputId": "db5961a5-27e4-4a3a-df85-010297ad8325" + }, + "execution_count": 42, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Issue Date Ticket Number\n", + "count 1000000 1.000000e+06\n", + "mean 2022-07-01 23:32:43.411199744 5.499237e+09\n", + "min 2022-01-01 00:00:00 1.000005e+09\n", + "25% 2022-04-02 00:00:00 3.249061e+09\n", + "50% 2022-07-02 00:00:00 5.499907e+09\n", + "75% 2022-10-01 00:00:00 7.748794e+09\n", + "max 2022-12-31 00:00:00 9.999998e+09\n", + "std NaN 2.597215e+09" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Issue DateTicket Number
count10000001.000000e+06
mean2022-07-01 23:32:43.4111997445.499237e+09
min2022-01-01 00:00:001.000005e+09
25%2022-04-02 00:00:003.249061e+09
50%2022-07-02 00:00:005.499907e+09
75%2022-10-01 00:00:007.748794e+09
max2022-12-31 00:00:009.999998e+09
stdNaN2.597215e+09
\n", + "
" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Issue Date\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"1970-01-01 00:00:00.001000\",\n \"max\": \"2022-12-31 00:00:00\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"1000000\",\n \"2022-07-01 23:32:43.411199744\",\n \"2022-10-01 00:00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Ticket Number\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3390298193.6777215,\n \"min\": 1000000.0,\n \"max\": 9999997850.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 5499236759.115837,\n 7748793835.0,\n 1000000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.head(10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "lpFT7CA1vNTQ", + "outputId": "ce13216b-40d9-4977-952e-ae9250ea4fda" + }, + "execution_count": 43, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Registration State Violation Description Vehicle Body Type Issue Date \\\n", + "0 TX Bus Stop SDN 2022-02-20 \n", + "1 NJ No Parking SUBN 2022-12-30 \n", + "2 CA Fire Hydrant SUBN 2022-02-16 \n", + "3 NJ Expired Meter SDN 2022-07-11 \n", + "4 CA Double Parking SDN 2022-09-02 \n", + "5 CA Fire Hydrant SDN 2022-09-24 \n", + "6 CA No Parking SDN 2022-03-25 \n", + "7 CA Bus Stop SDN 2022-01-28 \n", + "8 NJ Bus Stop SDN 2022-06-09 \n", + "9 NJ Bus Stop SUBN 2022-08-20 \n", + "\n", + " Ticket Number \n", + "0 2366568410 \n", + "1 1858496929 \n", + "2 7980797083 \n", + "3 2267392504 \n", + "4 1711350626 \n", + "5 9937878556 \n", + "6 2006476931 \n", + "7 1905610278 \n", + "8 3630351468 \n", + "9 6807852590 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registration StateViolation DescriptionVehicle Body TypeIssue DateTicket Number
0TXBus StopSDN2022-02-202366568410
1NJNo ParkingSUBN2022-12-301858496929
2CAFire HydrantSUBN2022-02-167980797083
3NJExpired MeterSDN2022-07-112267392504
4CADouble ParkingSDN2022-09-021711350626
5CAFire HydrantSDN2022-09-249937878556
6CANo ParkingSDN2022-03-252006476931
7CABus StopSDN2022-01-281905610278
8NJBus StopSDN2022-06-093630351468
9NJBus StopSUBN2022-08-206807852590
\n", + "
" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df" + } + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import bigframes.pandas as bpd\n", + "from google.cloud import bigquery\n", + "\n", + "# https://cloud.google.com/resource-manager/docs/creating-managing-projects\n", + "# project_id = '[your Cloud Platform project ID]' # Using project_id from cell yrElKJx3vNTQ\n", + "sample_count = 2000\n", + "\n", + "# Set BigQuery DataFrames options (using location from cell adSrR635vNTQ)\n", + "bpd.close_session()\n", + "bpd.options.bigquery.project = project_id\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "\n", + "row_count = pd.io.gbq.read_gbq('''\n", + " SELECT\n", + " COUNT(*) as total\n", + " FROM `bigquery-public-data.samples.gsod`\n", + "''', project_id=project_id, location=bpd.options.bigquery.location).total[0]\n", + "\n", + "df = pd.io.gbq.read_gbq(f'''\n", + " SELECT\n", + " *\n", + " FROM\n", + " `bigquery-public-data.samples.gsod`\n", + " WHERE RAND() < {sample_count}/{row_count}\n", + "''', project_id=project_id, location=bpd.options.bigquery.location)\n", + "\n", + "print(f'Full dataset has {row_count} rows')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 628 + }, + "id": "rfbBILTXvMg4", + "outputId": "526f4f0d-4609-47ea-b8e7-b20d012cecb4" + }, + "execution_count": 50, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/cudf/pandas/fast_slow_proxy.py:28: FutureWarning: read_gbq is deprecated and will be removed in a future version. Please use pandas_gbq.read_gbq instead: https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq\n", + " return fn(*args, **kwargs)\n" + ] + }, + { + "output_type": "error", + "ename": "GenericGBQException", + "evalue": "Reason: 400 POST https://bigquery.googleapis.com/bigquery/v2/projects/%5Byour%20Cloud%20Platform%20project%20ID%5D/queries?prettyPrint=false: Invalid project ID '[your Cloud Platform project ID]'. Project IDs must contain 6-63 lowercase letters, digits, or dashes. Some project IDs also include domain name separated by a colon. IDs must start with a letter and may not end with a dash.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBadRequest\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas_gbq/query.py\u001b[0m in \u001b[0;36mtry_query\u001b[0;34m(connector, query_fn)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Requesting query... \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mquery_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mconcurrent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36mquery_and_wait\u001b[0;34m(self, query, job_config, location, project, api_timeout, wait_timeout, retry, job_retry, page_size, max_results)\u001b[0m\n\u001b[1;32m 3660\u001b[0m \"\"\"\n\u001b[0;32m-> 3661\u001b[0;31m return self._query_and_wait_bigframes(\n\u001b[0m\u001b[1;32m 3662\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36m_query_and_wait_bigframes\u001b[0;34m(self, query, job_config, location, project, api_timeout, wait_timeout, retry, job_retry, page_size, max_results, callback)\u001b[0m\n\u001b[1;32m 3706\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3707\u001b[0;31m return _job_helpers.query_and_wait(\n\u001b[0m\u001b[1;32m 3708\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/_job_helpers.py\u001b[0m in \u001b[0;36mquery_and_wait\u001b[0;34m(client, query, job_config, location, project, api_timeout, wait_timeout, retry, job_retry, page_size, max_results, callback)\u001b[0m\n\u001b[1;32m 626\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mjob_retry\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 627\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mjob_retry\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdo_query\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 628\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_unary.py\u001b[0m in \u001b[0;36mretry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 293\u001b[0m )\n\u001b[0;32m--> 294\u001b[0;31m return retry_target(\n\u001b[0m\u001b[1;32m 295\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_unary.py\u001b[0m in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;31m# defer to shared logic for handling errors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 156\u001b[0;31m next_sleep = _retry_error_helper(\n\u001b[0m\u001b[1;32m 157\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_base.py\u001b[0m in \u001b[0;36m_retry_error_helper\u001b[0;34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[0m\n\u001b[1;32m 213\u001b[0m )\n\u001b[0;32m--> 214\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mfinal_exc\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msource_exc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 215\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mon_error_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_unary.py\u001b[0m in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minspect\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misawaitable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/_job_helpers.py\u001b[0m in \u001b[0;36mdo_query\u001b[0;34m()\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mretry\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 545\u001b[0;31m response = retry(client._call_api)(\n\u001b[0m\u001b[1;32m 546\u001b[0m \u001b[0mretry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# We're calling the retry decorator ourselves.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_unary.py\u001b[0m in \u001b[0;36mretry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 293\u001b[0m )\n\u001b[0;32m--> 294\u001b[0;31m return retry_target(\n\u001b[0m\u001b[1;32m 295\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_unary.py\u001b[0m in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;31m# defer to shared logic for handling errors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 156\u001b[0;31m next_sleep = _retry_error_helper(\n\u001b[0m\u001b[1;32m 157\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_base.py\u001b[0m in \u001b[0;36m_retry_error_helper\u001b[0;34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[0m\n\u001b[1;32m 213\u001b[0m )\n\u001b[0;32m--> 214\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mfinal_exc\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msource_exc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 215\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mon_error_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/api_core/retry/retry_unary.py\u001b[0m in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minspect\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misawaitable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36m_call_api\u001b[0;34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[0m\n\u001b[1;32m 860\u001b[0m ):\n\u001b[0;32m--> 861\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 862\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/cloud/_http/__init__.py\u001b[0m in \u001b[0;36mapi_request\u001b[0;34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;36m200\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus_code\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m300\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 494\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_http_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 495\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mBadRequest\u001b[0m: 400 POST https://bigquery.googleapis.com/bigquery/v2/projects/%5Byour%20Cloud%20Platform%20project%20ID%5D/queries?prettyPrint=false: Invalid project ID '[your Cloud Platform project ID]'. Project IDs must contain 6-63 lowercase letters, digits, or dashes. Some project IDs also include domain name separated by a colon. IDs must start with a letter and may not end with a dash.", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mGenericGBQException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipython-input-401414200.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m row_count = pd.io.gbq.read_gbq('''\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mSELECT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mCOUNT\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtotal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/cudf/pandas/fast_slow_proxy.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m result, _ = _fast_slow_function_call(\n\u001b[0m\u001b[1;32m 723\u001b[0m \u001b[0;31m# We cannot directly call self here because we need it to be\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m \u001b[0;31m# converted into either the fast or slow object (by\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/cudf/pandas/fast_slow_proxy.py\u001b[0m in \u001b[0;36m_fast_slow_function_call\u001b[0;34m(func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1047\u001b[0m \u001b[0m_slow_function_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1048\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mdisable_module_accelerator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1049\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mslow_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mslow_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1050\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_maybe_wrap_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfast\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1051\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/cudf/pandas/fast_slow_proxy.py\u001b[0m in \u001b[0;36mcall_operator\u001b[0;34m(fn, args, kwargs)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcall_operator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas/io/gbq.py\u001b[0m in \u001b[0;36mread_gbq\u001b[0;34m(query, project_id, index_col, col_order, reauth, auth_local_webserver, dialect, location, configuration, credentials, use_bqstorage_api, max_results, progress_bar_type)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;31m# END: new kwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 207\u001b[0;31m return pandas_gbq.read_gbq(\n\u001b[0m\u001b[1;32m 208\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0mproject_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mproject_id\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas_gbq/gbq.py\u001b[0m in \u001b[0;36mread_gbq\u001b[0;34m(query_or_table, project_id, index_col, columns, reauth, auth_local_webserver, dialect, location, configuration, credentials, use_bqstorage_api, max_results, verbose, private_key, progress_bar_type, dtypes, auth_redirect_uri, client_id, client_secret, col_order, bigquery_client)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_query\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery_or_table\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m final_df = connector.run_query(\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0mquery_or_table\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mconfiguration\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfiguration\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas_gbq/gbq_connector.py\u001b[0m in \u001b[0;36mrun_query\u001b[0;34m(self, query, max_results, progress_bar_type, **kwargs)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mFEATURES\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbigquery_has_query_and_wait\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 240\u001b[0;31m rows_iter = pandas_gbq.query.query_and_wait_via_client_library(\n\u001b[0m\u001b[1;32m 241\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas_gbq/query.py\u001b[0m in \u001b[0;36mquery_and_wait_via_client_library\u001b[0;34m(connector, client, query, job_config, location, project_id, max_results, timeout_ms)\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[0mtimeout_ms\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 197\u001b[0m ):\n\u001b[0;32m--> 198\u001b[0;31m rows_iter = try_query(\n\u001b[0m\u001b[1;32m 199\u001b[0m \u001b[0mconnector\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m functools.partial(\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas_gbq/query.py\u001b[0m in \u001b[0;36mtry_query\u001b[0;34m(connector, query_fn)\u001b[0m\n\u001b[1;32m 97\u001b[0m )\n\u001b[1;32m 98\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mconnector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhttp_error\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m \u001b[0mconnector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_http_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas_gbq/gbq_connector.py\u001b[0m in \u001b[0;36mprocess_http_error\u001b[0;34m(ex)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTableCreationError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Reason: {error_message}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 174\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mGenericGBQException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Reason: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 175\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m def download_table(\n", + "\u001b[0;31mGenericGBQException\u001b[0m: Reason: 400 POST https://bigquery.googleapis.com/bigquery/v2/projects/%5Byour%20Cloud%20Platform%20project%20ID%5D/queries?prettyPrint=false: Invalid project ID '[your Cloud Platform project ID]'. Project IDs must contain 6-63 lowercase letters, digits, or dashes. Some project IDs also include domain name separated by a colon. IDs must start with a letter and may not end with a dash." + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "EqHm2LAMvMg4", + "outputId": "057a1a37-1fc9-47cd-e6cc-200e5c729814" + }, + "execution_count": 49, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Issue Date Ticket Number\n", + "count 1000000 1.000000e+06\n", + "mean 2022-07-01 23:32:43.411199744 5.499237e+09\n", + "min 2022-01-01 00:00:00 1.000005e+09\n", + "25% 2022-04-02 00:00:00 3.249061e+09\n", + "50% 2022-07-02 00:00:00 5.499907e+09\n", + "75% 2022-10-01 00:00:00 7.748794e+09\n", + "max 2022-12-31 00:00:00 9.999998e+09\n", + "std NaN 2.597215e+09" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Issue DateTicket Number
count10000001.000000e+06
mean2022-07-01 23:32:43.4111997445.499237e+09
min2022-01-01 00:00:001.000005e+09
25%2022-04-02 00:00:003.249061e+09
50%2022-07-02 00:00:005.499907e+09
75%2022-10-01 00:00:007.748794e+09
max2022-12-31 00:00:009.999998e+09
stdNaN2.597215e+09
\n", + "
" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Issue Date\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"1970-01-01 00:00:00.001000\",\n \"max\": \"2022-12-31 00:00:00\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"1000000\",\n \"2022-07-01 23:32:43.411199744\",\n \"2022-10-01 00:00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Ticket Number\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3390298193.6777215,\n \"min\": 1000000.0,\n \"max\": 9999997850.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 5499236759.115837,\n 7748793835.0,\n 1000000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "xERdbQehOc9t" + }, + "execution_count": 49, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a57b7825" + }, + "source": [ + "**Reasoning**:\n", + "The `%pip` magic command is not recognized. Install the package using the standard python command in a code cell.\n", + "\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "oomPQ7gAOhak" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "422d7795" + }, + "source": [ + "## Image preprocessing\n", + "\n", + "### Subtask:\n", + "Load the galaxy images and potentially extract features or create textures from them.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "nxhXLYbswFru" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ee79f72" + }, + "source": [ + "## Image preprocessing\n", + "\n", + "### Subtask:\n", + "Load the galaxy images and potentially extract features or create textures from them.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "nC-EQN2iOgDE" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "VPtTWBADOfu9" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "df756fbc" + }, + "source": [ + "## Summary:\n", + "\n", + "### Data Analysis Key Findings\n", + "\n", + "* The environment is running Julia, not Python, which prevents the use of Python libraries like OpenCV for image processing.\n", + "* Attempts to install `opencv-python` using `%pip` and `!{sys.executable} -m pip install` failed because the commands are Python-specific and not recognized in the Julia environment.\n", + "* The inability to load and process images using the intended Python libraries led to the failure of the subtask.\n", + "\n", + "### Insights or Next Steps\n", + "\n", + "* The task cannot be completed in the current Julia environment as it requires Python-based image processing capabilities.\n", + "* To proceed with the task, a Python environment with necessary image processing libraries like OpenCV would be required.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "tRST2G45wJ1B" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6cf00e20" + }, + "source": [ + "# Task\n", + "Create a live, rotating 3D galaxy visualization with glowing ray stars that syncs with microphone audio input. Utilize the provided image files (\"/content/Hubble-Space-Telescope-Galaxy-Collection.jpg\", \"/content/galaxy-1756314020879.png\", \"/content/nasa-shares-new-views.jpg\") for textures and visual elements. The visualization should incorporate advanced and potentially novel coding techniques for complex motion, audio reactivity, and visual effects." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "6QWGZk0xOrQG" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7df38c9c" + }, + "source": [ + "## Image preprocessing\n", + "\n", + "### Subtask:\n", + "Load the galaxy images and potentially extract features or create textures from them.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "HQq0lOvdOsEu" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1f78b12a" + }, + "source": [ + "## Image preprocessing\n", + "\n", + "### Subtask:\n", + "Load the galaxy images and potentially extract features or create textures from them.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Sw3XV9tKOs_O" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fa905a47" + }, + "source": [ + "## Summary:\n", + "\n", + "### Data Analysis Key Findings\n", + "\n", + "* The environment is running a Julia kernel, which does not support the necessary Python image processing libraries (like OpenCV) required for image loading and processing.\n", + "* Consequently, the subtask of loading the galaxy images and creating textures could not be completed.\n", + "\n", + "### Insights or Next Steps\n", + "\n", + "* The current environment is unsuitable for image processing tasks needed for this visualization.\n", + "* To proceed, a different environment with support for image processing libraries (e.g., Python with OpenCV or Pillow) is required.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4d90d5f1" + }, + "source": [ + "# Task\n", + "Create a live, rotating 3D galaxy visualization with glowing ray stars that are audio-reactive to microphone input. Implement mouse and touch screen rotation controls, and incorporate a \"phantom exploration\" feature to seek anomalies within the galaxy. Utilize the provided image data (\"/content/Hubble-Space-Telescope-Galaxy-Collection.jpg\", \"/content/galaxy-1756314020879.png\", \"/content/nasa-shares-new-views.jpg\") and the PDF data (\"/content/List_of_galaxies.pdf\") for potential textures, inspiration, or data points. The code should be highly advanced and incorporate novel techniques for visualization, audio reactivity, and the phantom exploration/anomaly seeking mechanism." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "j5Up4iogOuTm" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77c2cb66" + }, + "source": [ + "## Image preprocessing\n", + "\n", + "### Subtask:\n", + "Load the galaxy images and potentially extract features or create textures from them.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "cW5EIi4EOvCu" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "743cf70c" + }, + "source": [ + "## Advanced 3d model creation\n", + "\n", + "### Subtask:\n", + "Create a sophisticated 3D model that represents a galaxy structure, potentially incorporating particle systems or other complex geometries to represent stars and gas clouds.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Kn2-TSJ2Ov6v" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "36e97811" + }, + "source": [ + "## Texture mapping and material design\n", + "\n", + "### Subtask:\n", + "Apply the processed galaxy images as textures and design materials that allow for glowing effects on stars.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "1xIVpO6lOwg3" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "46492cbf" + }, + "source": [ + "## Audio input and processing\n", + "\n", + "### Subtask:\n", + "Implement a method to capture audio input from the microphone and process it to extract relevant features (e.g., amplitude, frequency).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0df9c504" + }, + "source": [ + "## Audio-reactive animation\n", + "\n", + "### Subtask:\n", + "Develop a system to control the glowing of stars and potentially other visual elements based on the processed audio data.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "yUdzcWyCOyE3" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "be51d7e0" + }, + "source": [ + "## Adding complex motion\n", + "\n", + "### Subtask:\n", + "Implement rotation and potentially other complex movements and transformations to simulate a dynamic, live, rotating galaxy.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "EAiDBHcpOyyH" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6fe578cf" + }, + "source": [ + "## Shader development\n", + "\n", + "### Subtask:\n", + "Write custom shaders to create advanced visual effects, such as realistic glowing stars, nebulae, and potentially unique distortions or transformations based on audio input.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "JyXLd_uoOz6f" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "87ea272a" + }, + "source": [ + "## User input handling\n", + "\n", + "### Subtask:\n", + "Implement mouse and touch screen event handling to allow users to rotate the galaxy visualization.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "S6fzvdD5O1gH" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "7Lj0S65PO2nQ" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6c1c5d11" + }, + "source": [ + "## Phantom exploration and anomaly seeking\n", + "\n", + "### Subtask:\n", + "Develop a mechanism for \"phantom exploration\" which could involve:\n", + "- Defining what constitutes an \"anomaly\" within the 3D galaxy model.\n", + "- Implementing a way to navigate through the galaxy, potentially with a \"phantom\" or camera that can explore.\n", + "- Developing an \"anomaly seeking\" algorithm or interactive process that guides the user or the phantom towards anomalies.\n", + "- Visualizing the phantom's path or the anomalies found.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "v_NK4W7EO334" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5e606db" + }, + "source": [ + "## Optimization and performance\n", + "\n", + "### Subtask:\n", + "Optimize the code and 3D rendering to ensure smooth performance, especially with complex geometries, real-time audio processing, and interactive controls.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "1-j61bg7O5FI" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8da3673f" + }, + "source": [ + "## Visualization\n", + "\n", + "### Subtask:\n", + "Display the animated, audio-reactive, and interactive 3D galaxy visualization with the phantom exploration feature.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "dUojeklHO56A" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fd42bf1" + }, + "source": [ + "## Documentation and explanation\n", + "\n", + "### Subtask:\n", + "Document the complex code and explain the novel techniques used, including the phantom exploration and anomaly seeking mechanism.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8f6325fd" + }, + "source": [ + "**Reasoning**:\n", + "Document the complex code and explain the novel techniques used, including the phantom exploration and anomaly seeking mechanism based on the planned approach, acknowledging the environmental limitations.\n", + "\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "_TIs_lY3O6xf" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "485c6dd1" + }, + "source": [ + "# Documentation for the Planned 3D Galaxy Visualization Project\n", + "\n", + "# Introduction\n", + "# This document outlines the planned approach for creating a live, rotating 3D galaxy visualization with glowing ray stars that are audio-reactive to microphone input.\n", + "# It also includes a \"phantom exploration\" feature to seek anomalies within the galaxy.\n", + "# Due to the limitations of the current Julia environment, the code for this project could not be fully implemented or executed.\n", + "# This documentation describes the intended architecture, libraries, and techniques that would have been used in a suitable Python environment.\n", + "\n", + "# 1. Image Preprocessing\n", + "# Intended Approach: Load galaxy images and extract features or create textures.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - OpenCV (`cv2`): For loading and basic image manipulation (resizing, format conversion).\n", + "# - Pillow (PIL): Another option for image loading and processing.\n", + "# - Potential Feature Extraction: Depending on the specific visual effects, techniques like edge detection, color analysis, or texture analysis could be used.\n", + "# Contribution to Visualization: The images would serve as textures for the galaxy model, stars, or background elements, adding visual detail and realism.\n", + "\n", + "# 2. Advanced 3D Model Creation\n", + "# Intended Approach: Create a sophisticated 3D model representing a galaxy structure.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - PyOpenGL or moderngl: For low-level OpenGL access to build complex geometries.\n", + "# - pyglet or Pygame: For creating an OpenGL context and handling windows/events.\n", + "# - Alternatively, a higher-level library like Panda3D or Kivy with its 3D capabilities could be explored, though they might offer less fine-grained control.\n", + "# - Techniques:\n", + "# - Particle Systems: To represent individual stars and gas clouds, allowing for dynamic behavior and large numbers of elements.\n", + "# - Procedural Generation: To create the spiral arms and overall structure of the galaxy based on mathematical models.\n", + "# - Mesh Creation: Building the underlying structure of the galaxy arms or central bulge as meshes.\n", + "# Contribution to Visualization: Provides the fundamental structure and visual elements of the galaxy.\n", + "\n", + "# 3. Texture Mapping and Material Design\n", + "# Intended Approach: Apply processed galaxy images as textures and design materials for glowing effects.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - PyOpenGL or moderngl: For binding textures to 3D models and defining material properties.\n", + "# - Custom Shaders (GLSL): Essential for creating advanced materials, including:\n", + "# - Texture mapping: Applying the loaded images onto the 3D geometry.\n", + "# - Emissive properties: Making stars glow.\n", + "# - Blending: Combining different textures or effects.\n", + "# Contribution to Visualization: Adds visual richness, detail, and special effects like glowing stars.\n", + "\n", + "# 4. Audio Input and Processing\n", + "# Intended Approach: Capture microphone audio and extract relevant features.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - sounddevice: For accessing and capturing audio from the microphone.\n", + "# - NumPy: For numerical operations on audio data.\n", + "# - SciPy: For signal processing techniques (e.g., FFT for frequency analysis).\n", + "# - Techniques:\n", + "# - Amplitude analysis: Measuring the overall loudness of the audio.\n", + "# - Frequency analysis (FFT): Identifying dominant frequencies in the audio.\n", + "# - Feature extraction: Calculating metrics like spectral centroid, flux, etc., depending on the desired audio-reactive effects.\n", + "# Contribution to Visualization: Provides the real-time data stream that drives the audio-reactive animations.\n", + "\n", + "# 5. Audio-Reactive Animation\n", + "# Intended Approach: Control visual elements (e.g., star glowing) based on processed audio data.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - Integration with 3D library (PyOpenGL, moderngl, etc.): To update visual properties based on audio features.\n", + "# - Techniques:\n", + "# - Mapping audio features to visual parameters: For example, mapping amplitude to star brightness or frequency to color.\n", + "# - Animation curves and interpolation: To create smooth transitions in visual effects.\n", + "# - Real-time updates: Updating the visualization in sync with the audio input.\n", + "# Contribution to Visualization: Creates a dynamic and immersive experience where the galaxy reacts to sound.\n", + "\n", + "# 6. Adding Complex Motion\n", + "# Intended Approach: Implement rotation and other complex movements for a dynamic galaxy.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - 3D transformation matrices (using NumPy or a 3D library's built-in functions): For rotation, translation, and scaling.\n", + "# - Techniques:\n", + "# - Quaternions: For smooth and intuitive rotations.\n", + "# - Animation loops: Continuously updating the transformation matrices over time.\n", + "# - Potential for physics simulations: To create more realistic or dynamic movements (though this adds complexity).\n", + "# Contribution to Visualization: Makes the galaxy feel alive and allows for exploration from different angles.\n", + "\n", + "# 7. Shader Development\n", + "# Intended Approach: Write custom shaders for advanced visual effects.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - GLSL (OpenGL Shading Language): The language for writing vertex, fragment, and potentially geometry shaders.\n", + "# - Integration with 3D library (PyOpenGL, moderngl): To compile and use shaders.\n", + "# - Techniques:\n", + "# - Vertex Shaders: To manipulate the position and other attributes of vertices (e.g., for procedural effects or distortions).\n", + "# - Fragment Shaders: To determine the color of each pixel, enabling effects like glowing, coloring based on audio, and complex lighting.\n", + "# - Noise functions (e.g., Perlin noise): For generating organic textures or motion.\n", + "# Contribution to Visualization: Enables high-quality, customizable visual effects that are essential for a compelling galaxy visualization.\n", + "\n", + "# 8. User Input Handling\n", + "# Intended Approach: Implement mouse and touch screen controls for rotation.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - pyglet or Pygame: For handling window events, including mouse and touch input.\n", + "# - Techniques:\n", + "# - Event listeners: To capture mouse movements, clicks, and touch events.\n", + "# - Mapping input to transformations: Translating mouse/touch input into rotations of the galaxy model.\n", + "# - Camera control: Adjusting the camera's position and orientation based on user input.\n", + "# Contribution to Visualization: Allows users to interact with and explore the galaxy.\n", + "\n", + "# 9. Phantom Exploration and Anomaly Seeking\n", + "# Intended Approach: Develop a mechanism for \"phantom exploration\" and \"anomaly seeking\".\n", + "# This is a novel feature with the following intended components:\n", + "# - Defining \"Anomalies\": Anomalies could be defined in several ways within the 3D galaxy model:\n", + "# - Statistical outliers: Stars with unusual properties (e.g., extreme brightness, velocity, or age if such data were available).\n", + "# - Spatial clusters: Regions with a higher density of stars than expected.\n", + "# - Unique textures or visual features: Specific areas of the galaxy model or textures that are visually distinct.\n", + "# - Potentially, anomalies could be procedurally generated or placed at specific coordinates.\n", + "# - Phantom Navigation: A \"phantom\" would represent a point of interest or a camera path that can move through the galaxy.\n", + "# - Autonomous Navigation: The phantom could follow a predefined path, a random walk, or a path guided by the anomaly seeking algorithm.\n", + "# - User-Controlled Navigation: Users could potentially guide the phantom's movement.\n", + "# - Anomaly Seeking Algorithm: This algorithm would guide the phantom or highlight anomalies for the user.\n", + "# - Spatial partitioning (e.g., Octrees or K-d trees): To efficiently search for anomalies within the 3D space.\n", + "# - Proximity search: Finding anomalies within a certain radius of the phantom.\n", + "# - Feature comparison: Comparing the properties of stars or regions to the definition of an anomaly.\n", + "# - Pathfinding algorithms (e.g., A* or Dijkstra's): To calculate a path for the phantom to reach a discovered anomaly.\n", + "# - Visual cues: Highlighting anomalies in the visualization (e.g., changing their color, size, or adding markers).\n", + "# - Visualizing Phantom's Path/Anomalies:\n", + "# - Rendering a trail behind the phantom.\n", + "# - Drawing lines or markers to indicate the location of anomalies.\n", + "# - Changing the camera view to focus on discovered anomalies.\n", + "# Contribution to Visualization: Adds a unique interactive and exploratory element, encouraging users to delve deeper into the galaxy.\n", + "\n", + "# 10. Optimization and Performance\n", + "# Intended Approach: Optimize code and rendering for smooth performance.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - Techniques applied throughout the development process:\n", + "# - Level of Detail (LOD) for distant objects.\n", + "# - Frustum culling to avoid rendering off-screen objects.\n", + "# - Efficient data structures (e.g., for managing millions of stars).\n", + "# - Batching draw calls.\n", + "# - Utilizing shaders for GPU acceleration of visual effects.\n", + "# - Optimizing audio processing to minimize latency.\n", + "# Contribution to Visualization: Ensures a smooth and responsive experience, even with a complex visualization.\n", + "\n", + "# 11. Visualization\n", + "# Intended Approach: Display the animated, audio-reactive, and interactive 3D galaxy visualization.\n", + "# Libraries/Techniques (in a Python environment):\n", + "# - A 3D rendering loop using PyOpenGL, moderngl, or a higher-level library.\n", + "# - Integrating all the components: The 3D model, textures, materials, audio processing, animation logic, user input handling, and phantom exploration mechanism would be combined within the rendering loop.\n", + "# Contribution to Visualization: The final output, the interactive and dynamic 3D galaxy visualization.\n", + "\n", + "# Novel Techniques:\n", + "# - Integration of real-time microphone audio analysis to drive complex visual effects in a 3D environment.\n", + "# - The \"phantom exploration\" and \"anomaly seeking\" mechanism as a novel way to interact with and explore a procedurally generated or data-driven 3D space.\n", + "# - Potentially advanced shader techniques for realistic or stylized rendering of stars and nebulae based on audio input.\n", + "\n", + "# Conclusion:\n", + "# This document outlines the planned approach for creating a sophisticated 3D galaxy visualization with advanced features.\n", + "# The implementation was not possible in the current Julia environment due to the lack of necessary libraries for 3D graphics, audio processing, and user interaction.\n", + "# The descriptions provided are based on how this project would be approached in a suitable Python environment with access to relevant libraries." + ], + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "pa7fx_3IO9yJ" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f9770d7a" + }, + "source": [ + "## Summary:\n", + "\n", + "### Data Analysis Key Findings\n", + "\n", + "* The environment used for the analysis is Julia, which fundamentally lacks the necessary libraries and frameworks for advanced 3D graphics rendering, image processing, real-time audio input/processing, and complex user interaction handling required for the task.\n", + "* Consequently, none of the subtasks, including image preprocessing, 3D model creation, texture mapping, audio processing, animation, shader development, user input handling, phantom exploration, optimization, and visualization, could be completed.\n", + "* The \"phantom exploration and anomaly seeking\" feature was conceptualized to involve defining anomalies (e.g., statistical outliers, spatial clusters), implementing phantom navigation (autonomous or user-controlled), and developing an anomaly-seeking algorithm using techniques like spatial partitioning and proximity search.\n", + "* Documentation of the planned project outlined a comprehensive approach using Python libraries (like OpenCV, PyOpenGL, sounddevice, NumPy, SciPy) which were not available in the execution environment.\n", + "\n", + "### Insights or Next Steps\n", + "\n", + "* The project requires a programming environment with robust support for 3D graphics APIs (like OpenGL), real-time audio processing, and comprehensive input handling. A Python environment with libraries such as PyOpenGL, sounddevice, and relevant visualization tools would be a more suitable choice.\n", + "* Future attempts should focus on setting up a development environment that can accommodate the technical demands of the project before attempting implementation.\n" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "mWy7fGpuO_TA" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5026c6a1" + }, + "source": [ + "## Summary:\n", + "\n", + "### Data Analysis Key Findings\n", + "\n", + "* The environment used for the analysis is Julia, which fundamentally lacks the necessary libraries and frameworks for advanced 3D graphics rendering, image processing, real-time audio input/processing, and complex user interaction handling required for the task.\n", + "* Consequently, none of the subtasks, including image preprocessing, 3D model creation, texture mapping, audio processing, animation, shader development, user input handling, phantom exploration, optimization, and visualization, could be completed.\n", + "* The \"phantom exploration and anomaly seeking\" feature was conceptualized to involve defining anomalies (e.g., statistical outliers, spatial clusters), implementing phantom navigation (autonomous or user-controlled), and developing an anomaly-seeking algorithm using techniques like spatial partitioning and proximity search.\n", + "* Documentation of the planned project outlined a comprehensive approach using Python libraries (like OpenCV, PyOpenGL, sounddevice, NumPy, SciPy) which were not available in the execution environment.\n", + "\n", + "### Insights or Next Steps\n", + "\n", + "* The project requires a programming environment with robust support for 3D graphics APIs (like OpenGL), real-time audio processing, and comprehensive input handling. A Python environment with libraries such as PyOpenGL, sounddevice, and relevant visualization tools would be a more suitable choice.\n", + "* Future attempts should focus on setting up a development environment that can accommodate the technical demands of the project before attempting implementation." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "yYKbRNvXPAug" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "TdB06AzNwNjG" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "6_TmfNYqwO8v" + }, + "execution_count": 51, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/gemini/prompts/AXEE Synthesis b/gemini/prompts/AXEE Synthesis new file mode 100644 index 0000000..a58a21f --- /dev/null +++ b/gemini/prompts/AXEE Synthesis @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hlI1rYKa2IGx" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RN8N3O43QDT5" + }, + "source": [ + "# Vertex Prompt Optimizer Notebook UI (Preview)\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Open in Colab\n", + "
\n", + "
\n", + " \n", + " \"Google
Open in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
\n", + "\n", + "
\n", + "\n", + "Share to:\n", + "\n", + "\n", + " \"LinkedIn\n", + "\n", + "\n", + "\n", + " \"Bluesky\n", + "\n", + "\n", + "\n", + " \"X\n", + "\n", + "\n", + "\n", + " \"Reddit\n", + "\n", + "\n", + "\n", + " \"Facebook\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pHyuJTFr2IGx" + }, + "source": [ + "# Overview\n", + "This Notebook showcases the Vertex AI prompt optimizer, a tool that iteratively optimizes prompts to suit a target model (e.g., `gemini-2.0-flash`) using target-specific metric(s).\n", + "\n", + "Key Use Cases:\n", + "\n", + "* Prompt Optimization: Enhance the quality of an initial prompt by refining its structure and content to match the target model's optimal input characteristics.\n", + "\n", + "* Prompt Translation: Adapt prompts optimized for one model to work effectively with a different target model.\n", + "\n", + "For the detailed documentation please see [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "61RBz8LLbxCR" + }, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dmWOrTJ3gx13" + }, + "source": [ + "### Authenticate your notebook environment (Colab only)\n", + "\n", + "Authenticate your environment on Google Colab.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NyKGtVQjgx13" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import auth\n", + "\n", + " auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tTtKHedrO1Rx" + }, + "source": [ + "# Step 0: Install packages and libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8-Zw72vFORz_" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vapo_lib.py\n", + "import vapo_lib" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-p59jd5rOp4q" + }, + "source": [ + "# Step 1: Create a prompt template and system instructions\n", + "Provide your system intruction and prompt template below. Refer to [here]( https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#template-si) for instructions.\n", + "\n", + "Prompts consist of two key components:\n", + "\n", + "- System Instruction: System instruction is the instruction that get passed to the model before any user input in the prompt. This is the fixed part of the prompt template shared across all queries for a given task.\n", + "- Prompt template: A task is the text in the prompt that you want the model to provide a response for. Context is information that you include in the prompt that the model uses or references when generating a response. These are the dynamic parts of the prompt template that changes based on the task.\n", + "\n", + "Prompt Optimizer enables the optimization or translation of the System Instruction template, while the prompt template remains essential for evaluating and selecting the best System Instruction template." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rJG1pVZO317x" + }, + "outputs": [], + "source": [ + "SYSTEM_INSTRUCTION = (\n", + " \"Answer the following question. Let's think step by step.\\n\" # @param\n", + ")\n", + "PROMPT_TEMPLATE = \"Question: {question}\\n\\nAnswer: {target}\" # @param" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5y-cmg0TQP6v" + }, + "source": [ + "# Step 2: Configure project settings\n", + "To optimize the prompt for your target Google model, provide a CSV or JSONL file containing labeled validation samples (input, ground truth output pairs). Refer to [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#prepare-sample-prompts) for instructions.\n", + "\n", + "Focus on examples that specifically demonstrate the issues you want to address.\n", + "Recommendation: Use 50-100 distinct samples for reliable results. However, the tool can still be effective with as few as 5 samples.\n", + "For prompt translation (e.g. 3P model to Google model, PaLM 2 to Gemini):\n", + "\n", + "Consider using the source model to label examples that the target model struggles with, helping to identify areas for improvement.\n", + "When you select a source model, you don't need to provide labels for the input examples.\n", + "While the source model selection is limited to Google models, it still supports labeled inputs from non-Google models. If you wish to select a non-Google source model, you will need to provide labels for your input examples.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mfgi_oR6tTIB" + }, + "outputs": [], + "source": [ + "# @markdown **Project setup**:
\n", + "PROJECT_ID = \"[YOUR_PROJECT]\" # @param {type:\"string\"}\n", + "LOCATION = \"us-central1\" # @param {type:\"string\"}\n", + "OUTPUT_PATH = \"[OUTPUT_PATH]\" # @param {type:\"string\"}\n", + "INPUT_DATA_PATH = \"[INPUT_DATA_PATH]\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ucebZHkHRxKH" + }, + "source": [ + "# Step 3: Configure optimization settings\n", + "The optimization configurations are defaulted to the values that are most commonly used, which we recommend using as the initial set-up.\n", + "\n", + "The most important settings are:\n", + "\n", + "* Target Model: Which model you are trying to optimize your prompts to.\n", + "* Thinking Budget: The thinking budget for thinking models like Gemini-2.5. Default to -1, which means no thinking for non-thinking models and auto thinking for thinking models. Refer [here](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking) to learn more about manual budget settings.\n", + "* Optimization Mode: The mode in which you are trying to optimize your prompt with.\n", + "* Evaluation Metrics: The evaluation metrics in which you are trying to optimize your prompts against.\n", + "* Translation Source Field Name: fill in with the corresponding field name of the source text in the data if translation metrics like Comet or MetricX are selected. Otherwise, leave it as empty.\n", + "\n", + "Note that all evaluation metrics are expected to have the larger-the-better property. Therefore, we have modified the MetricX value to between 0 (worst) and 25 (best).\n", + "Refer [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#configuration) to learn more about the different configuration settings and how to best utilize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B2R3P8mMvK9q" + }, + "outputs": [], + "source": [ + "TARGET_MODEL = \"gemini-2.0-flash-001\" # @param [\"gemini-2.5-flash-lite\", \"gemini-2.5-flash\", \"gemini-2.5-pro\", \"gemini-2.0-flash-lite-001\", \"gemini-2.0-flash-001\"]\n", + "THINKING_BUDGET = -1 # @param {type:\"integer\"}\n", + "OPTIMIZATION_MODE = \"instruction_and_demo\" # @param [\"instruction\", \"demonstration\", \"instruction_and_demo\"]\n", + "EVAL_METRIC = \"question_answering_correctness\" # @param [\"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "TRANSLATION_SOURCE_FIELD_NAME = \"\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kO7fO0qTSNLs" + }, + "source": [ + "# Step 4: Configure advanced optimization settings [Optional]\n", + "Refer [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#configuration) to learn more about the different configuration settings and how to best utilize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fRHHTpaV4Xyo" + }, + "outputs": [], + "source": [ + "# @markdown **Instruction Optimization Configs**:
\n", + "NUM_INST_OPTIMIZATION_STEPS = 10 # @param {type:\"integer\"}\n", + "\n", + "# @markdown **Demonstration Optimization Configs**:
\n", + "NUM_DEMO_OPTIMIZATION_STEPS = 10 # @param {type:\"integer\"}\n", + "NUM_DEMO_PER_PROMPT = 3 # @param {type:\"integer\"}\n", + "\n", + "# @markdown **Model Configs**:
\n", + "TARGET_MODEL_QPS = 3.0 # @param {type:\"number\"}\n", + "EVAL_QPS = 3.0 # @param {type:\"number\"}\n", + "\n", + "# @markdown **Multi-metric Configs**:
\n", + "# @markdown Use this section only if you need more than one metric for optimization. This will override the metric you picked above.\n", + "EVAL_METRIC_1 = \"NA\" # @param [\"NA\", \"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "EVAL_METRIC_1_WEIGHT = 0.0 # @param {type:\"number\"}\n", + "EVAL_METRIC_2 = \"NA\" # @param [\"NA\", \"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "EVAL_METRIC_2_WEIGHT = 0.0 # @param {type:\"number\"}\n", + "EVAL_METRIC_3 = \"NA\" # @param [\"NA\", \"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "EVAL_METRIC_3_WEIGHT = 0.0 # @param {type:\"number\"}\n", + "METRIC_AGGREGATION_TYPE = \"weighted_sum\" # @param [\"weighted_sum\", \"weighted_average\"]\n", + "\n", + "# @markdown **Misc Configs**:
\n", + "PLACEHOLDER_TO_VALUE = \"{}\" # @param\n", + "RESPONSE_MIME_TYPE = \"text/plain\" # @param [\"text/plain\", \"application/json\", \"text/x.enum\"] {type:\"string\"}\n", + "RESPONSE_SCHEMA = \"\"\n", + "TARGET_LANGUAGE = \"English\" # @param [\"English\", \"French\", \"German\", \"Hebrew\", \"Hindi\", \"Italian\", \"Japanese\", \"Korean\", \"Portuguese\", \"Simplified Chinese\", \"Spanish\", \"Traditional Chinese\"] {type:\"string\"}\n", + "TOOLS = \"\" # @param\n", + "TOOL_CONFIG = \"\" # @param" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X7Mgb0EHSSFk" + }, + "source": [ + "# Step 5: Run Prompt Optimizer\n", + "A progress bar will appear to let you know how long the job takes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z8NvNLTfxPTf" + }, + "outputs": [], + "source": [ + "import datetime\n", + "import json\n", + "import time\n", + "\n", + "timestamp = datetime.datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S\")\n", + "display_name = f\"pt_{timestamp}\"\n", + "\n", + "label_enforced = vapo_lib.is_run_target_required(\n", + " [\n", + " EVAL_METRIC,\n", + " EVAL_METRIC_1,\n", + " EVAL_METRIC_2,\n", + " EVAL_METRIC_3,\n", + " ],\n", + " \"\",\n", + ")\n", + "input_data_path = f\"{INPUT_DATA_PATH}\"\n", + "vapo_lib.validate_prompt_and_data(\n", + " \"\\n\".join([SYSTEM_INSTRUCTION, PROMPT_TEMPLATE]),\n", + " input_data_path,\n", + " PLACEHOLDER_TO_VALUE,\n", + " label_enforced,\n", + ")\n", + "\n", + "output_path = f\"{OUTPUT_PATH}/{display_name}\"\n", + "\n", + "params = {\n", + " \"project\": PROJECT_ID,\n", + " \"num_steps\": NUM_INST_OPTIMIZATION_STEPS,\n", + " \"system_instruction\": SYSTEM_INSTRUCTION,\n", + " \"prompt_template\": PROMPT_TEMPLATE,\n", + " \"target_model\": TARGET_MODEL,\n", + " \"target_model_qps\": TARGET_MODEL_QPS,\n", + " \"target_model_location\": LOCATION,\n", + " \"optimizer_model_location\": LOCATION,\n", + " \"eval_qps\": EVAL_QPS,\n", + " \"optimization_mode\": OPTIMIZATION_MODE,\n", + " \"num_demo_set_candidates\": NUM_DEMO_OPTIMIZATION_STEPS,\n", + " \"demo_set_size\": NUM_DEMO_PER_PROMPT,\n", + " \"aggregation_type\": METRIC_AGGREGATION_TYPE,\n", + " \"data_limit\": 50,\n", + " \"input_data_path\": input_data_path,\n", + " \"output_path\": output_path,\n", + " \"response_mime_type\": RESPONSE_MIME_TYPE,\n", + " \"response_schema\": RESPONSE_SCHEMA,\n", + " \"language\": TARGET_LANGUAGE,\n", + " \"placeholder_to_content\": json.loads(PLACEHOLDER_TO_VALUE),\n", + " \"tools\": TOOLS,\n", + " \"tool_config\": TOOL_CONFIG,\n", + " \"translation_source_field_name\": TRANSLATION_SOURCE_FIELD_NAME,\n", + " \"thinking_budget\": THINKING_BUDGET,\n", + "}\n", + "\n", + "if EVAL_METRIC_1 == \"NA\":\n", + " params[\"eval_metrics_types\"] = [EVAL_METRIC]\n", + " params[\"eval_metrics_weights\"] = [1.0]\n", + "else:\n", + " metrics = []\n", + " weights = []\n", + " for metric, weight in zip(\n", + " [EVAL_METRIC_1, EVAL_METRIC_2, EVAL_METRIC_3],\n", + " [EVAL_METRIC_1_WEIGHT, EVAL_METRIC_2_WEIGHT, EVAL_METRIC_3_WEIGHT],\n", + " ):\n", + " if metric == \"NA\":\n", + " break\n", + " metrics.append(metric)\n", + " weights.append(weight)\n", + " params[\"eval_metrics_types\"] = metrics\n", + " params[\"eval_metrics_weights\"] = weights\n", + "\n", + "job = vapo_lib.run_apd(params, OUTPUT_PATH, display_name)\n", + "print(f\"Job ID: {job.name}\")\n", + "\n", + "progress_form = vapo_lib.ProgressForm(params)\n", + "while progress_form.monitor_progress(job):\n", + " time.sleep(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lo5mcTzwSgBP" + }, + "source": [ + "# Step 6: Inspect the results\n", + "For a clearer look at the specific responses generated by each prompt template during the optimization process, use the cell below.\n", + "This will allow you to inspect all the predictions made by all the\n", + "generated templates during one or multiple vertex prompt optimizer runs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1x6HSty759jY" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML, display\n", + "\n", + "RESULT_PATH = \"[OUTPUT_PATH]\" # @param {type:\"string\"}\n", + "\n", + "results_ui = vapo_lib.ResultsUI(RESULT_PATH)\n", + "\n", + "results_df_html = \"\"\"\n", + "\n", + "\"\"\"\n", + "\n", + "display(HTML(results_df_html))\n", + "display(results_ui.get_container())" + ] + } + ], + "metadata": { + "colab": { + "name": "vertex_ai_prompt_optimizer_ui.ipynb", + "toc_visible": true, + "provenance": [], + "private_outputs": true, + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/gemini/prompts/prompt_optimizer/AXEE.ipynb b/gemini/prompts/prompt_optimizer/AXEE.ipynb new file mode 100644 index 0000000..a58a21f --- /dev/null +++ b/gemini/prompts/prompt_optimizer/AXEE.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hlI1rYKa2IGx" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RN8N3O43QDT5" + }, + "source": [ + "# Vertex Prompt Optimizer Notebook UI (Preview)\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Open in Colab\n", + "
\n", + "
\n", + " \n", + " \"Google
Open in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
\n", + "\n", + "
\n", + "\n", + "Share to:\n", + "\n", + "\n", + " \"LinkedIn\n", + "\n", + "\n", + "\n", + " \"Bluesky\n", + "\n", + "\n", + "\n", + " \"X\n", + "\n", + "\n", + "\n", + " \"Reddit\n", + "\n", + "\n", + "\n", + " \"Facebook\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pHyuJTFr2IGx" + }, + "source": [ + "# Overview\n", + "This Notebook showcases the Vertex AI prompt optimizer, a tool that iteratively optimizes prompts to suit a target model (e.g., `gemini-2.0-flash`) using target-specific metric(s).\n", + "\n", + "Key Use Cases:\n", + "\n", + "* Prompt Optimization: Enhance the quality of an initial prompt by refining its structure and content to match the target model's optimal input characteristics.\n", + "\n", + "* Prompt Translation: Adapt prompts optimized for one model to work effectively with a different target model.\n", + "\n", + "For the detailed documentation please see [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "61RBz8LLbxCR" + }, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dmWOrTJ3gx13" + }, + "source": [ + "### Authenticate your notebook environment (Colab only)\n", + "\n", + "Authenticate your environment on Google Colab.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NyKGtVQjgx13" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import auth\n", + "\n", + " auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tTtKHedrO1Rx" + }, + "source": [ + "# Step 0: Install packages and libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8-Zw72vFORz_" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vapo_lib.py\n", + "import vapo_lib" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-p59jd5rOp4q" + }, + "source": [ + "# Step 1: Create a prompt template and system instructions\n", + "Provide your system intruction and prompt template below. Refer to [here]( https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#template-si) for instructions.\n", + "\n", + "Prompts consist of two key components:\n", + "\n", + "- System Instruction: System instruction is the instruction that get passed to the model before any user input in the prompt. This is the fixed part of the prompt template shared across all queries for a given task.\n", + "- Prompt template: A task is the text in the prompt that you want the model to provide a response for. Context is information that you include in the prompt that the model uses or references when generating a response. These are the dynamic parts of the prompt template that changes based on the task.\n", + "\n", + "Prompt Optimizer enables the optimization or translation of the System Instruction template, while the prompt template remains essential for evaluating and selecting the best System Instruction template." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rJG1pVZO317x" + }, + "outputs": [], + "source": [ + "SYSTEM_INSTRUCTION = (\n", + " \"Answer the following question. Let's think step by step.\\n\" # @param\n", + ")\n", + "PROMPT_TEMPLATE = \"Question: {question}\\n\\nAnswer: {target}\" # @param" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5y-cmg0TQP6v" + }, + "source": [ + "# Step 2: Configure project settings\n", + "To optimize the prompt for your target Google model, provide a CSV or JSONL file containing labeled validation samples (input, ground truth output pairs). Refer to [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#prepare-sample-prompts) for instructions.\n", + "\n", + "Focus on examples that specifically demonstrate the issues you want to address.\n", + "Recommendation: Use 50-100 distinct samples for reliable results. However, the tool can still be effective with as few as 5 samples.\n", + "For prompt translation (e.g. 3P model to Google model, PaLM 2 to Gemini):\n", + "\n", + "Consider using the source model to label examples that the target model struggles with, helping to identify areas for improvement.\n", + "When you select a source model, you don't need to provide labels for the input examples.\n", + "While the source model selection is limited to Google models, it still supports labeled inputs from non-Google models. If you wish to select a non-Google source model, you will need to provide labels for your input examples.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mfgi_oR6tTIB" + }, + "outputs": [], + "source": [ + "# @markdown **Project setup**:
\n", + "PROJECT_ID = \"[YOUR_PROJECT]\" # @param {type:\"string\"}\n", + "LOCATION = \"us-central1\" # @param {type:\"string\"}\n", + "OUTPUT_PATH = \"[OUTPUT_PATH]\" # @param {type:\"string\"}\n", + "INPUT_DATA_PATH = \"[INPUT_DATA_PATH]\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ucebZHkHRxKH" + }, + "source": [ + "# Step 3: Configure optimization settings\n", + "The optimization configurations are defaulted to the values that are most commonly used, which we recommend using as the initial set-up.\n", + "\n", + "The most important settings are:\n", + "\n", + "* Target Model: Which model you are trying to optimize your prompts to.\n", + "* Thinking Budget: The thinking budget for thinking models like Gemini-2.5. Default to -1, which means no thinking for non-thinking models and auto thinking for thinking models. Refer [here](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking) to learn more about manual budget settings.\n", + "* Optimization Mode: The mode in which you are trying to optimize your prompt with.\n", + "* Evaluation Metrics: The evaluation metrics in which you are trying to optimize your prompts against.\n", + "* Translation Source Field Name: fill in with the corresponding field name of the source text in the data if translation metrics like Comet or MetricX are selected. Otherwise, leave it as empty.\n", + "\n", + "Note that all evaluation metrics are expected to have the larger-the-better property. Therefore, we have modified the MetricX value to between 0 (worst) and 25 (best).\n", + "Refer [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#configuration) to learn more about the different configuration settings and how to best utilize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B2R3P8mMvK9q" + }, + "outputs": [], + "source": [ + "TARGET_MODEL = \"gemini-2.0-flash-001\" # @param [\"gemini-2.5-flash-lite\", \"gemini-2.5-flash\", \"gemini-2.5-pro\", \"gemini-2.0-flash-lite-001\", \"gemini-2.0-flash-001\"]\n", + "THINKING_BUDGET = -1 # @param {type:\"integer\"}\n", + "OPTIMIZATION_MODE = \"instruction_and_demo\" # @param [\"instruction\", \"demonstration\", \"instruction_and_demo\"]\n", + "EVAL_METRIC = \"question_answering_correctness\" # @param [\"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "TRANSLATION_SOURCE_FIELD_NAME = \"\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kO7fO0qTSNLs" + }, + "source": [ + "# Step 4: Configure advanced optimization settings [Optional]\n", + "Refer [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#configuration) to learn more about the different configuration settings and how to best utilize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fRHHTpaV4Xyo" + }, + "outputs": [], + "source": [ + "# @markdown **Instruction Optimization Configs**:
\n", + "NUM_INST_OPTIMIZATION_STEPS = 10 # @param {type:\"integer\"}\n", + "\n", + "# @markdown **Demonstration Optimization Configs**:
\n", + "NUM_DEMO_OPTIMIZATION_STEPS = 10 # @param {type:\"integer\"}\n", + "NUM_DEMO_PER_PROMPT = 3 # @param {type:\"integer\"}\n", + "\n", + "# @markdown **Model Configs**:
\n", + "TARGET_MODEL_QPS = 3.0 # @param {type:\"number\"}\n", + "EVAL_QPS = 3.0 # @param {type:\"number\"}\n", + "\n", + "# @markdown **Multi-metric Configs**:
\n", + "# @markdown Use this section only if you need more than one metric for optimization. This will override the metric you picked above.\n", + "EVAL_METRIC_1 = \"NA\" # @param [\"NA\", \"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "EVAL_METRIC_1_WEIGHT = 0.0 # @param {type:\"number\"}\n", + "EVAL_METRIC_2 = \"NA\" # @param [\"NA\", \"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "EVAL_METRIC_2_WEIGHT = 0.0 # @param {type:\"number\"}\n", + "EVAL_METRIC_3 = \"NA\" # @param [\"NA\", \"bleu\", \"coherence\", \"comet\", \"exact_match\", \"fluency\", \"groundedness\", \"metricx\", \"text_quality\", \"verbosity\", \"rouge_1\", \"rouge_2\", \"rouge_l\", \"rouge_l_sum\", \"safety\", \"question_answering_correctness\", \"question_answering_quality\", \"summarization_quality\", \"tool_name_match\", \"tool_parameter_key_match\", \"tool_parameter_kv_match\", \"tool_call_valid\"] {type:\"string\"}\n", + "EVAL_METRIC_3_WEIGHT = 0.0 # @param {type:\"number\"}\n", + "METRIC_AGGREGATION_TYPE = \"weighted_sum\" # @param [\"weighted_sum\", \"weighted_average\"]\n", + "\n", + "# @markdown **Misc Configs**:
\n", + "PLACEHOLDER_TO_VALUE = \"{}\" # @param\n", + "RESPONSE_MIME_TYPE = \"text/plain\" # @param [\"text/plain\", \"application/json\", \"text/x.enum\"] {type:\"string\"}\n", + "RESPONSE_SCHEMA = \"\"\n", + "TARGET_LANGUAGE = \"English\" # @param [\"English\", \"French\", \"German\", \"Hebrew\", \"Hindi\", \"Italian\", \"Japanese\", \"Korean\", \"Portuguese\", \"Simplified Chinese\", \"Spanish\", \"Traditional Chinese\"] {type:\"string\"}\n", + "TOOLS = \"\" # @param\n", + "TOOL_CONFIG = \"\" # @param" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X7Mgb0EHSSFk" + }, + "source": [ + "# Step 5: Run Prompt Optimizer\n", + "A progress bar will appear to let you know how long the job takes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z8NvNLTfxPTf" + }, + "outputs": [], + "source": [ + "import datetime\n", + "import json\n", + "import time\n", + "\n", + "timestamp = datetime.datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S\")\n", + "display_name = f\"pt_{timestamp}\"\n", + "\n", + "label_enforced = vapo_lib.is_run_target_required(\n", + " [\n", + " EVAL_METRIC,\n", + " EVAL_METRIC_1,\n", + " EVAL_METRIC_2,\n", + " EVAL_METRIC_3,\n", + " ],\n", + " \"\",\n", + ")\n", + "input_data_path = f\"{INPUT_DATA_PATH}\"\n", + "vapo_lib.validate_prompt_and_data(\n", + " \"\\n\".join([SYSTEM_INSTRUCTION, PROMPT_TEMPLATE]),\n", + " input_data_path,\n", + " PLACEHOLDER_TO_VALUE,\n", + " label_enforced,\n", + ")\n", + "\n", + "output_path = f\"{OUTPUT_PATH}/{display_name}\"\n", + "\n", + "params = {\n", + " \"project\": PROJECT_ID,\n", + " \"num_steps\": NUM_INST_OPTIMIZATION_STEPS,\n", + " \"system_instruction\": SYSTEM_INSTRUCTION,\n", + " \"prompt_template\": PROMPT_TEMPLATE,\n", + " \"target_model\": TARGET_MODEL,\n", + " \"target_model_qps\": TARGET_MODEL_QPS,\n", + " \"target_model_location\": LOCATION,\n", + " \"optimizer_model_location\": LOCATION,\n", + " \"eval_qps\": EVAL_QPS,\n", + " \"optimization_mode\": OPTIMIZATION_MODE,\n", + " \"num_demo_set_candidates\": NUM_DEMO_OPTIMIZATION_STEPS,\n", + " \"demo_set_size\": NUM_DEMO_PER_PROMPT,\n", + " \"aggregation_type\": METRIC_AGGREGATION_TYPE,\n", + " \"data_limit\": 50,\n", + " \"input_data_path\": input_data_path,\n", + " \"output_path\": output_path,\n", + " \"response_mime_type\": RESPONSE_MIME_TYPE,\n", + " \"response_schema\": RESPONSE_SCHEMA,\n", + " \"language\": TARGET_LANGUAGE,\n", + " \"placeholder_to_content\": json.loads(PLACEHOLDER_TO_VALUE),\n", + " \"tools\": TOOLS,\n", + " \"tool_config\": TOOL_CONFIG,\n", + " \"translation_source_field_name\": TRANSLATION_SOURCE_FIELD_NAME,\n", + " \"thinking_budget\": THINKING_BUDGET,\n", + "}\n", + "\n", + "if EVAL_METRIC_1 == \"NA\":\n", + " params[\"eval_metrics_types\"] = [EVAL_METRIC]\n", + " params[\"eval_metrics_weights\"] = [1.0]\n", + "else:\n", + " metrics = []\n", + " weights = []\n", + " for metric, weight in zip(\n", + " [EVAL_METRIC_1, EVAL_METRIC_2, EVAL_METRIC_3],\n", + " [EVAL_METRIC_1_WEIGHT, EVAL_METRIC_2_WEIGHT, EVAL_METRIC_3_WEIGHT],\n", + " ):\n", + " if metric == \"NA\":\n", + " break\n", + " metrics.append(metric)\n", + " weights.append(weight)\n", + " params[\"eval_metrics_types\"] = metrics\n", + " params[\"eval_metrics_weights\"] = weights\n", + "\n", + "job = vapo_lib.run_apd(params, OUTPUT_PATH, display_name)\n", + "print(f\"Job ID: {job.name}\")\n", + "\n", + "progress_form = vapo_lib.ProgressForm(params)\n", + "while progress_form.monitor_progress(job):\n", + " time.sleep(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lo5mcTzwSgBP" + }, + "source": [ + "# Step 6: Inspect the results\n", + "For a clearer look at the specific responses generated by each prompt template during the optimization process, use the cell below.\n", + "This will allow you to inspect all the predictions made by all the\n", + "generated templates during one or multiple vertex prompt optimizer runs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1x6HSty759jY" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML, display\n", + "\n", + "RESULT_PATH = \"[OUTPUT_PATH]\" # @param {type:\"string\"}\n", + "\n", + "results_ui = vapo_lib.ResultsUI(RESULT_PATH)\n", + "\n", + "results_df_html = \"\"\"\n", + "\n", + "\"\"\"\n", + "\n", + "display(HTML(results_df_html))\n", + "display(results_ui.get_container())" + ] + } + ], + "metadata": { + "colab": { + "name": "vertex_ai_prompt_optimizer_ui.ipynb", + "toc_visible": true, + "provenance": [], + "private_outputs": true, + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Getting_started_with_google_colab_ai.ipynb b/notebooks/Getting_started_with_google_colab_ai.ipynb new file mode 100644 index 0000000..ea523ac --- /dev/null +++ b/notebooks/Getting_started_with_google_colab_ai.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "metadata": { + "id": "wdj9RMfoGPC2" + }, + "cell_type": "markdown", + "source": [ + "Colab is making it easier than ever to integrate powerful Generative AI capabilities into your projects. We are launching public preview for a simple and intuitive Python library (google.colab.ai) to access state-of-the-art language models directly within Pro and Pro+ subscriber Colab environments. This means subscribers can spend less time on configuration and set up and more time bringing their ideas to life. With just a few lines of code, you can now perform a variety of tasks:\n", + "- Generate text\n", + "- Translate languages\n", + "- Write creative content\n", + "- Categorize text\n", + "\n", + "Happy Coding!\n", + "\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colabtools/blob/main/notebooks/Getting_started_with_google_colab_ai.ipynb)" + ] + }, + { + "metadata": { + "id": "Ucchuu5vV3Jp", + "outputId": "a3004031-0e0c-4f8d-842d-a79421f51e11", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "cell_type": "code", + "source": [ + "# @title List available models\n", + "from google.colab import ai\n", + "\n", + "ai.list_models()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['google/gemini-2.0-flash',\n", + " 'google/gemini-2.0-flash-lite',\n", + " 'google/gemini-2.5-flash',\n", + " 'google/gemini-2.5-flash-lite',\n", + " 'google/gemini-2.5-pro',\n", + " 'google/gemma-3-12b',\n", + " 'google/gemma-3-1b',\n", + " 'google/gemma-3-27b',\n", + " 'google/gemma-3-4b']" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ], + "execution_count": 27 + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "9D74LOIiggIt" + }, + "execution_count": 27, + "outputs": [] + }, + { + "metadata": { + "id": "LjfCGEpzDsD9" + }, + "cell_type": "markdown", + "source": [ + "Choosing a Model\n", + "The model names give you a hint about their capabilities and intended use:\n", + "\n", + "Pro: These are the most capable models, ideal for complex reasoning, creative tasks, and detailed analysis.\n", + "\n", + "Flash: These models are optimized for high speed and efficiency, making them great for summarization, chat applications, and tasks requiring rapid responses.\n", + "\n", + "Gemma: These are lightweight, open-weight models suitable for a variety of text generation tasks and are great for experimentation." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "8fjql8kzgin1" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "cNSefuDZgjqV" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "0_KsQAqsgkW9" + }, + "execution_count": 27, + "outputs": [] + }, + { + "metadata": { + "id": "ysDdFbH_Dgtz" + }, + "cell_type": "markdown", + "source": [ + "For longer text generations, you can stream the response. This displays the output token by token as it's generated, rather than waiting for the entire response to complete. This provides a more interactive and responsive experience. To enable this, simply set stream=True." + ] + }, + { + "metadata": { + "id": "CpMmpaVClSBV", + "cellView": "form" + }, + "cell_type": "code", + "source": [ + "#@title Text formatting setup\n", + "#code is not necessary for colab.ai, but is useful in fomatting text chunks\n", + "import sys\n", + "\n", + "class LineWrapper:\n", + " def __init__(self, max_length=80):\n", + " self.max_length = max_length\n", + " self.current_line_length = 0\n", + "\n", + " def print(self, text_chunk):\n", + " i = 0\n", + " n = len(text_chunk)\n", + " while i < n:\n", + " start_index = i\n", + " while i < n and text_chunk[i] not in ' \\n': # Find end of word\n", + " i += 1\n", + " current_word = text_chunk[start_index:i]\n", + "\n", + " delimiter = \"\"\n", + " if i < n: # If not end of chunk, we found a delimiter\n", + " delimiter = text_chunk[i]\n", + " i += 1 # Consume delimiter\n", + "\n", + " if current_word:\n", + " needs_leading_space = (self.current_line_length > 0)\n", + "\n", + " # Case 1: Word itself is too long for a line (must be broken)\n", + " if len(current_word) > self.max_length:\n", + " if needs_leading_space: # Newline if current line has content\n", + " sys.stdout.write('\\n')\n", + " self.current_line_length = 0\n", + " for char_val in current_word: # Break the long word\n", + " if self.current_line_length >= self.max_length:\n", + " sys.stdout.write('\\n')\n", + " self.current_line_length = 0\n", + " sys.stdout.write(char_val)\n", + " self.current_line_length += 1\n", + " # Case 2: Word doesn't fit on current line (print on new line)\n", + " elif self.current_line_length + (1 if needs_leading_space else 0) + len(current_word) > self.max_length:\n", + " sys.stdout.write('\\n')\n", + " sys.stdout.write(current_word)\n", + " self.current_line_length = len(current_word)\n", + " # Case 3: Word fits on current line\n", + " else:\n", + " if needs_leading_space:\n", + " # Define punctuation that should not have a leading space\n", + " # when they form an entire \"word\" (token) following another word.\n", + " no_leading_space_punctuation = {\n", + " \",\", \".\", \";\", \":\", \"!\", \"?\", # Standard sentence punctuation\n", + " \")\", \"]\", \"}\", # Closing brackets\n", + " \"'s\", \"'S\", \"'re\", \"'RE\", \"'ve\", \"'VE\", # Common contractions\n", + " \"'m\", \"'M\", \"'ll\", \"'LL\", \"'d\", \"'D\",\n", + " \"n't\", \"N'T\",\n", + " \"...\", \"…\" # Ellipses\n", + " }\n", + " if current_word not in no_leading_space_punctuation:\n", + " sys.stdout.write(' ')\n", + " self.current_line_length += 1\n", + " sys.stdout.write(current_word)\n", + " self.current_line_length += len(current_word)\n", + "\n", + " if delimiter == '\\n':\n", + " sys.stdout.write('\\n')\n", + " self.current_line_length = 0\n", + " elif delimiter == ' ':\n", + " # If line is full and a space delimiter arrives, it implies a wrap.\n", + " if self.current_line_length >= self.max_length:\n", + " sys.stdout.write('\\n')\n", + " self.current_line_length = 0\n", + "\n", + " sys.stdout.flush()\n" + ], + "outputs": [], + "execution_count": 29 + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/quickstarts/Get_started_LiveAPI.ipynb b/quickstarts/Get_started_LiveAPI.ipynb new file mode 100644 index 0000000..157e32b --- /dev/null +++ b/quickstarts/Get_started_LiveAPI.ipynb @@ -0,0 +1,1039 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2025 Google LLC." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5DkeFMP75as" + }, + "source": [ + "# Multimodal Live API - Quickstart" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tqktCVDm1yFo" + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iS0rHk3RBrtA" + }, + "source": [ + "**Preview**: The Live API is in preview.\n", + "\n", + "This notebook demonstrates simple usage of the Gemini Multimodal Live API. For an overview of new capabilities refer to the [Gemini Live API docs](https://ai.google.dev/gemini-api/docs/live).\n", + "\n", + "This notebook implements a simple turn-based chat where you send messages as text, and the model replies with audio. The API is capable of much more than that. The goal here is to demonstrate with **simple code**.\n", + "\n", + "Some features of the API are not working in Colab, to try them it is recommended to have a look at this [Python script](./Get_started_LiveAPI.py) and run it locally.\n", + "\n", + "If you aren't looking for code, and just want to try multimedia streaming use [Live API in Google AI Studio](https://aistudio.google.com/app/live).\n", + "\n", + "The [Next steps](#next_steps) section at the end of this tutorial provides links to additional resources.\n", + "\n", + "#### Native audio output\n", + "\n", + "**Info**: Gemini 2.5 introduces [native audio generation](https://ai.google.dev/gemini-api/docs/live#native-audio-output), which directly generates audio output, providing a more natural sounding audio, more expressive voices, more awareness of additional context, e.g., tone, and more proactive responses. You can try a native audio example in this [script](./Get_started_LiveAPI_NativeAudio.py)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mfk6YY3G5kqp" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d5027929de8f" + }, + "source": [ + "### Install SDK\n", + "\n", + "The new **[Google Gen AI SDK](https://ai.google.dev/gemini-api/docs/sdks)** provides programmatic access to Gemini 2.5 (and previous models) using both the [Google AI for Developers](https://ai.google.dev/gemini-api/docs) and [Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/overview) APIs. With a few exceptions, code that runs on one platform will run on both.\n", + "\n", + "More details about this new SDK on the [documentation](https://ai.google.dev/gemini-api/docs/sdks) or in the [Getting started](../quickstarts/Get_started.ipynb) notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "46zEFO2a9FFd", + "outputId": "444618bd-a432-428b-b15a-d3bfcef91065" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 25.1.1 -> 25.2\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "%pip install -U -q google-genai" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CTIfnvCn9HvH" + }, + "source": [ + "### Set up your API key\n", + "\n", + "To run the following cell, your API key must be stored in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see [Authentication](../quickstarts/Authentication.ipynb) for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A1pkoyZb9Jm3" + }, + "outputs": [], + "source": [ + "from google.colab import userdata\n", + "import os\n", + "\n", + "os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3Hx_Gw9i0Yuv" + }, + "source": [ + "### Initialize SDK client\n", + "\n", + "The client will pick up your API key from the environment variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HghvVpbU0Uap" + }, + "outputs": [], + "source": [ + "from google import genai\n", + "from google.genai import types\n", + "client = genai.Client(api_key=GOOGLE_API_KEY)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QOov6dpG99rY" + }, + "source": [ + "### Select a model\n", + "\n", + "The [Gemini 2.5 Flash Live](https://ai.google.dev/gemini-api/docs/models#live-api) model works with the Live API to enable low-latency bidirectional voice and video interactions with Gemini. The model can process text, audio, and video input, and it can provide text and audio output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "27Fikag0xSaB" + }, + "outputs": [], + "source": [ + "MODEL = \"gemini-2.0-flash-live-001\" # @param [\"gemini-2.0-flash-live-001\", \"gemini-live-2.5-flash-preview\",\"gemini-2.5-flash-preview-native-audio-dialog\"] {\"allow-input\":true, isTemplate: true}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GOOZsm7i9io6" + }, + "source": [ + "### Import\n", + "\n", + "Import all the necessary modules." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Yd1vs3cP8EmS" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "import base64\n", + "import contextlib\n", + "import datetime\n", + "import os\n", + "import json\n", + "import wave\n", + "import itertools\n", + "\n", + "from IPython.display import display, Audio\n", + "\n", + "from google import genai\n", + "from google.genai import types" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj7gDzfDOq4h" + }, + "source": [ + "## Text to Text\n", + "\n", + "The simplest way to use the Live API is as a text-to-text chat interface, but it can do **a lot** more than this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dDfslcyIOqgI", + "outputId": "ae08f423-be20-4e05-876e-479485cf7004" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Hello? Gemini are you there? \n", + "\n", + "- Hello\n", + "- there! I am indeed here. How can I help you today?\n" + ] + } + ], + "source": [ + "config={\n", + " \"response_modalities\": [\"TEXT\"]\n", + "}\n", + "\n", + "async with client.aio.live.connect(model=MODEL, config=config) as session:\n", + " message = \"Hello? Gemini are you there?\"\n", + " print(\"> \", message, \"\\n\")\n", + " await session.send_client_content(\n", + " turns={\"role\": \"user\", \"parts\": [{\"text\": message}]}, turn_complete=True\n", + " )\n", + "\n", + " # For text responses, When the model's turn is complete it breaks out of the loop.\n", + " turn = session.receive()\n", + " async for chunk in turn:\n", + " if chunk.text is not None:\n", + " print(f'- {chunk.text}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rvpmur4lKfOv" + }, + "source": [ + "## Simple text to audio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jjkzgogvG1q0" + }, + "source": [ + "The simplest way to playback the audio in Colab, is to write it out to a `.wav` file. So here is a simple wave file writer:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7mEDGwJfLRrm" + }, + "outputs": [], + "source": [ + "@contextlib.contextmanager\n", + "def wave_file(filename, channels=1, rate=24000, sample_width=2):\n", + " with wave.open(filename, \"wb\") as wf:\n", + " wf.setnchannels(channels)\n", + " wf.setsampwidth(sample_width)\n", + " wf.setframerate(rate)\n", + " yield wf" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DGuKQSurN7F4" + }, + "source": [ + "The next step is to tell the model to return audio by setting `\"response_modalities\": [\"AUDIO\"]` in the `LiveConnectConfig`. \n", + "\n", + "When you get a response from the model, then you write out the data to a `.wav` file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VFD4VleVKj1-", + "outputId": "6d5420ae-8d9b-4ab4-c048-7c176aa4c5f4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Hello? Gemini are you there? \n", + "\n", + "audio/pcm;rate=24000\n", + "................" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "config={\n", + " \"response_modalities\": [\"AUDIO\"]\n", + "}\n", + "\n", + "async def async_enumerate(aiterable):\n", + " n=0\n", + " async for item in aiterable:\n", + " yield n, item\n", + " n+=1\n", + "\n", + "\n", + "async with client.aio.live.connect(model=MODEL, config=config) as session:\n", + " file_name = 'audio.wav'\n", + " with wave_file(file_name) as wav:\n", + " message = \"Hello? Gemini are you there?\"\n", + " print(\"> \", message, \"\\n\")\n", + " await session.send_client_content(\n", + " turns={\"role\": \"user\", \"parts\": [{\"text\": message}]}, turn_complete=True\n", + " )\n", + "\n", + " turn = session.receive()\n", + " async for n,response in async_enumerate(turn):\n", + " if response.data is not None:\n", + " wav.writeframes(response.data)\n", + "\n", + " if n==0:\n", + " print(response.server_content.model_turn.parts[0].inline_data.mime_type)\n", + " print('.', end='')\n", + "\n", + "\n", + "display(Audio(file_name, autoplay=True))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QutDG7r78Zf-" + }, + "source": [ + "## Towards Async Tasks\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YfEQZrtZY_90" + }, + "source": [ + "The real power of the Live API is that it's real time, and interruptable. You can't get that full power in a simple sequence of steps. To really use the functionality you will move the `send` and `recieve` operations (and others) into their own [async tasks](https://docs.python.org/3/library/asyncio-task.html).\n", + "\n", + "Because of the limitations of Colab this tutorial doesn't totally implement the interactive async tasks, but it does implement the next step in that direction:\n", + "\n", + "- It separates the `send` and `receive`, but still runs them sequentially. \n", + "- In the next tutorial you'll run these in separate `async` tasks.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QUBet__tZF0o" + }, + "source": [ + "Setup a quick logger to make debugging easier (switch to `setLevel('DEBUG')` to see debugging messages)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bWTaU8j-X3AJ" + }, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "logger = logging.getLogger('Live')\n", + "logger.setLevel('INFO')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ERqyY0IFN8G9" + }, + "source": [ + "The class below implements the interaction with the Live API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3zAjMOZXFuxI" + }, + "outputs": [], + "source": [ + "class AudioLoop:\n", + " def __init__(self, turns=None, config=None):\n", + " self.session = None\n", + " self.index = 0\n", + " self.turns = turns\n", + " if config is None:\n", + " config={\n", + " \"response_modalities\": [\"AUDIO\"]}\n", + " self.config = config\n", + "\n", + " async def run(self):\n", + " logger.debug('connect')\n", + " async with client.aio.live.connect(model=MODEL, config=self.config) as session:\n", + " self.session = session\n", + "\n", + " async for sent in self.send():\n", + " # Ideally send and recv would be separate tasks.\n", + " await self.recv()\n", + "\n", + " async def _iter(self):\n", + " if self.turns:\n", + " for text in self.turns:\n", + " print(\"message >\", text)\n", + " yield text\n", + " else:\n", + " print(\"Type 'q' to quit\")\n", + " while True:\n", + " text = await asyncio.to_thread(input, \"message > \")\n", + "\n", + " # If the input returns 'q' quit.\n", + " if text.lower() == 'q':\n", + " break\n", + "\n", + " yield text\n", + "\n", + " async def send(self):\n", + " async for text in self._iter():\n", + " logger.debug('send')\n", + "\n", + " # Send the message to the model.\n", + " await self.session.send_client_content(\n", + " turns={\"role\": \"user\", \"parts\": [{\"text\": text}]}, turn_complete=True\n", + " )\n", + " logger.debug('sent')\n", + " yield text\n", + "\n", + " async def recv(self):\n", + " # Start a new `.wav` file.\n", + " file_name = f\"audio_{self.index}.wav\"\n", + " with wave_file(file_name) as wav:\n", + " self.index += 1\n", + "\n", + " logger.debug('receive')\n", + "\n", + " # Read chunks from the socket.\n", + " turn = self.session.receive()\n", + " async for n, response in async_enumerate(turn):\n", + " logger.debug(f'got chunk: {str(response)}')\n", + "\n", + " if response.data is None:\n", + " logger.debug(f'Unhandled server message! - {response}')\n", + " else:\n", + " wav.writeframes(response.data)\n", + " if n == 0:\n", + " print(response.server_content.model_turn.parts[0].inline_data.mime_type)\n", + " print('.', end='')\n", + "\n", + " print('\\n')\n", + "\n", + " display(Audio(file_name, autoplay=True))\n", + " await asyncio.sleep(2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AwNPuC_rAHAc" + }, + "source": [ + "There are 3 methods worth describing here:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tXPhEdHIPBif" + }, + "source": [ + "**`run` - The main loop**\n", + "\n", + "This method:\n", + "\n", + "- Opens a `websocket` connecting to the Live API.\n", + "- Calls the initial `setup` method.\n", + "- Then enters the main loop where it alternates between `send` and `recv` until send returns `False`.\n", + "- The next tutorial will demonstrate how to stream media and run these asynchronously." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oCg1qFf0PV44" + }, + "source": [ + "**`send` - Sends input text to the api**\n", + "\n", + "The `send` method collects input text from the user, wraps it in a `client_content` message (an instance of `BidiGenerateContentClientContent`), and sends it to the model.\n", + "\n", + "If the user sends a `q` this method returns `False` to signal that it's time to quit." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tLukmBhPPib4" + }, + "source": [ + "**`recv` - Collects audio from the API and plays it**\n", + "\n", + "The `recv` method collects audio chunks in a loop and writes them to a `.wav` file. It breaks out of the loop once the model sends a `turn_complete` method, and then plays the audio.\n", + "\n", + "To keep things simple in Colab it collects **all** the audio before playing it. [Other examples](#next_steps) demonstrate how to play audio as soon as you start to receive it (using `PyAudio`), and how to interrupt the model (implement input and audio playback on separate tasks)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gGYtiV2N8b2o" + }, + "source": [ + "### Run\n", + "\n", + "Run it:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WxdwgTKIGIlY", + "outputId": "dfc710c3-f0f3-4ff0-9e10-75edaedf63d7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "message > Hello\n", + "audio/pcm;rate=24000\n", + "....................\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "message > What's your name?\n", + "audio/pcm;rate=24000\n", + "..........\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "await AudioLoop(['Hello', \"What's your name?\"]).run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uZD24TXrcsfk" + }, + "source": [ + "## Working with resumable sessions\n", + "\n", + "Session resumption allows you to return to a previous interaction with the Live API by sending the last session handle you got from the previous session.\n", + "\n", + "When you set your session to be resumable, the session information keeps stored on the Live API for up to 24 hours. In this time window, you can resume the conversation and refer to previous information you have shared with the model.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qveK8jxae_x0" + }, + "source": [ + "### Helper functions\n", + "\n", + "Start by creating the helper functions for your resumable interaction with the Live API. It will include:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cbkoDa1ve_C5" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "import traceback\n", + "from asyncio.exceptions import CancelledError\n", + "\n", + "last_handle = None\n", + "\n", + "MODEL = \"gemini-live-2.5-flash-preview\"\n", + "\n", + "client = genai.Client(api_key=GOOGLE_API_KEY)\n", + "\n", + "async def async_enumerate(aiterable):\n", + " n=0\n", + " async for item in aiterable:\n", + " yield n, item\n", + " n+=1\n", + "\n", + "\n", + "def show_response(response):\n", + " new_handle = None\n", + " if text := response.text:\n", + " print(text, end=\"\")\n", + " else:\n", + " print(response.model_dump_json(indent=2, exclude_none=True))\n", + " if response.session_resumption_update:\n", + " new_handle = response.session_resumption_update.new_handle\n", + " return new_handle\n", + "\n", + "\n", + "async def clock():\n", + " time = 0\n", + " while True:\n", + " await asyncio.sleep(60)\n", + " time += 1\n", + " print(f\"{time}:00\")\n", + "\n", + "\n", + "async def recv(session):\n", + " global last_handle\n", + " try:\n", + " while True:\n", + " async for response in session.receive():\n", + " new_handle = show_response(response)\n", + " if new_handle:\n", + " last_handle = new_handle\n", + " except asyncio.CancelledError:\n", + " pass\n", + "\n", + "\n", + "async def send(session):\n", + " while True:\n", + " message = await asyncio.to_thread(input, \"message > \")\n", + " if message.lower() == \"q\":\n", + " break\n", + " await session.send_client_content(turns={\n", + " 'role': 'user',\n", + " 'parts': [{'text': message}]\n", + " })\n", + "\n", + "\n", + "async def async_main(last_handle=None):\n", + " config = types.LiveConnectConfig.model_validate({\n", + " \"response_modalities\": [\"TEXT\"],\n", + " \"session_resumption\": {\n", + " 'handle': last_handle,\n", + " }\n", + " })\n", + " try:\n", + " async with (\n", + " client.aio.live.connect(model=MODEL, config=config) as session,\n", + " asyncio.TaskGroup() as tg\n", + " ):\n", + " clock_task = tg.create_task(clock())\n", + " recv_task = tg.create_task(recv(session))\n", + " send_task = tg.create_task(send(session))\n", + " await send_task\n", + " raise asyncio.CancelledError()\n", + " except asyncio.CancelledError:\n", + " pass\n", + " except ExceptionGroup as EG:\n", + " traceback.print_exception(EG)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xsPClQxUkO9V" + }, + "source": [ + "Now you can start interacting with the Live API (type `q` to finish the conversation):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yqBTtKvGmKI4", + "outputId": "fda0516a-7ea2-4e6e-9371-bb3a8a3280bf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"session_resumption_update\": {}\n", + "}\n", + "Hello there! How can I help you today?{\n", + " \"server_content\": {\n", + " \"generation_complete\": true\n", + " }\n", + "}\n", + "{\n", + " \"server_content\": {\n", + " \"turn_complete\": true\n", + " },\n", + " \"usage_metadata\": {\n", + " \"prompt_token_count\": 9,\n", + " \"response_token_count\": 10,\n", + " \"total_token_count\": 19,\n", + " \"prompt_tokens_details\": [\n", + " {\n", + " \"modality\": \"TEXT\",\n", + " \"token_count\": 9\n", + " }\n", + " ],\n", + " \"response_tokens_details\": [\n", + " {\n", + " \"modality\": \"TEXT\",\n", + " \"token_count\": 10\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"session_resumption_update\": {\n", + " \"new_handle\": \"Cig2N3lqa3d3MXd4eHFoeDk3cnhmeHUydjlhdHN2cms1bDRnc3c0N2Zq\",\n", + " \"resumable\": true\n", + " }\n", + "}\n", + "1:00\n", + "{\n", + " \"session_resumption_update\": {}\n", + "}\n", + "The capital of Brazil is **Brasília**.{\n", + " \"server_content\": {\n", + " \"generation_complete\": true\n", + " }\n", + "}\n", + "{\n", + " \"server_content\": {\n", + " \"turn_complete\": true\n", + " },\n", + " \"usage_metadata\": {\n", + " \"prompt_token_count\": 36,\n", + " \"response_token_count\": 9,\n", + " \"total_token_count\": 45,\n", + " \"prompt_tokens_details\": [\n", + " {\n", + " \"modality\": \"TEXT\",\n", + " \"token_count\": 36\n", + " }\n", + " ],\n", + " \"response_tokens_details\": [\n", + " {\n", + " \"modality\": \"TEXT\",\n", + " \"token_count\": 9\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"session_resumption_update\": {\n", + " \"new_handle\": \"Cig0ZDR1OTViNHVjOWh6aGJvMmhwdWk3NzJiZWRwYW91bnNtajgxZHN1\",\n", + " \"resumable\": true\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "await async_main()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NdUy9WsrJwxi" + }, + "source": [ + "With the session resumption you have the session handle to refer to your previous sessions. In this example, the handle is saved at the `last_handle` variable as below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y5ZVUQ5vJrEJ", + "outputId": "49994add-7cc3-44cc-ba9b-8692a7c2654a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Cig0ZDR1OTViNHVjOWh6aGJvMmhwdWk3NzJiZWRwYW91bnNtajgxZHN1'" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "last_handle" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xEIhRZBYJ_8V" + }, + "source": [ + "Now you can start a new Live API session, but this time pointing to a handle from a previous session. Also, to test you could gather information from the previous session, you will ask the model what was the second question you asked before (in this example, it was \"what is the capital of Brazil?\"). You can see the Live API recovering that information:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xH_iZhTxKFtF", + "outputId": "22f4d1ab-362c-401a-ce40-8d386cf209e4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"session_resumption_update\": {}\n", + "}\n", + "The last question you asked was: \"what is the capital of brazil?\"{\n", + " \"server_content\": {\n", + " \"generation_complete\": true\n", + " }\n", + "}\n", + "{\n", + " \"server_content\": {\n", + " \"turn_complete\": true\n", + " },\n", + " \"usage_metadata\": {\n", + " \"prompt_token_count\": 63,\n", + " \"response_token_count\": 15,\n", + " \"total_token_count\": 78,\n", + " \"prompt_tokens_details\": [\n", + " {\n", + " \"modality\": \"TEXT\",\n", + " \"token_count\": 63\n", + " }\n", + " ],\n", + " \"response_tokens_details\": [\n", + " {\n", + " \"modality\": \"TEXT\",\n", + " \"token_count\": 15\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "{\n", + " \"session_resumption_update\": {\n", + " \"new_handle\": \"CihyNDg4YTkxanl5cThzYmo4a29lMHRveDJlY3U1amRyNHlqeWF0bWU2\",\n", + " \"resumable\": true\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "await async_main(last_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ietchD8GbcXt" + }, + "source": [ + "## Next steps\n", + "\n", + "\n", + "\n", + "This tutorial just shows basic usage of the Live API, using the Python GenAI SDK.\n", + "\n", + "- If you aren't looking for code, and just want to try multimedia streaming use [Live API in Google AI Studio](https://aistudio.google.com/app/live).\n", + "- If you want to see how to setup streaming interruptible audio and video using the Live API see the [Audio and Video input Tutorial](../quickstarts/Get_started_LiveAPI.py).\n", + "- If you're interested in the low level details of using the websockets directly, see the [websocket version of this tutorial](../quickstarts/websockets/Get_started_LiveAPI.ipynb).\n", + "- Try the [Tool use in the live API tutorial](../quickstarts/Get_started_LiveAPI_tools.ipynb) for an walkthrough of Gemini-2.5's new use capabilities.\n", + "- There is a [Streaming audio in Colab example](../quickstarts/websockets/LiveAPI_streaming_in_colab.ipynb), but this is more of a **demo**, it's **not optimized for readability**.\n", + "- Other nice Gemini 2.5 examples can also be found in the [Cookbook's example\n", + " directory](https://github.com/google-gemini/cookbook/tree/main/examples/), in particular the [video understanding](../quickstarts/Video_understanding.ipynb) and the [spatial understanding](../quickstarts/Spatial_understanding.ipynb) ones." + ] + }, + { + "cell_type": "markdown", + "source": [ + "# New Section" + ], + "metadata": { + "id": "TKJvxpqBlBEl" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "bNlAkeqHlCkN" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "Tce3stUlHN0L" + ], + "name": "Get_started_LiveAPI.ipynb", + "toc_visible": true, + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file