diff --git a/quickstarts/Get_started_Interactions.ipynb b/quickstarts/Get_started_Interactions.ipynb new file mode 100644 index 000000000..19c053021 --- /dev/null +++ b/quickstarts/Get_started_Interactions.ipynb @@ -0,0 +1,1842 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "NtVOlmDSHmh4" + }, + "source": [ + "##### Copyright 2025 Google LLC." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "9r9Ggw012g9c" + }, + "outputs": [], + "source": [ + "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eVmFDcYOSNiV" + }, + "source": [ + "# Gemini API: Getting started with Gemini models\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mfk6YY3G5kqp" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d5027929de8f" + }, + "source": [ + "### Install SDK\n", + "\n", + "Install the SDK from [PyPI](https://github.com/googleapis/python-genai)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JOV1F11LkUp_" + }, + "outputs": [], + "source": [ + "%pip install -q 'google-genai>=1.55.0'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CTIfnvCn9HvH" + }, + "source": [ + "### Setup your API key\n", + "\n", + "To run the following cell, your API key must be stored in a Colab Secret named `GEMINI_API_KEY`. If you don't already have an API key or you aren't sure how to create a Colab Secret, see [Authentication](../quickstarts/Authentication.ipynb) for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A1pkoyZb9Jm3" + }, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "\n", + "os.environ['GEMINI_API_KEY'] = userdata.get('GEMINI_API_KEY')" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Utility functions" + ], + "metadata": { + "id": "VuRXzpzOpWyV" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vHLr3oLxNpPD" + }, + "outputs": [], + "source": [ + "from IPython.display import Audio,Markdown, Image, display\n", + "import textwrap\n", + "import base64\n", + "import wave\n", + "\n", + "\n", + "def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):\n", + " with wave.open(str(filename), \"wb\") as wf:\n", + " wf.setnchannels(channels)\n", + " wf.setsampwidth(sample_width)\n", + " wf.setframerate(rate)\n", + " wf.writeframes(pcm)\n", + "\n", + "\n", + "import collections\n", + "\n", + "\n", + "def show_outputs(outputs):\n", + " for part in outputs:\n", + " if part.type == 'text':\n", + " display(Markdown(part.text))\n", + " elif part.type == 'thought':\n", + " pass\n", + " elif part.type == 'image':\n", + " display(Image(data=base64.b64decode(part.data)))\n", + " elif part.type == 'function_call':\n", + " print(repr(part))\n", + " elif part.type == 'audio':\n", + " audio_data = part.data\n",
+ " wave_file(\"speech.wav\", base64.b64decode(audio_data))\n", + " display(Audio(\"speech.wav\"))\n", + " elif part.type == 'code_execution_call':\n", + " display(Markdown(f\"\\n```python\\n{part.arguments.code}\\n```\\n\"))\n", + " elif part.type == 'code_execution_result':\n", + " display(Markdown(part.result.join(['\\n```\\n', '\\n```\\n'])))\n", + " elif part.type == 'google_search_call':\n", + " if part.arguments is None:\n", + " display(Markdown(f\"\\n```\\ngoogle_search(???)\\n```\\n\"))\n", + " else:\n", + " display(Markdown(f\"\\n```\\ngoogle_search({part.arguments.queries})\\n```\\n\"))\n", + "\n", + " elif part.type == 'google_search_result':\n", + " links = []\n", + " for result in part.result:\n", + " links.append(f\"* [{result.title}]({result.url})\")\n", + " display(Markdown(\"\\n\".join(links)))\n", + " elif part.type == 'url_context_call':\n", + " display(Markdown(\"Url Context\"))\n", + " elif part.type == 'url_context_result':\n", + " markdown_lines = []\n", + " status_emoji = {\"SUCCESS\": \"✅\", \"FAIL\": \"❌\"}\n", + " for item in part.result:\n", + " emoji = status_emoji.get(item.status, \"❓\") # Default to '❓' if status is unknown\n", + " line = f\"* {emoji} [{item.url}]({item.url})\"\n", + " markdown_lines.append(line)\n", + "\n", + " display(Markdown(\"\\n\".join(markdown_lines)))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3Hx_Gw9i0Yuv" + }, + "source": [ + "### Initialize SDK client\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "COvuiM2Yyfwl" + }, + "outputs": [], + "source": [ + "from google import genai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HghvVpbU0Uap" + }, + "outputs": [], + "source": [ + "client = genai.Client()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uRrzDMysw95X" + }, + "outputs": [], + "source": [ + "for m in client.models.list():\n", + " if 'gemini-3' in m.name:\n",
+ " print(m.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MvA_mbi1JxD5" + }, + "source": [ + "### Choose a model\n", + "\n", + "Select the model you want to use in this guide. You can either select one from the list or enter a model name manually. Keep in mind that some models, such as the 2.5 ones, are thinking models and thus take slightly more time to respond. For more details, you can see [thinking notebook](./Get_started_thinking.ipynb) to learn how to switch the thinking off.\n", + "\n", + "For a full overview of all Gemini models, check the [documentation](https://ai.google.dev/gemini-api/docs/models/gemini)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AChpZWIXu62m" + }, + "outputs": [], + "source": [ + "MODEL_ID = \"gemini-2.5-flash\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6TYNPrNvQ8ue" + }, + "source": [ + "## Standard interactions\n", + "\n", + "Use the `interactions.create` method to generate responses to your prompts. You can pass text directly as the `input` and use the `.text` property to get the text content of the response. Note that the `.text` field will work when there's only one part in the output."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T8md0ayAJ-RZ" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"What's the largest planet in our solar system?\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ltjtJdmFQH-Z" + }, + "outputs": [], + "source": [ + "print(response.model_dump_json(indent=2, exclude_none=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0Wg1GQDWGjL" + }, + "outputs": [], + "source": [ + "response.outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OFp2a15CQTxb" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nfMDRp9FMTV-" + }, + "source": [ + "## Deep research" + ] + }, + { + "cell_type": "code", + "source": [ + "for m in client.models.list():\n", + " if 'research' in m.name:\n", + " print(m.name)" + ], + "metadata": { + "id": "DFDPk9r_yuYp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NZKDToAOMbSc" + }, + "outputs": [], + "source": [ + "stream = client.interactions.create(\n", + " agent=\"deep-research-preview\",\n", + " input='I want to learn more about the history of hadrians wall',\n", + " agent_config={'thinking_summaries':'auto', 'type':'deep-research'},\n", + " background = True,\n", + " stream = True,\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "start = next(stream)\n", + "print(start)" + ], + "metadata": { + "id": "ICVstGGBy0d0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "for chunk in stream:\n", + " print(chunk)" + ], + "metadata": { + "id": "NEDqi0mZyxUj" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"source": [ + "If you get disconnected you can reconect with get, replaying the whole stream:" + ], + "metadata": { + "id": "UPjVuVhozfDv" + } + }, + { + "cell_type": "code", + "source": [ + "stream = client.interactions.get(\n", + " id=start.interaction.id,\n", + " stream=True)" + ], + "metadata": { + "id": "O7NkUUHmyaqs" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "for chunk in stream:\n", + " print(chunk)" + ], + "metadata": { + "id": "ayWSAC7NzFwg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Get without streaming returns the whole combined result:" + ], + "metadata": { + "id": "IkkZBJbO04_y" + } + }, + { + "cell_type": "code", + "source": [ + "interaction = client.interactions.get(id=start.interaction.id)\n", + "print(interaction)" + ], + "metadata": { + "id": "p8_1bhwK06_H" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "If you don't need the thought summaries or realtime updates, you can leave them out:" + ], + "metadata": { + "id": "ILGohJFa0JXN" + } + }, + { + "cell_type": "code", + "source": [ + "result = client.interactions.create(\n", + " agent=\"deep-research-preview\",\n", + " input='I want to learn more about the history of hadrians wall',\n", + " background = True,\n", + ")" + ], + "metadata": { + "id": "naCOF6ui0eY0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import time\n", + "\n", + "while result.status == 'in_progress':\n", + " time.sleep(10)\n", + " print('.', end='')\n", + " result = client.interactions.get(id=result.id)" + ], + "metadata": { + "id": "0eGcc_z-1TVj" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(result)" + ], + "metadata": { + "id": "myAO7bem2EfU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"yww-vrxmRiIy" + }, + "source": [ + "## Send image prompts\n", + "\n", + "In this first example, you'll download an image from a specified URL, save it as a byte stream and then write those bytes to a local file named `jetpack.png`.\n", + "\n", + "Use Gemini model, a multimodal model that supports multimodal prompts. You can include text, [PDF documents](../quickstarts/PDF_Files.ipynb), images, [audio](../quickstarts/Audio.ipynb) and [video](../quickstarts/Video.ipynb) in your prompt requests and get text or code responses.\n", + "\n", + "See the \"Multimedia input\" section below for other media types.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bQ3zu5udSBuD" + }, + "outputs": [], + "source": [ + "import requests\n", + "import pathlib\n", + "from PIL import Image\n", + "\n", + "IMG = \"https://storage.googleapis.com/generativeai-downloads/data/jetpack.png\" # @param {type: \"string\"}\n", + "\n", + "img_bytes = requests.get(IMG).content\n", + "\n", + "img_path = pathlib.Path('jetpack.png')\n", + "img_path.write_bytes(img_bytes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xSjAMbVjOlnc" + }, + "source": [ + "In this second example, you'll open a previously saved image, create a thumbnail of it and then generate a short blog post based on the thumbnail, displaying both the thumbnail and the generated blog post." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nMbHG-L_WXM1" + }, + "outputs": [], + "source": [ + "image = Image.open(img_path)\n", + "image.thumbnail([512,512])\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cDxd7Pp_SELb" + }, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'image',\n", + " \"mime_type\": \"image/png\",\n", + " \"data\": base64.b64encode(img_path.read_bytes()).decode(\"utf-8\"),\n", + " },\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Write a short and engaging blog post based on this picture.\",\n", + " }\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0OXdORuXC2v" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "68Dy2j1QTKy7" + }, + "source": [ + "If you've uploaded a file to the files-api, pass the file-uri instead of the bytes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fPdvD4A1f0OB" + }, + "outputs": [], + "source": [ + "uploaded_file = client.files.upload(\n", + " file=img_path,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qUg6gU-6SmJ4" + }, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'image',\n", + " \"mime_type\": \"image/png\",\n", + " \"uri\": uploaded_file.uri\n", + " },\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Write a short and engaging blog post based on this picture.\",\n", + " }\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "diQCNdnRTEk1" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VTzuBfHyWAg5" + }, + "source": [ + "## System instructions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r5izy6jsbEnL" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Tell me how the internet works\",\n", + " system_instruction=\"Pretend the user is a puppy who's only interested in squeaky toys.\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vMqL6xNGXXtS" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BWeVCRKKy6l4" + }, + "source": [ + "## generation Config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gapYeXxIy4h5" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Tell me how the internet works\",\n", + " system_instruction=\"Pretend the user is a puppy 
who's only interested in squeaky toys.\",\n", + "\n", + " generation_config = {\n", + " \"max_output_tokens\": 255\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b6sB7W-jdGxJ" + }, + "source": [ + "## Multi-turn chat\n", + "\n", + "The Gemini API enables you to have freeform conversations across multiple turns. This is on by default, the id from one turn can be used as the starting point for the next." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jv2n_UQ9Xw1z" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " system_instruction=\"\"\"\n", + " You are an expert software developer and a helpful coding assistant.\n", + " You are able to generate high-quality code in any programming language.\n", + " \"\"\",\n", + " input=\"Write a function that checks if a year is a leap year in C#.\",\n", + ")\n", + "msg1 = response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jxE5KCsGYPSE" + }, + "outputs": [], + "source": [ + "print(response.id)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gUSpkh01Q5Ht" + }, + "source": [ + "You can look up a client by id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XUSoCWlQBYXm" + }, + "outputs": [], + "source": [ + "msc1_copy = client.interactions.get(msg1.id)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KVQZitGE7EbW" + }, + "source": [ + "Use `response.id` to continue a conversation with another message:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SnzMJJ-adOfX" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " previous_interaction_id=msg1.id,\n", + " input=\"Wait, we got disconnected, who are you? 
what were we talking about?\",\n", + ")\n", + "msg2 = response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rDIxSy3yxxs8" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNX4AQ9AWddm" + }, + "source": [ + "### Branching\n", + "\n", + "Since the interactions are persistent resources, reusing an old id branches the conversation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zuoSfLW5Xl1V" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " previous_interaction_id=msg1.id,\n", + " input=\"Including this one, how many messages have I sent you so far? what were they?\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TCQQK618zyin" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iYk1PARsaqIW" + }, + "source": [ + "### Disable multi-turn" + ] + }, + { + "cell_type": "markdown", + "source": [ + "If you call interactons with `store=False` it won't store the interaction or return an ID." 
+ ], + "metadata": { + "id": "vc38lBpfsBwK" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oz16wY3uap2F" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " store=False,\n", + " system_instruction=\"\"\"\n", + " You are an expert software developer and a helpful coding assistant.\n", + " You are able to generate high-quality code in any programming language.\n", + " \"\"\",\n", + " input=\"Write a function that checks if a year is a leap year in C#.\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4iRzLZ-ba7Kb" + }, + "outputs": [], + "source": [ + "response.id is None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eMVuUULxJTfY" + }, + "source": [ + "### Manual\n", + "\n", + "To manage a multiturn conversation yourself, pass a list of turns as the input.\n", + "\n", + "Then append additional turns and resend it to generate later outputs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aXeH3V_RHS1B" + }, + "outputs": [], + "source": [ + "conversation_history = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [{\"type\": \"text\", \"text\": \"What are the three largest cities in Spain?\"}]\n", + " },\n", + "]\n", + "\n", + "response = client.interactions.create(\n", + " model=\"gemini-2.5-flash\",\n", + " input=conversation_history,\n", + " store=False,\n", + ")\n", + "\n", + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yvgxFeZqHfF_" + }, + "outputs": [], + "source": [ + "conversation_history.append({\n", + " 'role': 'model',\n", + " 'content': response.outputs\n", + "})\n", + "\n", + "conversation_history.append({\n", + " 'role': 'user',\n", + " 'content': [{\"type\": \"text\", \"text\": \"What is the population of each city?\"}]\n", + "})\n", + "\n", + "response = 
client.interactions.create(\n", + " model=\"gemini-2.5-flash\",\n", + " input=conversation_history,\n", + " store=False,\n", + ")\n", + "\n", + "conversation_history.append({\n", + " 'role': 'model',\n", + " 'content': response.outputs\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Skeo1kbnJS4X" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ORw2_ipyIAjh" + }, + "outputs": [], + "source": [ + "len(conversation_history)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nyZMoM6tgnTA" + }, + "source": [ + "## Generate JSON\n", + "\n", + "The [controlled generation](https://ai.google.dev/gemini-api/docs/structured-output?lang=python#generate-json) capability in Gemini API allows you to constraint the model output to a structured format. You can provide the schemas as Pydantic Models or a JSON string." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xRJHVjr-gqHi" + }, + "outputs": [], + "source": [ + "import pydantic\n", + "import json\n", + "\n", + "class Recipe(pydantic.BaseModel):\n", + " recipe_name: str\n", + " recipe_description: str\n", + " recipe_ingredients: list[str]\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Write a popular cookie recipe and its ingredients.\",\n", + " response_format=Recipe.model_json_schema(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8Gf6hFwUCdIw" + }, + "outputs": [], + "source": [ + "for part in response.outputs:\n", + " if text:= getattr(part, 'text', None):\n", + " parsed = Recipe.model_validate_json(part.text)\n", + " print(repr(parsed))\n", + " print()\n", + " print(parsed.model_dump_json(indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "erpe7YteLi-h" + }, + "source": [ + "###Pydantic 
TypeAdapter\n", + "\n", + "Use `TypeAdaptor` if you need it to generate something other than a pydantic class:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eS1F3REQHi8k" + }, + "outputs": [], + "source": [ + "\n", + "\n", + "type_adaptor = pydantic.TypeAdapter(list[Recipe])\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Provide 3 popular cookie recipes and their ingredients.\",\n", + " response_format=type_adaptor.json_schema(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssa7zzJmIHBr" + }, + "outputs": [], + "source": [ + "for part in response.outputs:\n", + " if text:= getattr(part, 'text', None):\n", + " parsed = type_adaptor.validate_json(part.text)\n", + " print(parsed)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cHLitKXj1hZa" + }, + "source": [ + "## Generate Images\n", + "\n", + "Gemini can output images directly as part of a conversation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qbdnNzGL6R_2" + }, + "outputs": [], + "source": [ + "from IPython.display import Image, Markdown\n", + "\n", + "response = client.interactions.create(\n", + " model=\"gemini-3-pro-image-preview\",\n", + " input='Hi, can create photorealistic image of a pig with wings, a top hat, and monocle flying over a happy futuristic \"solar punk\" scifi city with lots of greenery?',\n", + " response_modalities=['TEXT', 'IMAGE'] # this is the default for image models\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G_KNatC_Guaw" + }, + "outputs": [], + "source": [ + "print(response.model_dump_json(indent=2, exclude_none=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c4hyw9b-GakQ" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "XyxCGy8_yGql" + }, + "outputs": [], + "source": [ + "from IPython.display import Image, Markdown\n", + "\n", + "response = client.interactions.create(\n", + " model=\"gemini-2.5-flash-image\",\n", + " input='Hi, can create photorealistic image of a pig with wings, a top hat, and monocle flying over a happy futuristic \"solar punk\" scifi city with lots of greenery?',\n", + " #response_modalities=['TEXT', 'IMAGE'] # this is the default for image models\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q1tAq1kSxgoY" + }, + "source": [ + "[Imagen](./Get_started_imagen.ipynb) is another way to generate images. See the [documentation](https://ai.google.dev/gemini-api/docs/image-generation#choose-a-model) for recommendations on where to use each one." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mDaoxLemM2Tq" + }, + "source": [ + "## Generate audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BCRCUNTZM12o" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=\"gemini-2.5-flash-preview-tts\",\n", + " input=\"Say cheerfully: Have a wonderful day!\",\n", + " generation_config={'speech_config': {\"voice\": \"algenib\", \"language\": \"en-US\"}},\n", + " #response_modalities=[\"audio\"]\n", + ")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rl1yLXvhOPp5" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uQfLCxfQtPTg" + }, + "source": [ + "## Streaming\n", + "\n", + "By default, the model returns a response after completing the entire generation process. 
You can set `stream=True` to receive the response in chunks as it is generated." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3gIsSNqXtOXB" + }, + "outputs": [], + "source": [ + "stream = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Tell me a story about a lonely robot who finds friendship in a most unexpected place.\",\n", + " stream=True\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CsZyQ3QmJEF0" + }, + "outputs": [], + "source": [ + "for chunk in stream:\n", + " print(chunk)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "plCtEIaHuv96" + }, + "source": [ + "## Send asynchronous requests\n", + "\n", + "Use the `client.aio` namespace to make async requests." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OPTI7noYuwgr" + }, + "outputs": [], + "source": [ + "response = await client.aio.interactions.create(\n", + " model=\"gemini-2.5-flash\",\n", + " input=\"Tell me a story about a lonely robot who finds friendship in a most unexpected place.\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O60J736eNL4c" + }, + "outputs": [], + "source": [ + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rl-y9SZywD0s" + }, + "source": [ + "## Function calling\n", + "\n", + "[Function calling](https://ai.google.dev/gemini-api/docs/function-calling) lets you provide a set of tools that it can use to respond to the user's prompt. You create a description of a function in your code, then pass that description to a language model in a request. The response from the model includes:\n", + "- The name of a function that matches the description.\n", + "- The arguments to call it with."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "APk6sXO6wLQp" + }, + "outputs": [], + "source": [ + "get_destination = {\n", + " 'type':'function',\n", + " \"name\": \"get_destination\",\n", + " \"description\": \"Get directions to the destination.\",\n", + " \"parameters\": {\n", + " \"type\": \"OBJECT\",\n", + " \"properties\": {\n", + " \"destination\": {\n", + " \"type\": \"STRING\",\n", + " \"description\": \"Get directions to the destination.\",\n", + " },\n", + " },\n", + " },\n", + "}\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"I'd like to travel to Paris.\",\n", + " tools=[get_destination],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TLDLB1dbgBQj" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sKayis_ggn0p" + }, + "outputs": [], + "source": [ + "import inspect\n", + "\n", + "def to_function_schema(f: callable):\n", + " \"\"\"\n", + " Converts a Python function to a function-calling JSON schema.\n", + " \"\"\"\n", + " properties = {}\n", + " required_params = []\n", + "\n", + " # Use the passed function 'f', not a hardcoded one\n", + " sig = inspect.signature(f)\n", + "\n", + " for name, param in sig.parameters.items():\n", + " # Get the JSON schema for the parameter's type annotation\n", + " properties[name] = pydantic.TypeAdapter(param.annotation).json_schema()\n", + "\n", + " # Check if the parameter has a default value\n", + " if param.default == inspect.Parameter.empty:\n", + " required_params.append(name)\n", + "\n", + " return {\n", + " \"type\": \"function\",\n", + " \"name\": f.__name__,\n", + " \"description\": f.__doc__,\n", + " \"parameters\": {\n", + " \"type\": \"object\", # 'object' is the correct JSON Schema type\n", + " \"properties\": properties,\n", + " #\"required\": required_params, # 
Add the list of required params here\n", + " },\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VRCLrRuSnRDx" + }, + "outputs": [], + "source": [ + "def price_cookies(recipe:Recipe):\n", + " \"Get the price of a cookie, given the recipe\"\n", + " return 2.00\n", + "\n", + "to_function_schema(price_cookies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ug9KRgzLnhg8" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Invent a chocolate cookie recipe, write it out as expected by the pricing function, then call the function on the recipe to compute the the price per cookie\",\n", + " tools=[to_function_schema(price_cookies)],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AA9afbhinyNZ" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NsNd3DtDFX1X" + }, + "source": [ + "## Code execution\n", + "\n", + "[Code execution](https://ai.google.dev/gemini-api/docs/code-execution?lang=python) lets the model generate and execute Python code to answer complex questions. You can find more examples in the [Code execution quickstart guide](./Code_execution.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fY062-nsGLBu" + }, + "outputs": [], + "source": [ + "from IPython.display import Image, Markdown, Code, HTML\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"Generate and run a script to count how many letter r there are in the word strawberry\",\n", + " tools=[{'type': 'code_execution'}]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Evz7fUDksHW9" + }, + "outputs": [], + "source": [ + "print(response.model_dump_json(indent=2, exclude_none=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hU9MwRfPWQAf" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-85H8Yr7jHxM" + }, + "source": [ + "## GoogleSearch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hoWh9kbojC_L" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=\"lookup a recipe for cream of brocoli soup\",\n", + " tools=[{'type': 'google_search'}]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9pMqwUXFjP_y" + }, + "outputs": [], + "source": [ + "print(response.model_dump_json(indent=2, exclude_none=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V3axdlfEaNWH" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wyEmvUJfMkCR" + }, + "source": [ + "## Use urlContext" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yr36dvTWQSAJ" + }, + "source": [ + "The URL Context tool empowers Gemini models to directly access, process, and understand content from user-provided web page URLs. 
This is key for enabling dynamic agentic workflows, allowing models to independently research, analyze articles, and synthesize information from the web as part of their reasoning process.\n", + "\n", + "In this example, you will use two links as reference and ask Gemini to find differences between the cooking recipes present in each of the links:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JgpP6_qTQeXR" + }, + "outputs": [], + "source": [ + "prompt = \"\"\"\n", + "Compare recipes from https://www.food.com/recipe/homemade-cream-of-broccoli-soup-271210\n", + "and from https://www.allrecipes.com/recipe/13313/best-cream-of-broccoli-soup/,\n", + "list the key differences between them.\n", + "\"\"\"\n", + "\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=prompt,\n", + " tools=[{'type': \"url_context\"}]\n", + "\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tAacB96mkYAI" + }, + "outputs": [], + "source": [ + "print(response.model_dump_json(indent=2, exclude_none=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xPLfXHNjcV0d" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n72rLxFNbtC-" + }, + "source": [ + "## Multimedia input" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wll6SRpwdIQY" + }, + "source": [ + "Data can always be included inline (as below) or uploaded to the Files API, then referred to by its URI (see the example in the \"Send Image Prompts\" section above)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QLV19RrMUlaw" + }, + "source": [ + "### Upload a PDF file\n", + "\n", + "This PDF page is an article titled [Smoothly editing material properties of
objects](https://research.google/blog/smoothly-editing-material-properties-of-objects-with-text-to-image-models-and-synthetic-data/) with text-to-image models and synthetic data available on the Google Research Blog.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b0BfhLDFWfCS" + }, + "outputs": [], + "source": [ + "# Prepare the file to be uploaded\n", + "PDF = \"https://storage.googleapis.com/generativeai-downloads/data/Smoothly%20editing%20material%20properties%20of%20objects%20with%20text-to-image%20models%20and%20synthetic%20data.pdf\" # @param {type: \"string\"}\n", + "pdf_bytes = requests.get(PDF).content\n", + "\n", + "pdf_path = pathlib.Path('article.pdf')\n", + "pdf_path.write_bytes(pdf_bytes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bjrfdaiYPuIL" + }, + "source": [ + "Secondly, you'll upload the saved PDF file and generate a bulleted list summary of its contents." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tH2h2WDVWptt" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Summarize this file as a bulleted list.\",\n", + " },\n", + " {\n", + " \"type\": 'document',\n", + " \"mime_type\": \"application/pdf\",\n", + " \"data\": base64.b64encode(pdf_bytes).decode(\"utf-8\"),\n", + " },\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xe-p4fbRZ8E2" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6JYIUEHcfbM2" + }, + "outputs": [], + "source": [ + "pdf_upload = client.files.upload(file=pdf_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i0Hle44qfoGb" + }, + "outputs": [], + "source": [ + "response = 
client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Summarize this file as a bulleted list.\",\n", + " },\n", + " {\n", + " \"type\": 'document',\n", + " \"mime_type\": \"application/pdf\",\n", + " \"uri\": pdf_upload.uri,\n", + " },\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hk7UXthhf8_v" + }, + "outputs": [], + "source": [ + "show_outputs(response.outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9NWO1moe9fx-" + }, + "source": [ + "### Upload an audio file\n", + "\n", + "In this case, you'll use a sample [audio recording](https://storage.googleapis.com/generativeai-downloads/data/Walking_thoughts_3.m4a), which is the file downloaded in the next cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lCSuGd9i9fEB" + }, + "outputs": [], + "source": [ + "# Prepare the file to be uploaded\n", + "AUDIO = \"https://storage.googleapis.com/generativeai-downloads/data/Walking_thoughts_3.m4a\" # @param {type: \"string\"}\n", + "audio_bytes = requests.get(AUDIO).content\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0wjKO0eI9yps" + }, + "outputs": [], + "source": [ + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Summarize this audio file.\",\n", + " },\n", + " {\n", + " \"type\": 'audio',\n", + " \"mime_type\": \"audio/x-m4a\",\n", + " \"data\": base64.b64encode(audio_bytes).decode(\"utf-8\"),\n", + " },\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KdUjkIQP-G_i" + }, + "source": [ + "### Upload a video file\n", + "\n", + "In this case, you'll use a short clip of [Big Buck Bunny](https://peach.blender.org/about/)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "e9ohtLxU-SFE" + }, + "outputs": [], + "source": [ + "# Download the video file\n", + "VIDEO_URL = \"https://storage.googleapis.com/generativeai-downloads/videos/Big_Buck_Bunny.mp4\" # @param {type: \"string\"}\n", + "video_bytes = requests.get(VIDEO_URL).content\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cX82TyGL-e2O" + }, + "outputs": [], + "source": [ + "# Ask Gemini about the video\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Summarize this video.\",\n", + " },\n", + " {\n", + " \"type\": 'video',\n", + " \"mime_type\": \"video/mp4\",\n", + " \"data\": base64.b64encode(video_bytes).decode(\"utf-8\"),\n", + " },\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "65_qu3UsM8_M" + }, + "source": [ + "### YouTube link" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GXNlrAsZR7bB" + }, + "source": [ + "For YouTube links, you don't need to explicitly upload the video file content, but you do need to explicitly declare the video URL you want the model to process as part of the `input` of the request. For more information, see the [vision](https://ai.google.dev/gemini-api/docs/vision?lang=python#youtube) documentation including the features and limits.\n", + "\n", + "> **Note:** You're only able to submit up to one YouTube link per request.\n", + "\n", + "> **Note:** If your text input includes YouTube links, the system won't process them, which may result in incorrect responses. To ensure proper handling, explicitly provide the URL using the `uri` field of a `video` input part.\n", + "\n", + "The following example shows how you can use the model to summarize the video. In this case, use a summary video of [Google I/O 2024](https://www.youtube.com/watch?v=WsEQjeZoEng)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f3owcmgfThQ4" + }, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "response = client.interactions.create(\n", + " model=MODEL_ID,\n", + " input=[\n", + " {\n", + " \"type\": 'video',\n", + " \"uri\": \"https://www.youtube.com/watch?v=WsEQjeZoEng\",\n", + " },\n", + " {\n", + " \"type\": 'text',\n", + " \"text\": \"Summarize this video\",\n", + " }\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qk4NwX-MlsI5" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file