From a1d8960675083a9ca19ec9821a73c2b6b2458965 Mon Sep 17 00:00:00 2001
From: Sara
Date: Mon, 27 Nov 2023 13:28:58 +0100
Subject: [PATCH] Update ImageClient code to use the latest available DALL-E
 version (2023-12-01-preview)

---
 PodcastSocialMediaCopilot.py |  5 ++-
 README.md                    |  1 +
 dalle_helper.py              | 84 +++++++++++------------------------
 3 files changed, 28 insertions(+), 62 deletions(-)

diff --git a/PodcastSocialMediaCopilot.py b/PodcastSocialMediaCopilot.py
index f8ace2d..6e07386 100644
--- a/PodcastSocialMediaCopilot.py
+++ b/PodcastSocialMediaCopilot.py
@@ -49,7 +49,8 @@
 openai_api_type = "azure"
 openai_api_base = "https://TODO.openai.azure.com/" # Your endpoint will look something like this: https://YOUR_AOAI_RESOURCE_NAME.openai.azure.com/
 openai_api_key = "TODO" # Your key will look something like this: 00000000000000000000000000000000
-gpt4_deployment_name="gpt-4"
+gpt4_deployment_name = "gpt-4"
+dalle_deployment_name = "Dalle3"
 
 # We are assuming that you have all model deployments on the same Azure OpenAI service resource above. If not, you can change these settings below to point to different resources.
 gpt4_endpoint = openai_api_base # Your endpoint will look something like this: https://YOUR_AOAI_RESOURCE_NAME.openai.azure.com/
@@ -250,7 +251,7 @@ def bing_grounding(input_dict:dict) -> dict:
 print("Calling DALL-E model on Azure OpenAI Service to get an image for social media...\n")
 
 # Establish the client class instance
-client = ImageClient(dalle_endpoint, dalle_api_key, verbose=False) # change verbose to True for including debug print statements
+client = ImageClient(dalle_endpoint, dalle_api_key, dalle_deployment_name, verbose=False) # change verbose to True for including debug print statements
 
 # Generate an image
 imageURL, postImage = client.generateImage(dalle_prompt)
diff --git a/README.md b/README.md
index be79f02..d359238 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,7 @@ Next, update the PodcastSocialMediaCopilot.py file with your settings.
 + Update **openai_api_base** with the name of your Azure OpenAI resource; this value should look like this: "https://YOUR_AOAI_RESOURCE_NAME.openai.azure.com/"
 + Update **openai_api_key** with the corresponding API key for your Azure OpenAI resource.
 + Update **gpt4_deployment_name** with the name of your model deployment for GPT-4 in your Azure OpenAI resource.
++ Update **dalle_deployment_name** with the name of your model deployment for Dall-E in your Azure OpenAI resource.
 + If your model deployments for gpt-4, dalle, and the plugins-capable model are all on the same Azure OpenAI resource, you're all set! If not, you can override the individual endpoints and keys for the resources for the various model deployments using the variables **gpt4_endpoint**, **gpt4_api_key**, **dalle_endpoint**, **dalle_api_key**, **plugin_model_url**, and **plugin_model_api_key**.
 + Optionally, you can also update the **podcast_url** and **podcast_audio_file** to reflect your own podcast.
 
diff --git a/dalle_helper.py b/dalle_helper.py
index 4a8fcd1..51cc06b 100644
--- a/dalle_helper.py
+++ b/dalle_helper.py
@@ -5,29 +5,29 @@ import time
 class ImageClient:
-    def __init__(self, endpoint, key, api_version = "2022-08-03-preview", verbose=False):
+    def __init__(self, endpoint, key, deployment_name = "Dalle3", api_version = "2023-12-01-preview", verbose=False):
         # These are the parameters for the class:
         # ### endpoint: The endpoint for your Azure OpenAI resource
         # ### key: The API key for your Azure OpenAI resource
+        # ### deployment_name: The deployment name for Dall-E
         # ### api_version: The API version to use. This is optional and defaults to the latest version
 
         self.endpoint = endpoint
         self.api_key = key
         self.api_version = api_version
+        self.deployment_name = deployment_name
         self.verbose = verbose
 
     def text_to_image(self, prompt):
         # this method makes the text-to-image API call. It will return the raw response from the API call
-
         reqURL = requests.models.PreparedRequest()
         params = {'api-version':self.api_version}
-        #the full endpoint will look something like this https://YOUR_AOAI_RESOURCE_NAME.openai.azure.com/dalle/text-to-image
-        reqURL.prepare_url(self.endpoint + "dalle/text-to-image", params)
+        #the full endpoint will look something like this https://YOUR_AOAI_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DALLE_DEPLOYMENT_NAME/images/generations
+        reqURL.prepare_url(self.endpoint + f"openai/deployments/{self.deployment_name}/images/generations", params)
 
         if self.verbose:
             print("Sending a POST call to the following URL: {URL}".format(URL=reqURL.url))
         #Construct the data payload for the call. This includes the prompt text as well as many optional parameters.
-        payload = { "caption": prompt}
-
+        payload = {"prompt": prompt}
 
         r = requests.post(reqURL.url,
             headers={
                 "Api-key": self.api_key,
@@ -35,12 +35,21 @@ def text_to_image(self, prompt):
             },
             json = payload
         )
-        # Response Body example: { "id": "80b095cb-4248-4fa7-90c2-933f0907fb2a", "status": "Running" }
-        # Key headers:
-        # Operation-Location: URL to get response
-        # Retry-after: 3 //seconds to respond
-        if r.status_code != 202:
+        # Response Body example:
+        # {
+        #     "created": 1698342300,
+        #     "data": [
+        #         {
+        #             "url": "https://dalletipusw2.blob.core.windows.net/private/images/e5451cc6-b1ad-4747-bd46-b89a3a3b8bc3/generated_00.png?se=2023-10-27T17%3A45%3A09Z&..."
+        #         },
+        #         {
+        #             "url": "https://dalletipusw2.blob.core.windows.net/private/images/e5451cc6-b1ad-4747-bd46-b89a3a3b8bc3/generated_01.png?se=2023-10-27T17%3A45%3A09Z&..."
+        #         }],
+        #     "revised_prompt": "A vivid, natural representation of Microsoft Clippy wearing a cowboy hat."
+ # } + + if r.status_code != 200: print("Error: {error}".format(error=r.json())) data = r.json() @@ -49,34 +58,6 @@ def text_to_image(self, prompt): print(data) return r - def getImageResults(self, operation_location): - # This method will make an API call to get the status/results of the text-to-image API call using the - # Operation-Location header from the original API call - - params = {'api-version':self.api_version} - # the full endpoint will look something like this - # https://YOUR_RESOURCE_NAME.openai.azure.com/dalle/text-to-image/operations/OPERATION_ID_FROM_PRIOR_RESPONSE?api-version=2022-08-03-preview - - if self.verbose: - print("Sending a POST call to the following URL: {URL}".format(URL=operation_location)) - - r = requests.get(operation_location, - headers={ - "Api-key": self.api_key, - "Content-Type": "application/json" - } - ) - - data = r.json() - - if self.verbose: - print('Get Image results call response body') - print(data) - return r - - # Sending a POST call to the following URL: - # {'id': 'd63fc675-f751-40b7-a297-e692c3b966b9', 'result': {'caption': 'An avocado chair.', 'contentUrl': '', 'contentUrlExpiresAt': '2022-08-13T22:52:45Z', 'createdDateTime': '2022-08-13T21:50:55Z'}, 'status': 'Succeeded'} - def getImage(self, contentUrl): # Download the images from the given URL @@ -85,35 +66,18 @@ def getImage(self, contentUrl): def generateImage(self, prompt): - submission = self.text_to_image( prompt) + submission = self.text_to_image(prompt) if self.verbose: print('Response code from submission') print(submission.status_code) print('Response body:') print(submission.json()) - if submission.status_code == 202: - operation_location = submission.headers['Operation-Location'] - retry_after = submission.headers['Retry-after'] + if submission.status_code == 200: + contentUrl = submission.json()['data'][0]['url'] else: - print('Not a 202 response') + print('Not a 200 response') return "-1" - - #wait to request - status = "not running" - while status != "Succeeded": - if self.verbose: - print('retry after: ' + retry_after) - time.sleep(int(retry_after)) - r = self.getImageResults(operation_location) - # print(r.status_code) - # print(r.headers) - # print(r.json()) - status = r.json()['status'] - # print(status) - if status == "Failed": - return "-1" - contentUrl = r.json()['result']['contentUrl'] image = self.getImage(contentUrl) return contentUrl, image.content