diff --git a/key.ipynb b/key.ipynb index 5206254..4381df0 100644 --- a/key.ipynb +++ b/key.ipynb @@ -1,170 +1,1865 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "import matplotlib as mpl\n", + "from matplotlib import pyplot as plt\n", + "\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Customize" + ] + }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Customize Pandas settings (eg: DataFrame display)\n", + "\n", + "# Columns\n", + "pd.options.display.max_columns = 10\n", + "pd.options.display.max_colwidth = 15\n", + "pd.options.display.width = 150\n", + "# Rows\n", + "pd.options.display.max_rows = pd.options.display.min_rows = 12\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Customize matplotlib\n", + "\n", + "# plt.style.use('ggplot')\n", + "plt.style.use('default')\n", + "plt.rcParams['figure.figsize'] = (12, 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will be using ugly holiday sweater data crowdsourced from R-Ladies (and friends) in November/December 2020. If you would like to contribute your own ugly holiday sweater info to this dataset, please fill out this Google Form! See a summary of the data attributes here:\n", + "\n", + " sweater: entry number\n", + " hs_tf: Do you have a holiday sweater? 
(Yes/No/NA)\n", + " sparkly: is it sparkly? (Yes/No/NA)\n", + " noise: does it make noise? (Yes/No/NA)\n", + " lights: does it light up? (Yes/No/NA)\n", + " objects: does it have anything attached to it? (Yes/No/NA)\n", + " colors: What colors does it have?\n", + " image_tf: Does it have an image on it? (Yes/No/NA)\n", + " image_desc: User-provided image description\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " hs_tf sparkly noise lights objects \\\n", - "sweater \n", - "sweater1 Yes Yes No No No \n", - "sweater2 Yes No No No No \n", - "sweater3 Yes No No No No \n", - "sweater5 Yes No No No No \n", - "sweater8 Yes No Yes Yes Yes \n", - "... ... ... ... ... ... \n", - "sweater100 Yes Yes No No Yes \n", - "sweater103 Yes No Yes No Yes \n", - "sweater104 Yes Yes No No Yes \n", - "sweater105 Yes No No No No \n", - "sweater107 Yes No No No Yes \n", + "df shape:\n", + "(105, 8)\n", + "\n", + "Column names:\n", + "['hs_tf', 'sparkly', 'noise', 'lights', 'objects', 'colors', 'image_tf', 'image_desc']\n", "\n", - " colors image_tf \\\n", - "sweater \n", - "sweater1 Red, Yellow, Blue, White, teal Yes \n", - "sweater2 Green No \n", - "sweater3 Red, Yellow, Green, Brown, White, Black Yes \n", - "sweater5 Blue, White, Black Yes \n", - "sweater8 Red, Green, Blue, Purple, White, Grey No \n", - "... ... ... \n", - "sweater100 Orange, Yellow, Blue Yes \n", - "sweater103 Red, Yellow, White, Black Yes \n", - "sweater104 Red, White, Black Yes \n", - "sweater105 Red, Green, Blue, Grey No \n", - "sweater107 Red, Green, White, Black Yes \n", + " hs_tf sparkly noise lights objects colors image_tf image_desc\n", + "sweater \n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre...\n", + "sweater2 Yes No No No No Green No NaN\n", + "sweater3 Yes No No No No Red, Yellow... Yes Houses\n", + "sweater4 No No No No No the limit d... 
No NaN\n", + "sweater5 Yes No No No No Blue, White... Yes T-rex\n", + "sweater6 No NaN NaN NaN NaN NaN NaN NaN\n", + "... ... ... ... ... ... ... ... ...\n", + "sweater102 No NaN NaN NaN NaN NaN NaN NaN\n", + "sweater103 Yes No Yes No Yes Red, Yellow... Yes Sloth\n", + "sweater104 Yes Yes No No Yes Red, White,... Yes R2D2 wearin...\n", + "sweater105 Yes No No No No Red, Green,... No NaN\n", + "sweater106 No No No No No NaN No NaN\n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea...\n", "\n", - " image_desc \n", - "sweater \n", - "sweater1 octopus dressed like santa \n", - "sweater2 NaN \n", - "sweater3 Houses \n", - "sweater5 T-rex \n", - "sweater8 NaN \n", - "... ... \n", - "sweater100 Menorah \n", - "sweater103 Sloth \n", - "sweater104 R2D2 wearing a Santa hat \n", - "sweater105 NaN \n", - "sweater107 a llama wearing a scarf \n", + "[105 rows x 8 columns]\n" + ] + } + ], + "source": [ + "fnm = 'ugly_sweaters.csv'\n", + "df = pd.read_csv(fnm, index_col='sweater')\n", + "# Show the dataframe\n", + "print(f'df shape:\\n{df.shape}')\n", + "print(f'\\nColumn names:\\n{df.columns.to_list()}\\n')\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " hs_tf sparkly noise lights objects colors image_tf image_desc\n", + "sweater \n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre...\n", + "sweater2 Yes No No No No Green No NaN\n", + "sweater3 Yes No No No No Red, Yellow... Yes Houses\n", + "sweater5 Yes No No No No Blue, White... Yes T-rex\n", + "sweater8 Yes No Yes Yes Yes Red, Green,... No NaN\n", + "sweater11 Yes No No No No Red, Yellow... Yes Santa Claus...\n", + "... ... ... ... ... ... ... ... ...\n", + "sweater99 Yes Yes No No No Red, Blue, ... Yes Reindeer\n", + "sweater100 Yes Yes No No Yes Orange, Yel... Yes Menorah\n", + "sweater103 Yes No Yes No Yes Red, Yellow... 
Yes Sloth\n", + "sweater104 Yes Yes No No Yes Red, White,... Yes R2D2 wearin...\n", + "sweater105 Yes No No No No Red, Green,... No NaN\n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea...\n", "\n", "[68 rows x 8 columns]\n" ] } ], "source": [ - "import pandas as pd\n", - "\n", - "# Step 1 Load Data\n", - "data = pd.read_csv('ugly_sweaters.csv', index_col='sweater')\n", - "data = data[data['hs_tf'] == 'Yes']\n", + "# Filter to only include Holiday Sweaters\n", + "data_orig = df.loc[df.hs_tf == 'Yes']\n", + "# Make a copy (instead of working on the view) since we'll be changing\n", + "# col dtypes later and don't want to get SettingWithCopyWarning\n", + "data = data_orig.copy()\n", "print(data)" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect the data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " hs_tf sparkly noise lights objects \\\n", - "sweater \n", - "sweater1 Yes Yes No No No \n", - "sweater2 Yes No No No No \n", - "sweater3 Yes No No No No \n", - "sweater5 Yes No No No No \n", - "sweater8 Yes No Yes Yes Yes \n", - "... ... ... ... ... ... \n", - "sweater100 Yes Yes No No Yes \n", - "sweater103 Yes No Yes No Yes \n", - "sweater104 Yes Yes No No Yes \n", - "sweater105 Yes No No No No \n", - "sweater107 Yes No No No Yes \n", - "\n", - " colors image_tf \\\n", - "sweater \n", - "sweater1 [Red, Yellow, Blue, White, teal] Yes \n", - "sweater2 [Green] No \n", - "sweater3 [Red, Yellow, Green, Brown, White, Black] Yes \n", - "sweater5 [Blue, White, Black] Yes \n", - "sweater8 [Red, Green, Blue, Purple, White, Grey] No \n", - "... ... ... 
\n", - "sweater100 [Orange, Yellow, Blue] Yes \n", - "sweater103 [Red, Yellow, White, Black] Yes \n", - "sweater104 [Red, White, Black] Yes \n", - "sweater105 [Red, Green, Blue, Grey] No \n", - "sweater107 [Red, Green, White, Black] Yes \n", + "Index(['hs_tf', 'sparkly', 'noise', 'lights', 'objects', 'colors', 'image_tf', 'image_desc'], dtype='object')\n", "\n", - " image_desc num_colors num_words \n", - "sweater \n", - "sweater1 [octopus, dressed, like, santa] 5 4 \n", - "sweater2 NaN 1 1 \n", - "sweater3 [Houses] 6 1 \n", - "sweater5 [T-rex] 3 1 \n", - "sweater8 NaN 6 1 \n", - "... ... ... ... \n", - "sweater100 [Menorah] 3 1 \n", - "sweater103 [Sloth] 4 1 \n", - "sweater104 [R2D2, wearing, a, Santa, hat] 3 5 \n", - "sweater105 NaN 4 1 \n", - "sweater107 [a, llama, wearing, a, scarf] 4 5 \n", + "Data shape: (68, 8)\n", "\n", - "[68 rows x 10 columns]\n" + "Column dtypes:\n", + "hs_tf object\n", + "sparkly object\n", + "noise object\n", + "lights object\n", + "objects object\n", + "colors object\n", + "image_tf object\n", + "image_desc object\n", + "dtype: object\n" ] } ], "source": [ - "# Step 2 Wrangle Data\n", - "data['colors'] = data['colors'].str.split(',')\n", - "color_data = data.explode('colors').groupby('sweater').count()\n", - "data['num_colors'] = color_data['hs_tf']\n", + "# List the columns\n", + "print(data.columns)\n", "\n", - "data['image_desc'] = data['image_desc'].str.split(' ')\n", - "description_data = data.explode('image_desc').groupby('sweater').count()\n", + "# Show df shape\n", + "print(f\"\\nData shape: {data.shape}\")\n", "\n", - "data['num_words'] = description_data['hs_tf']\n", - "print(data)" + "# Show column datatypes (Numpy 'dtype')\n", + "print(\"\\nColumn dtypes:\")\n", + "print(data.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "object\n", + "sweater1\t \tRed, Yellow, Blue, White, teal\n", + "sweater2\t 
\tGreen\n", + "sweater3\t \tRed, Yellow, Green, Brown, White, Black\n", + "sweater5\t \tBlue, White, Black\n", + "sweater8\t \tRed, Green, Blue, Purple, White, Grey\n" + ] + } + ], + "source": [ + "# Inspect the 'colors' data column\n", + "print(data.colors.dtype)\n", + "for idx, val in data.colors.head().items():\n", + " print(f\"{idx}\\t {type(val)}\\t{val}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sweater1\t \toctopus dressed like santa\n", + "sweater2\t \tnan\n", + "sweater3\t \tHouses\n", + "sweater5\t \tT-rex\n", + "sweater8\t \tnan\n" + ] + } + ], + "source": [ + "# Inspect the 'image_desc' data column\n", + "# Note that this column holds heterogeneous data: str and float (NaN)\n", + "for idx, val in data.image_desc.head().items():\n", + " print(f\"{idx}\\t {type(val)}\\t{val}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wrangle Data -- convert to long-form\n", + "Make data tidy and then analyze in a general way (using groupby + aggregations)" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hs_tfsparklynoiselightsobjectscolorsimage_tfimage_desc
sweater
sweater1YesYesNoNoNoRed, Yellow...Yesoctopus dre...
sweater2YesNoNoNoNoGreenNo
sweater3YesNoNoNoNoRed, Yellow...YesHouses
sweater5YesNoNoNoNoBlue, White...YesT-rex
sweater8YesNoYesYesYesRed, Green,...No
sweater11YesNoNoNoNoRed, Yellow...YesSanta Claus...
...........................
sweater99YesYesNoNoNoRed, Blue, ...YesReindeer
sweater100YesYesNoNoYesOrange, Yel...YesMenorah
sweater103YesNoYesNoYesRed, Yellow...YesSloth
sweater104YesYesNoNoYesRed, White,...YesR2D2 wearin...
sweater105YesNoNoNoNoRed, Green,...No
sweater107YesNoNoNoYesRed, Green,...Yesa llama wea...
\n", + "

68 rows × 8 columns

\n", + "
" + ], "text/plain": [ - "" + " hs_tf sparkly noise lights objects colors image_tf image_desc\n", + "sweater \n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre...\n", + "sweater2 Yes No No No No Green No \n", + "sweater3 Yes No No No No Red, Yellow... Yes Houses\n", + "sweater5 Yes No No No No Blue, White... Yes T-rex\n", + "sweater8 Yes No Yes Yes Yes Red, Green,... No \n", + "sweater11 Yes No No No No Red, Yellow... Yes Santa Claus...\n", + "... ... ... ... ... ... ... ... ...\n", + "sweater99 Yes Yes No No No Red, Blue, ... Yes Reindeer\n", + "sweater100 Yes Yes No No Yes Orange, Yel... Yes Menorah\n", + "sweater103 Yes No Yes No Yes Red, Yellow... Yes Sloth\n", + "sweater104 Yes Yes No No Yes Red, White,... Yes R2D2 wearin...\n", + "sweater105 Yes No No No No Red, Green,... No \n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea...\n", + "\n", + "[68 rows x 8 columns]" ] }, - "execution_count": 50, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" - }, + } + ], + "source": [ + "# Refresh dataframe from original data\n", + "data = data_orig.copy()\n", + "\n", + "# Replace NaN with empty str (assign back: inplace on a column accessor is a no-op under Copy-on-Write)\n", + "data['image_desc'] = data['image_desc'].fillna('')\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert str to list of str" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ { "data": { - "image/png": "\n", "text/plain": [ - "
" + "sweater\n", + "sweater1 [Red, Yell...\n", + "sweater2 [Green]\n", + "sweater3 [Red, Yell...\n", + "sweater5 [Blue, Whi...\n", + "sweater8 [Red, Gree...\n", + "sweater11 [Red, Yell...\n", + " ... \n", + "sweater99 [Red, Blue...\n", + "sweater100 [Orange, Y...\n", + "sweater103 [Red, Yell...\n", + "sweater104 [Red, Whit...\n", + "sweater105 [Red, Gree...\n", + "sweater107 [Red, Gree...\n", + "Name: colors_ls, Length: 68, dtype: object" ] }, - "metadata": { - "needs_background": "light" + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert 'colors' column from single comma-delim str to list\n", + "# of string\n", + "data['colors_ls'] = data.colors.str.split(',')\n", + "data.colors_ls" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sweater\n", + "sweater1 [octopus, d...\n", + "sweater2 []\n", + "sweater3 [Houses]\n", + "sweater5 [T-rex]\n", + "sweater8 []\n", + "sweater11 [Santa, Cla...\n", + " ... \n", + "sweater99 [Reindeer]\n", + "sweater100 [Menorah]\n", + "sweater103 [Sloth]\n", + "sweater104 [R2D2, wear...\n", + "sweater105 []\n", + "sweater107 [a, llama, ...\n", + "Name: image_desc_ls, Length: 68, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert 'image_desc' column from single space-delim str to list\n", + "data['image_desc_ls'] = data.image_desc.str.split()\n", + "data.image_desc_ls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tidy & analyze 'colors'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hs_tfsparklynoiselightsobjectscolorsimage_tfimage_desccolors_lsimage_desc_ls
sweater
sweater1YesYesNoNoNoRed, Yellow...Yesoctopus dre...Red[octopus, d...
sweater1YesYesNoNoNoRed, Yellow...Yesoctopus dre...Yellow[octopus, d...
sweater1YesYesNoNoNoRed, Yellow...Yesoctopus dre...Blue[octopus, d...
sweater1YesYesNoNoNoRed, Yellow...Yesoctopus dre...White[octopus, d...
sweater1YesYesNoNoNoRed, Yellow...Yesoctopus dre...teal[octopus, d...
sweater2YesNoNoNoNoGreenNoGreen[]
.................................
sweater105YesNoNoNoNoRed, Green,...NoBlue[]
sweater105YesNoNoNoNoRed, Green,...NoGrey[]
sweater107YesNoNoNoYesRed, Green,...Yesa llama wea...Red[a, llama, ...
sweater107YesNoNoNoYesRed, Green,...Yesa llama wea...Green[a, llama, ...
sweater107YesNoNoNoYesRed, Green,...Yesa llama wea...White[a, llama, ...
sweater107YesNoNoNoYesRed, Green,...Yesa llama wea...Black[a, llama, ...
\n", + "

273 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " hs_tf sparkly noise lights objects colors image_tf image_desc colors_ls image_desc_ls\n", + "sweater \n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre... Red [octopus, d...\n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre... Yellow [octopus, d...\n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre... Blue [octopus, d...\n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre... White [octopus, d...\n", + "sweater1 Yes Yes No No No Red, Yellow... Yes octopus dre... teal [octopus, d...\n", + "sweater2 Yes No No No No Green No Green []\n", + "... ... ... ... ... ... ... ... ... ... ...\n", + "sweater105 Yes No No No No Red, Green,... No Blue []\n", + "sweater105 Yes No No No No Red, Green,... No Grey []\n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea... Red [a, llama, ...\n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea... Green [a, llama, ...\n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea... White [a, llama, ...\n", + "sweater107 Yes No No No Yes Red, Green,... Yes a llama wea... 
Black [a, llama, ...\n", + "\n", + "[273 rows x 10 columns]" + ] }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Tidy colors data (1 color per row)\n", + "df_colors = data.explode('colors_ls')\n", + "df_colors" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sweater\n", + "sweater1 5\n", + "sweater2 1\n", + "sweater3 6\n", + "sweater5 3\n", + "sweater8 6\n", + "sweater11 8\n", + " ..\n", + "sweater99 4\n", + "sweater100 3\n", + "sweater103 4\n", + "sweater104 3\n", + "sweater105 4\n", + "sweater107 4\n", + "Name: num_colors, Length: 68, dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calc how many colors for each sweater & add to dataframe\n", + "data['num_colors'] = df_colors.groupby('sweater', sort=False).colors_ls.count()\n", + "data.num_colors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tidy & analyze 'image_desc'" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hs_tfsparklynoiselightsobjects...image_tfimage_desccolors_lsimage_desc_lsnum_colors
sweater
sweater1YesYesNoNoNo...Yesoctopus dre...[Red, Yell...octopus5
sweater1YesYesNoNoNo...Yesoctopus dre...[Red, Yell...dressed5
sweater1YesYesNoNoNo...Yesoctopus dre...[Red, Yell...like5
sweater1YesYesNoNoNo...Yesoctopus dre...[Red, Yell...santa5
sweater2YesNoNoNoNo...No[Green]NaN1
sweater3YesNoNoNoNo...YesHouses[Red, Yell...Houses6
....................................
sweater105YesNoNoNoNo...No[Red, Gree...NaN4
sweater107YesNoNoNoYes...Yesa llama wea...[Red, Gree...a4
sweater107YesNoNoNoYes...Yesa llama wea...[Red, Gree...llama4
sweater107YesNoNoNoYes...Yesa llama wea...[Red, Gree...wearing4
sweater107YesNoNoNoYes...Yesa llama wea...[Red, Gree...a4
sweater107YesNoNoNoYes...Yesa llama wea...[Red, Gree...scarf4
\n", + "

278 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " hs_tf sparkly noise lights objects ... image_tf image_desc colors_ls image_desc_ls num_colors\n", + "sweater ... \n", + "sweater1 Yes Yes No No No ... Yes octopus dre... [Red, Yell... octopus 5\n", + "sweater1 Yes Yes No No No ... Yes octopus dre... [Red, Yell... dressed 5\n", + "sweater1 Yes Yes No No No ... Yes octopus dre... [Red, Yell... like 5\n", + "sweater1 Yes Yes No No No ... Yes octopus dre... [Red, Yell... santa 5\n", + "sweater2 Yes No No No No ... No [Green] NaN 1\n", + "sweater3 Yes No No No No ... Yes Houses [Red, Yell... Houses 6\n", + "... ... ... ... ... ... ... ... ... ... ... ...\n", + "sweater105 Yes No No No No ... No [Red, Gree... NaN 4\n", + "sweater107 Yes No No No Yes ... Yes a llama wea... [Red, Gree... a 4\n", + "sweater107 Yes No No No Yes ... Yes a llama wea... [Red, Gree... llama 4\n", + "sweater107 Yes No No No Yes ... Yes a llama wea... [Red, Gree... wearing 4\n", + "sweater107 Yes No No No Yes ... Yes a llama wea... [Red, Gree... a 4\n", + "sweater107 Yes No No No Yes ... Yes a llama wea... [Red, Gree... 
scarf 4\n", + "\n", + "[278 rows x 11 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Tidy image_desc data (1 word per row)\n", + "df_image_desc = data.explode('image_desc_ls')\n", + "df_image_desc" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sweater\n", + "sweater1 4\n", + "sweater2 0\n", + "sweater3 1\n", + "sweater5 1\n", + "sweater8 0\n", + "sweater11 10\n", + " ..\n", + "sweater99 1\n", + "sweater100 1\n", + "sweater103 1\n", + "sweater104 5\n", + "sweater105 0\n", + "sweater107 5\n", + "Name: num_words, Length: 68, dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calc how many description words for each sweater & add to dataframe\n", + "data['num_words'] = df_image_desc.groupby('sweater', sort=False).image_desc_ls.count()\n", + "data.num_words" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect results" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hs_tfsparklynoiselightsobjects...image_desccolors_lsimage_desc_lsnum_colorsnum_words
sweater
sweater1YesYesNoNoNo...octopus dre...[Red, Yell...[octopus, d...54
sweater2YesNoNoNoNo...[Green][]10
sweater3YesNoNoNoNo...Houses[Red, Yell...[Houses]61
sweater5YesNoNoNoNo...T-rex[Blue, Whi...[T-rex]31
sweater8YesNoYesYesYes...[Red, Gree...[]60
sweater11YesNoNoNoNo...Santa Claus...[Red, Yell...[Santa, Cla...810
....................................
sweater99YesYesNoNoNo...Reindeer[Red, Blue...[Reindeer]41
sweater100YesYesNoNoYes...Menorah[Orange, Y...[Menorah]31
sweater103YesNoYesNoYes...Sloth[Red, Yell...[Sloth]41
sweater104YesYesNoNoYes...R2D2 wearin...[Red, Whit...[R2D2, wear...35
sweater105YesNoNoNoNo...[Red, Gree...[]40
sweater107YesNoNoNoYes...a llama wea...[Red, Gree...[a, llama, ...45
\n", + "

68 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " hs_tf sparkly noise lights objects ... image_desc colors_ls image_desc_ls num_colors num_words\n", + "sweater ... \n", + "sweater1 Yes Yes No No No ... octopus dre... [Red, Yell... [octopus, d... 5 4\n", + "sweater2 Yes No No No No ... [Green] [] 1 0\n", + "sweater3 Yes No No No No ... Houses [Red, Yell... [Houses] 6 1\n", + "sweater5 Yes No No No No ... T-rex [Blue, Whi... [T-rex] 3 1\n", + "sweater8 Yes No Yes Yes Yes ... [Red, Gree... [] 6 0\n", + "sweater11 Yes No No No No ... Santa Claus... [Red, Yell... [Santa, Cla... 8 10\n", + "... ... ... ... ... ... ... ... ... ... ... ...\n", + "sweater99 Yes Yes No No No ... Reindeer [Red, Blue... [Reindeer] 4 1\n", + "sweater100 Yes Yes No No Yes ... Menorah [Orange, Y... [Menorah] 3 1\n", + "sweater103 Yes No Yes No Yes ... Sloth [Red, Yell... [Sloth] 4 1\n", + "sweater104 Yes Yes No No Yes ... R2D2 wearin... [Red, Whit... [R2D2, wear... 3 5\n", + "sweater105 Yes No No No No ... [Red, Gree... [] 4 0\n", + "sweater107 Yes No No No Yes ... a llama wea... [Red, Gree... [a, llama, ... 4 5\n", + "\n", + "[68 rows x 12 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Show whole dataframe\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_colorsnum_words
sweater
sweater154
sweater210
sweater361
sweater531
sweater860
sweater11810
.........
sweater9941
sweater10031
sweater10341
sweater10435
sweater10540
sweater10745
\n", + "

68 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " num_colors num_words\n", + "sweater \n", + "sweater1 5 4\n", + "sweater2 1 0\n", + "sweater3 6 1\n", + "sweater5 3 1\n", + "sweater8 6 0\n", + "sweater11 8 10\n", + "... ... ...\n", + "sweater99 4 1\n", + "sweater100 3 1\n", + "sweater103 4 1\n", + "sweater104 3 5\n", + "sweater105 4 0\n", + "sweater107 4 5\n", + "\n", + "[68 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Inspect calculated columns only\n", + "data[['num_colors', 'num_words']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualize\n", + "There are several ways to visualize our results... most are built on top of the Matplotlib package." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Change default plot size\n", + "plt.rcParams['figure.figsize'] = (12, 8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pandas' plotter tools" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA+UAAAKnCAYAAADgJOxZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBI0lEQVR4nO3de3hV9Z3o/88GIhIkEZKAULlKvE5RvA4GUafWy2l1+Gk7leNUrbbT6UEZtdVKz1gv1Wrbp44tWnuzXmqh1el4OZ5ptVIhQm2LQqzWFoPc64UkQCJJBST794clh3CJIYR8d3Zer+fZz9N8V7L54OreO2/22mtlstlsNgAAAIAu1yv1AAAAANBTiXIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACCRPqkH2Nuam5vj9ddfjwEDBkQmk0k9DgAAAHkum83G22+/HcOGDYtevdp+Lzzvo/z111+P4cOHpx4DAACAHmbVqlVx4IEHtvk9eR/lAwYMiIj3/mMUFRUlngYAAIB819DQEMOHD2/p0bbkfZRvPWS9qKhIlAMAANBl2vMRaid6AwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIpE/qAQA629KaDbFibVOMKukfo0v7px4HAAB2SZQDeWN906aYNqsqKqtrWtYmlZfFjCnjo7iwIOFkAACwcw5fB/LGtFlVMX9Jbau1+Utq4/JZixJNBAAAbRPlQF5YWrMhKqtrYks222p9SzYbldU1say2MdFkAACwa6IcyAsr1ja1uX15nSgHACD3iHIgL4wcVNjm9lElTvgGAEDuEeVAXhhTtl9MKi+L3plMq/XemUxMKi9zFnYAAHKSKAfyxowp46NibGmrtYqxpTFjyvhEEwEAQNtcEg3IG8WFBfHApcfHstrGWF7X6DrlAADkPFEO5J3RpWIcAIDuweHrAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIkmj/NZbb43jjjsuBgwYEIMHD47JkyfH4sWLW33PO++8E1OnTo2SkpLYb7/94rzzzou33nor0cQAAADQeZJG+dy5c2Pq1Knx29/+Nn71q1/F5s2b4/TTT4/GxsaW77nyyivj//yf/xMPP/xwzJ07N15//fU499xzE04NAAAAnSOTzWazqYfYqqamJgYPHhxz586NSZMmRX19fZSVlcXMmTPjYx/7WERE/PnPf47DDjssnnvuufj7v//7973PhoaGKC4ujvr6+igqKtrbfwUAAAB6uN3p0Jz6THl9fX1ERAwaNCgiIl544YXYvHlznHbaaS3fc+ihh8aIESPiueee2+l9bNy4MRoaGlrdAAAAIBflTJQ
3NzfHFVdcERUVFfF3f/d3ERHx5ptvxj777BP7779/q+8dMmRIvPnmmzu9n1tvvTWKi4tbbsOHD9/bowMAAECH5EyUT506NV5++eX46U9/ukf3M3369Kivr2+5rVq1qpMmBAAAgM7VJ/UAERGXXXZZPPHEE1FZWRkHHnhgy/oBBxwQmzZtivXr17d6t/ytt96KAw44YKf31bdv3+jbt+/eHhkAAAD2WNJ3yrPZbFx22WXxyCOPxK9//esYPXp0q+3HHHNMFBQUxOzZs1vWFi9eHCtXrowJEyZ09bgAAADQqZK+Uz516tSYOXNmPPbYYzFgwICWz4kXFxdHv379ori4OC699NK46qqrYtCgQVFUVBSXX355TJgwoV1nXgcAAIBclvSSaJlMZqfr9957b1x88cUREfHOO+/E5z//+Zg1a1Zs3LgxzjjjjPjOd76zy8PXt+eSaAAAAHSl3enQnLpO+d4gygEAAOhK3fY65QAAANCTiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgET6pB4AoLMtrdkQK9Y2xaiS/jG6tH/qcQAAYJdEOZA31jdtimmzqqKyuqZlbVJ5WcyYMj6KCwsSTgYAADvn8HUgb0ybVRXzl9S2Wpu/pDYun7Uo0UQAANA2UQ7khaU1G6Kyuia2ZLOt1rdks1FZXRPLahsTTQYAALsmyoG8sGJtU5vbl9eJcgAAco8oB/LCyEGFbW4fVeKEbwAA5B5RDuSFMWX7xaTysuidybRa753JxKTyMmdhBwAgJ4lyIG/MmDI+KsaWtlqrGFsaM6aMTzQRAAC0zSXRgLxRXFgQD1x6fCyrbYzldY2uUw4AQM4T5UDeGV0qxgEA6B4cvg4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgESSRnllZWWcffbZMWzYsMhkMvHoo4+22n7xxRdHJpNpdTvzzDPTDAsAPczSmg3xzOI1say2MfUoAJC3+qT8wxsbG+PII4+MSy65JM4999ydfs+ZZ54Z9957b8vXffv27arxAKBHWt+0KabNqorK6pqWtUnlZTFjyvgoLixIOBkA5J+kUX7WWWfFWWed1eb39O3bNw444IAumggAmDarKuYvqW21Nn9JbVw+a1E8cOnxiaYCgPyU858pnzNnTgwePDgOOeSQ+NznPhd1dXVtfv/GjRujoaGh1Q0AaJ+lNRuisromtmSzrda3ZLNRWV3jUHYA6GQ5HeVnnnl
mPPDAAzF79uz42te+FnPnzo2zzjortmzZssufufXWW6O4uLjlNnz48C6cGAC6txVrm9rcvrxOlANAZ0p6+Pr7Of/881v+9wc/+MEYN25cHHTQQTFnzpz40Ic+tNOfmT59elx11VUtXzc0NAhzAGinkYMK29w+qqR/F00CAD1DTr9Tvr0xY8ZEaWlpLFmyZJff07dv3ygqKmp1AwDaZ0zZfjGpvCx6ZzKt1ntnMjGpvCxGl4pyAOhM3SrKV69eHXV1dTF06NDUowBA3poxZXxUjC1ttVYxtjRmTBmfaCIAyF9JD1/fsGFDq3e9ly1bFlVVVTFo0KAYNGhQ3HjjjXHeeefFAQccEK+99lpcc801MXbs2DjjjDMSTg0A+a24sCAeuPT4WFbbGMvrGmNUSX/vkAPAXpLJZrc7vWoXmjNnTpx66qk7rF900UVx9913x+TJk2PRokWxfv36GDZsWJx++unxla98JYYMGdLuP6OhoSGKi4ujvr7eoewAAADsdbvToUmjvCuIcgAAALrS7nRot/pMOQAAAOQTUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkEif1AMAdLalNRtixdqmGFXSP0aX9k89DnRbHksAsPeJciBvrG/aFNNmVUVldU3L2qTyspgxZXwUFxYknAy6F48lAOg6Dl8H8sa0WVUxf0ltq7X5S2rj8lmLEk0E3ZPHEgB0HVEO5IWlNRuisromtmSzrda3ZLNRWV0Ty2obE00G3YvHEgB0LVEO5IUVa5va3L68TkhAe3gsAUDX6lCUL1y4MF566aWWrx977LGYPHlyfOlLX4pNmzZ12nAA7TVyUGGb20eVOEkVtIfHEgB0rQ5F+Wc/+9l49dVXIyJi6dKlcf7550dhYWE8/PDDcc0113TqgADtMaZsv5hUXha9M5lW670zmZhUXubM0dBOHksA0LU6FOWvvvpqHHXUURER8fDDD8ekSZNi5syZcd9998XPf/7zzpwPoN1mTBkfFWNLW61VjC2NGVPGJ5oIuiePJQDoOh26JFo2m43m5uaIiHj66afjox/9aEREDB8+PGpra9v6UYC9priwIB649PhYVtsYy+saXVsZOshjCQC6Toei/Nhjj42bb745TjvttJg7d27cfffdERGxbNmyGDJkSKcOCLC7RpcKCOgMHksAsPd16PD1O+64IxYuXBiXXXZZ/O///b9j7NixERHxn//5n3HiiSd26oAAAACQrzLZ7HYXIt0D77zzTvTu3TsKCgo66y73WENDQxQXF0d9fX0UFRWlHgcAAIA8tzsd2qHD13dl33337cy7AwAAgLzW7igfOHBgZLa7PMqurF27tsMDAQAAQE/R7ii/4447Wv53XV1d3HzzzXHGGWfEhAkTIiLiueeeiyeffDKuu+66Th8SAAAA8lGHPlN+3nnnxamnnhqXXXZZq/U777wznn766Xj00Uc7a7495jPlAAAAdKXd6dAOnX39ySefjDPPPHOH9TPPPDOefvrpjtwlAAAA9DgdivKSkpJ47LHHdlh/7LHHoqSkZI+HAgAAgJ6gQ2dfv/HGG+PTn/50zJkzJ0444YSIiPjd734Xv/zlL+MHP/hBpw4IAAAA+apDUX7xxRfHYYcdFt/+9rfjv/7rvyIi4rDDDot58+a1RDoAAADQtt2O8s2bN8dnP/vZuO666+InP/nJ3pgJAAAAeoTd/kx5QUFB/PznP98bswA
AAECP0qETvU2ePDmnLnsGAAAA3VGHPlNeXl4eN910U8yfPz+OOeaY6N+/f6vt06ZN65ThAAAAIJ9lstlsdnd/aPTo0bu+w0wmli5dukdDdabduWg7AAB0trmL10TV6vVx9IiBcVJ5WepxgC6wOx3aoXfKly1b1qHBAACgp1hR1xiT75of65o2t6wNLCyIx6dOjOElhQknA3JJhz5Tvq1sNhsdeLMdAADy2vZBHhGxrmlznHPXvEQTAbmow1H+wAMPxAc/+MHo169f9OvXL8aNGxc//vGPO3M2AADoluYuXrNDkG+1rmlzPFtd08UTAbmqQ4ev33777XHdddfFZZddFhUVFRERMW/evPjXf/3XqK2tjSuvvLJThwQAgO6kavX6NrcvXLnO58uBiOhglM+YMSPuvvvuuPDCC1vWzjnnnDjiiCPihhtuEOUAAPRoRx24f5vbjx4xsGsGAXJehw5ff+ONN+LEE0/cYf3EE0+MN954Y4+HAgCA7uzkQwbHwMKCnW4bWFjgXXKgRYeifOzYsfHQQw/tsP6zn/0sysvL93goAADo7h6fOnGHMN969nWArTp0+PqNN94Yn/jEJ6KysrLlM+Xz58+P2bNn7zTWAQCgpxleUhiLvnx6PFtdEwtXrnOdcmCnMtkOXs/shRdeiP/4j/+IP/3pTxERcdhhh8XnP//5GD9+fKcOuKd256LtAAAAsKd2p0M7HOXdhSgHAACgK+1Oh3boM+UXXnhh3HvvvbF06dIODQgAAAB0MMr32WefuPXWW2Ps2LExfPjw+Od//uf44Q9/GNXV1Z09HwAAAOStPTp8/S9/+UtUVlbG3LlzY+7cufHqq6/G0KFDY/Xq1Z054x5x+DoAAABdaa8fvr7VwIEDo6SkJAYOHBj7779/9OnTJ8rKnFESAAAA2qNDUf6lL30pTjzxxCgpKYlrr7023nnnnbj22mvjzTffjEWLFnX2jAAAAJCXOnT4eq9evaKsrCyuvPLKOPfcc+Pggw/eG7N1CoevAwAA0JV2p0P7dOQPWLRoUcydOzfmzJkT3/zmN2OfffaJk08+OU455ZQ45ZRTcjrSAQAAIFd0ynXKX3zxxfiP//iP+MlPfhLNzc2xZcuWzpitU3inHAAAgK60198pz2azsWjRopgzZ07MmTMn5s2bFw0NDTFu3Lg4+eSTOzQ0AAAA9DQdivJBgwbFhg0b4sgjj4yTTz45PvOZz8RJJ50U+++/fyePBwAAAPmrQ1H+4IMPxkknnfS+b8OvXr06hg0bFr167dGV1wAAACAvdaiWP/KRj7Tr89mHH354LF++vCN/BAAAAOS9vfoWdiecQw4AAADyluPKAQAAIBFRDgAAAImIcgAAAEhkr0Z5JpPZm3cPAAAA3ZoTvQEAAEAiHbpOeXu98sorMWzYsL35RwAAAEC31aEof+edd2LGjBnxzDPPxJo1a6K5ubnV9oULF0ZExPDhw/d8QgAAAMhTHYrySy+9NJ566qn42Mc+Fscff7zPjgMAAEAHdCjKn3jiifjv//7vqKio6Ox5AAAAoMfo0InePvCBD8SAAQM6exYAAADoUToU5d/85jfji1/8YqxYsaKz5wEAAIAeo0OHrx977LHxzjvvxJgxY6KwsDAKCgpabV+7dm2nDAcAAAD5rENRPmXKlPjLX/4SX/3qV2PIkCFO9AYAAAAd0KEo/81vfhPPPfdcHHnkkZ09DwAAAPQYHfpM+aGHHhp//etfO3sWAAAA6FE6FOW33XZbfP7zn485c+ZEXV1dNDQ0tLoBAAAA7y+TzWazu/tDvXq91/Lbf5Y8m81GJpOJLVu2dM50naChoSGKi4ujvr4+ioqKUo8DAABAntudDu3QZ8qfeeaZDg0GAAAA/D8divKTTz65s+cAAACAHqdDUV5ZWdnm9kmTJnVoGAAAAOhJOhTlp5xyyg5r236+PJc+Uw4AAAC5qkNnX1+3bl2r25o1a+KXv/xlHHfccfHUU0919owAAACQlzr
0TnlxcfEOax/+8Idjn332iauuuipeeOGFPR4MAAAA8l2H3inflSFDhsTixYs78y4BAAAgb3XonfI//OEPrb7OZrPxxhtvxG233RZHHXVUZ8wFAAAAea9D75QfddRRMX78+DjqqKNa/vf/+B//IzZt2hQ//OEP230/lZWVcfbZZ8ewYcMik8nEo48+2mp7NpuNL3/5yzF06NDo169fnHbaaVFdXd2RkQGA3bS0ZkM8s3hNLKttTD0KwF7nOY9UOvRO+bJly1p93atXrygrK4t99913t+6nsbExjjzyyLjkkkvi3HPP3WH717/+9fj2t78d999/f4wePTquu+66OOOMM+KVV17Z7T8LAGif9U2bYtqsqqisrmlZm1ReFjOmjI/iwoKEkwF0Ps95pJbJZrPZjvzg7NmzY/bs2bFmzZpobm5ute1HP/rR7g+SycQjjzwSkydPjoj33iUfNmxYfP7zn48vfOELERFRX18fQ4YMifvuuy/OP//8dt1vQ0NDFBcXR319fRQVFe32XADQ01x4z+9j/pLa2LLNrwi9M5moGFsaD1x6fMLJADqf5zz2ht3p0A4dvn7jjTfG6aefHrNnz47a2todLpHWGZYtWxZvvvlmnHbaaS1rxcXFccIJJ8Rzzz23y5/buHFjNDQ0tLoBAO2ztGZDVFbXtPrlNCJiSzYbldU1DusE8ornPHJBhw5f/+53vxv33XdffPKTn+zseVq8+eabEfHeGd23NWTIkJZtO3PrrbfGjTfeuNfmAoB8tmJtU5vbl9c1xujS/l00DcDe5TmPXNChd8o3bdoUJ554YmfP0immT58e9fX1LbdVq1alHgkAuo2Rgwrb3D6qxC+nQP7wnEcu6FCUf/rTn46ZM2d29iytHHDAARER8dZbb7Vaf+utt1q27Uzfvn2jqKio1Q0AaJ8xZfvFpPKy6J3JtFrvncnEpPIy7xgBecVzHrmgQ4evv/POO/H9738/nn766Rg3blwUFLQ+K+Htt9++x4ONHj06DjjggJg9e3bLtc8bGhrid7/7XXzuc5/b4/sHAHZuxpTxcfmsRa3ORFwxtjRmTBmfcCqAvcNzHql1KMr/8Ic/tITyyy+/3GpbZrt/ZWrLhg0bYsmSJS1fL1u2LKqqqmLQoEExYsSIuOKKK+Lmm2+O8vLylkuiDRs2rOUM7QBA5ysuLIgHLj0+ltU2xvK6xhhV0t+7RUDe8pxHah2+JFpnmDNnTpx66qk7rF900UVx3333RTabjeuvvz6+//3vx/r162PixInxne98Jw4++OB2/xkuiQYAAEBX2p0OTRrlXUGUAwAA0JX2+nXKAQAAgD0nygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIn1SDwAAQMfNXbwmqlavj6NHDIyTystSjwPAbhLlAADd0Iq6xph81/xY17S5ZW1gYUE8PnViDC8pTDgZALvD4esAAN3Q9kEeEbGuaXOcc9e8RBMB0BGiHACgm5m7eM0OQb7VuqbN8Wx1TRdPBEBHiXIAgG6mavX6NrcvXLmuawYBYI+JcgCAbuaoA/dvc/vRIwZ2zSAA7DFRDgDQzZx8yOAYWFiw020DCwuchR2gGxHlAADd0ONTJ+4Q5lvPvg5A9+GSaAAA3dDwksJY9OXT49nqmli4cp3rlAN0U6IcAKAbO6m8TIwDdGMOXwcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABIR5QAAAJCIKAcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABLpk3oAgM62tGZDrFjbFKNK+sfo0v6px2En7CMAgPeIciBvrG/aFNNmVUVldU3L2qTyspgxZXwUFxYknIyt7CMAgNYcvg7kjWmzqmL+ktpWa/OX1MblsxYlmojt2UcAAK2
JciAvLK3ZEJXVNbElm221viWbjcrqmlhW25hoMrayjwAAdiTKgbywYm1Tm9uX1wm+1OwjAIAdiXIgL4wcVNjm9lElTiaWmn0EALAjUQ7khTFl+8Wk8rLoncm0Wu+dycSk8jJn+M4B9hEAwI5EOZA3ZkwZHxVjS1utVYwtjRlTxieaiO3ZRwAArWWy2e3OuJNnGhoaori4OOrr66OoqCj1OEAXWFbbGMvrGl0DO4fZRwBAPtudDnWdciDvjC4VernOPgIAeI/D1wEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkEif1AMAALlpac2GWLG2KUaV9I/Rpf1TjwPdlscS0BZRDgC0sr5pU0ybVRWV1TUta5PKy2LGlPFRXFiQcDLoXjyWgPZw+DoA0Mq0WVUxf0ltq7X5S2rj8lmLEk0E3ZPHEtAeohwAaLG0ZkNUVtfElmy21fqWbDYqq2tiWW1josmge/FYAtpLlAMALVasbWpz+/I6IQHt4bEEtJcoBwBajBxU2Ob2USVOUgXt4bEEtJcoBwBajCnbLyaVl0XvTKbVeu9MJiaVlzlzNLSTxxLQXqIcAGhlxpTxUTG2tNVaxdjSmDFlfKKJoHvyWALaI5PNbnf2iTzT0NAQxcXFUV9fH0VFRanHAYBuY1ltYyyva3RtZdhDHkvQ8+xOh7pOOQCwU6NLBQR0Bo8loC0OXwcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABIR5QAAAJCIKAcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABLJ+Si/4YYbIpPJtLodeuihqceih1pasyGeWbwmltU2ph6FNthPuW/u4jXxrdmvxrPVNalHoQ0eSwDkmnx8beqTeoD2OOKII+Lpp59u+bpPn24xNnlkfdOmmDarKiq3CYhJ5WUxY8r4KC4sSDgZ27Kfct+KusaYfNf8WNe0uWVtYGFBPD51YgwvKUw4GdvyWAIg1+Tza1POv1Me8V6EH3DAAS230tLS1CPRw0ybVRXzl9S2Wpu/pDYun7Uo0UTsjP2U+7YP8oiIdU2b45y75iWaiJ3xWAIg1+Tza1O3iPLq6uoYNmxYjBkzJi644IJYuXLlLr9348aN0dDQ0OoGe2JpzYaorK6JLdlsq/Ut2WxUVtfk1aEz3Zn9lPvmLl6zQ5Bvta5ps0PZc4THEgC5Jt9fm3I+yk844YS477774pe//GXcfffdsWzZsjjppJPi7bff3un333rrrVFcXNxyGz58eBdPTL5Zsbapze3L67r3k0C+sJ9yX9Xq9W1uX7hyXdcMQps8lgDINfn+2pTzUX7WWWfFxz/+8Rg3blycccYZ8d///d+xfv36eOihh3b6/dOnT4/6+vqW26pVq7p4YvLNyEFtf851VEn/LpqEtthPue+oA/dvc/vRIwZ2zSC0yWMJgFyT769NOR/l29t///3j4IMPjiVLlux0e9++faOoqKjVDfbEmLL9YlJ5WfTOZFqt985kYlJ5WYwu7d5PAvnCfsp9Jx8yOAbu4kQsAwsL4qTysi6eiJ3xWAIg1+T7a1O3i/INGzbEa6+9FkOHDk09Cj3IjCnjo2Js6xMMVowtjRlTxieaiJ2xn3Lf41Mn7hDmW8++Tu7wWAIg1+Tza1Mmm93u0/I55gtf+EKcffbZMXLkyHj99dfj+uu
vj6qqqnjllVeirOz931VpaGiI4uLiqK+v9645e2xZbWMsr2uMUSX9u/2/yOUz+yn3PVtdEwtXroujRwz0DnkO81gCINd0l9em3enQnI/y888/PyorK6Ouri7Kyspi4sSJccstt8RBBx3Urp8X5QAAAHSl3enQPl00U4f99Kc/TT0CAAAA7BXd7jPlAAAAkC9EOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIpE/qAQA629zFa6Jq9fo4esTAOKm8LPU40G0trdkQK9Y2xaiS/jG6tH/qcdgF+wmgexPlQN5YUdcYk++aH+uaNresDSwsiMenTozhJYUJJ4PuZX3Tppg2qyoqq2ta1iaVl8WMKeOjuLAg4WRsy34CyA8OXwfyxvZBHhGxrmlznHPXvEQTQfc0bVZVzF9S22pt/pLauHzWokQTsTP2E0B+EOVAXpi7eM0OQb7VuqbN8ew27yQBu7a0ZkNUVtfElmy21fqWbDYqq2tiWW1josnYlv0EkD9EOZAXqlavb3P7wpXrumYQ6OZWrG1qc/vyOrGXC+wngPwhyoG8cNSB+7e5/egRA7tmEOjmRg5q+/wLo0qcSCwX2E8A+UOUA3nh5EMGx8BdnNhoYGGBs7BDO40p2y8mlZdF70ym1XrvTCYmlZc5u3eOsJ8A8ocoB/LG41Mn7hDmW8++DrTfjCnjo2Jsaau1irGlMWPK+EQTsTP2E0B+yGSz250hJM80NDREcXFx1NfXR1FRUepxgC7wbHVNLFy5znXKYQ8tq22M5XWNrn+d4+wngNyzOx0qygEAAKAT7U6HOnwdAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIpE/qAaA7mbt4TVStXh9HjxgYJ5WXpR6HXVhasyFWrG2KUSX9Y3Rp/9TjsBP2EdCTeM4D2iLKoR1W1DXG5Lvmx7qmzS1rAwsL4vGpE2N4SWHCydjW+qZNMW1WVVRW17SsTSovixlTxkdxYUHCydjKPgJ6Es95QHs4fB3aYfsgj4hY17Q5zrlrXqKJ2Jlps6pi/pLaVmvzl9TG5bMWJZqI7dlHQE/iOQ9oD1EO72Pu4jU7BPlW65o2x7Pb/Os36Syt2RCV1TWxJZtttb4lm43K6ppYVtuYaDK2so+AnsRzHtBeohzeR9Xq9W1uX7hyXdcMQptWrG1qc/vyOr/8pGYfAT2J5zygvUQ5vI+jDty/ze1HjxjYNYPQppGD2v5s/6gSJ9ZJzT4CehLPeUB7iXJ4HycfMjgG7uJkLAMLC5yFPUeMKdsvJpWXRe9MptV670wmJpWXOdttDrCPgJ7Ecx7QXqIc2uHxqRN3CPOtZ18nd8yYMj4qxpa2WqsYWxozpoxPNBHbs4+AnsRzHtAemWx2u7NP5JmGhoYoLi6O+vr6KCoqSj0O3dyz1TWxcOU61ynPcctqG2N5XaPrweYw+wjoSTznQc+zOx0qygEAAKAT7U6HOnwdAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACARUQ4AAACJ9Ek9AEBnu+nxP8ZvltbGxLFl8e8fPTz1OAB71dzFa6Jq9fo4esTAOKm8LPU47MTSmg2xYm1TjCrpH6NL+6ceB8gxohzIG0+9/Eb8y4MLW77+85sb4ofzlsU9Fx4bHzp8SMLJADrfirrGmHzX/FjXtLllbWBhQTw+dWIMLylMOBlbrW/aFNNmVUVldU3L2qTyspgxZXwUFxYknAzIJQ5fB/LGtkG+rUsfeL6LJwHY+7YP8oiIdU2b45y75iWaiO1Nm1U
V85fUtlqbv6Q2Lp+1KNFEQC4S5UBeuOnxP7a5/eYnXumiSQD2vrmL1+wQ5Futa9ocz27zzixpLK3ZEJXVNbElm221viWbjcrqmlhW25hoMiDXiHIgL/xmaW2b2+ct8QsqkD+qVq9vc/vCleu6ZhB2acXapja3L68T5cB7RDmQF04cU9rm9oljnfwIyB9HHbh/m9uPHjGwawZhl0YOavtz/aNKnPANeI8oB/LCl885os3tzsIO5JOTDxkcA3dxorCBhQXOwp4DxpTtF5PKy6J3JtNqvXcmE5PKy5yFHWghyoG8cc+Fx+7WOkB39vjUiTuE+dazr5MbZkwZHxVjWx/JVTG2NGZMGZ9oIiAXZbLZ7c4+kWcaGhqiuLg46uvro6ioKPU4QBe4+YlXYt6SGtcpB3qEZ6trYuHKda5TnsOW1TbG8rpG1ymHHmR3OlSUAwAAQCfanQ51+DoAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABIR5QAAAJCIKAcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABLpk3oA6E5+9vuV8dyyuqg4qDQ+fuzw1OOwC5//WVX8bnldTBhTGt/4+JGpx2EnPJa6h6U1G2LF2qYYVdI/Rpf2Tz0Ou2A/AXRvmWw2m009xN7U0NAQxcXFUV9fH0VFRanHoZt6afX6+P++85t4t/n/PVz69MrE41Mr4vAPFCecjG098sKquPLhP+yw/u1PHBXnjP9AgonYnsdS97C+aVNMm1UVldU1LWuTystixpTxUVxYkHAytmU/AeSu3elQh69DO2wfERER7zZn45y75ieaiJ3ZWZBHREz7WVXXDsIueSx1D9NmVcX8JbWt1uYvqY3LZy1KNBE7Yz8B5AdRDu/jZ79fuUNEbPVuczYefn5VF0/Eznz+fcL76odf7JpB2CWPpe5hac2GqKyuiS3bHUi3JZuNyuqaWFbbmGgytmU/AeQPUQ7v47lldW1un/9abZvb6Rq/W972fnpuqf2UmsdS97BibVOb25fXib1cYD8B5A9RDu9jwuiSNrdXHFTaRZPQlhNGtb2fJoyxn1LzWOoeRg4qbHP7qBInEssF9hNA/hDl8D4+cfyI6NMrs9NtfXplnDk6R3zzE0e1ud1Z2NPzWOoexpTtF5PKy6J3pvW+6p3JxKTyMmf3zhH2E0D+EOXQDo9PrdghJraeMZrc8e1dhPmu1ul6Hkvdw4wp46NibOsjFyrGlsaMKeMTTcTO2E8A+cEl0WA3PPz8qpj/Wq1rK+e4qx9+MZ5bWus65TnMY6l7WFbbGMvrGl3/OsfZTwC5Z3c6VJQDAABAJ3KdcgAAAOgGRDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABIR5QAAAJCIKAcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABLpFlF+1113xahRo2LfffeNE044IX7/+9+nHqnT3fT4H+PMO+bGzU+8knoU2jB38Zr41uxX49nqmtSj0Ab7Kff97Pcr44qfLYqHn1+VehTasLRmQzyzeE0sq21MPQpt8JwH9CT5+NqUyWaz2dRDtOVnP/tZXHjhhfHd7343TjjhhLjjjjvi4YcfjsWLF8fgwYPf9+cbGhqiuLg46uvro6ioqAsm3j1PvfxG/MuDC3dYv+fCY+NDhw9JMBE7s6KuMSbfNT/WNW1uWRtYWBCPT50Yw0sKE07Gtuyn3PfS6vXx/33nN/Fu8/976enTKxOPT62Iwz9QnHAytrW+aVNMm1UVldtE3qTyspgxZXwUFxYknIxtec4DepLu9tq0Ox2a8++U33777fGZz3wmPvWpT8Xhhx8e3/3ud6OwsDB+9KMfpR6tU+wsyCMiLn3g+S6ehLZs/0tPRMS6ps1xzl3zEk3EzthPuW/7II+IeLc5G+fcNT/RROzMtFlVMX9Jbau1+Ut
q4/JZixJNxM54zgN6knx+bcrpKN+0aVO88MILcdppp7Ws9erVK0477bR47rnndvozGzdujIaGhla3XHXT439sc7tD2XPD3MVrdvilZ6t1TZsdLpgj7Kfc97Pfr9whyLd6tznrUPYcsbRmQ1RW18SW7Q6k25LNRmV1TV4dLtidec4DepJ8f23K6Sivra2NLVu2xJAhrQ/jHjJkSLz55ps7/Zlbb701iouLW27Dhw/vilE75DdLa9vcPm+JF9RcULV6fZvbF65c1zWD0Cb7Kfc9t6yuze3zX2v7OZGusWJtU5vbl9d171988oXnPKAnyffXppyO8o6YPn161NfXt9xWrcrdd15OHFPa5vaJY8u6aBLactSB+7e5/egRA7tmENpkP+W+CaNL2txecVDbz4l0jZGD2v4s8qiS/l00CW3xnAf0JPn+2pTTUV5aWhq9e/eOt956q9X6W2+9FQcccMBOf6Zv375RVFTU6parvnzOEW1u//ePHt5Fk9CWkw8ZHAN3cfKIgYUFcVK5fzzJBfZT7vvE8SOiT6/MTrf16ZWJjx+bu0c29SRjyvaLSeVl0TvTel/1zmRiUnlZjC7t3r/45AvPeUBPku+vTTkd5fvss08cc8wxMXv27Ja15ubmmD17dkyYMCHhZJ3nnguP3a110nh86sQdfvnZeoZbcof9lPsen1qxQ5hvPfs6uWPGlPFRMbb1kQsVY0tjxpTxiSZiZzznAT1JPr82dYtLol100UXxve99L44//vi444474qGHHoo///nPO3zWfGdy/ZJoW938xCsxb0lNTBxb5h3yHPZsdU0sXLkujh4x0LsQOcx+yn0PP78q5r9WGxUHlXqHPIctq22M5XWNMaqkf7d/FyKfec4DepLu8tq0Ox2a81EeEXHnnXfGN77xjXjzzTfjqKOOim9/+9txwgkntOtnu0uUAwAAkB/yLsr3hCgHAACgK+1Oh+b0Z8oBAAAgn4lyAAAASESUAwAAQCKiHAAAABIR5QAAAJCIKAcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABIR5QAAAJCIKAcAAIBERDkAAAAkIsoBAAAgEVEOAAAAiYhyAAAASESUAwAAQCKiHAAAABLpk3qAvS2bzUZERENDQ+JJAAAA6Am29ufWHm1L3kf522+/HRERw4cPTzwJAAAAPcnbb78dxcXFbX5PJtuedO/Gmpub4/XXX48BAwZEJpNJPc4uNTQ0xPDhw2PVqlVRVFSUehx2wX7qHuyn3GcfdQ/2U/dgP+U++6h7sJ+6h+6yn7LZbLz99tsxbNiw6NWr7U+N5/075b169YoDDzww9RjtVlRUlNP/5+I99lP3YD/lPvuoe7Cfugf7KffZR92D/dQ9dIf99H7vkG/lRG8AAACQiCgHAACARER5jujbt29cf/310bdv39Sj0Ab7qXuwn3KffdQ92E/dg/2U++yj7sF+6h7ycT/l/YneAAAAIFd5pxwAAAASEeUAAACQiCgHAACAREQ5AAAAJCLKE6usrIyzzz47hg0bFplMJh599NHUI7ETt956axx33HExYMCAGDx4cEyePDkWL16ceiy2cffdd8e4ceOiqKgoioqKYsKECfGLX/wi9Vi8j9tuuy0ymUxcccUVqUdhGzfccENkMplWt0MPPTT1WGznL3/5S/zzP/9zlJSURL9+/eKDH/xgPP/886nHYhujRo3a4bGUyWRi6tSpqUdjG1u2bInrrrsuRo8eHf369YuDDjoovvKVr4TzYeeWt99+O6644ooYOXJk9OvXL0488cRYsGBB6rE6RZ/UA/R0jY2NceSRR8Yll1wS5557bupx2IW5c+fG1KlT47jjjot33303vvSlL8Xpp58er7zySvTv3z/1eETEgQceGLfddluUl5dHNpuN+++/P/7xH/8xFi1aFEcccUTq8diJBQsWxPe+970YN25c6lHYiSOOOCKefvrplq/79PErQy5Zt25
dVFRUxKmnnhq/+MUvoqysLKqrq2PgwIGpR2MbCxYsiC1btrR8/fLLL8eHP/zh+PjHP55wKrb3ta99Le6+++64//7744gjjojnn38+PvWpT0VxcXFMmzYt9Xj8zac//el4+eWX48c//nEMGzYsHnzwwTjttNPilVdeiQ984AOpx9sjLomWQzKZTDzyyCMxefLk1KPwPmpqamLw4MExd+7cmDRpUupx2IVBgwbFN77xjbj00ktTj8J2NmzYEEcffXR85zvfiZtvvjmOOuqouOOOO1KPxd/ccMMN8eijj0ZVVVXqUdiFa6+9NubPnx/PPvts6lHYDVdccUU88cQTUV1dHZlMJvU4/M1HP/rRGDJkSNxzzz0ta+edd17069cvHnzwwYSTsdVf//rXGDBgQDz22GPxkY98pGX9mGOOibPOOituvvnmhNPtOYevQwfU19dHxHvRR+7ZsmVL/PSnP43GxsaYMGFC6nHYialTp8ZHPvKROO2001KPwi5UV1fHsGHDYsyYMXHBBRfEypUrU4/ENh5//PE49thj4+Mf/3gMHjw4xo8fHz/4wQ9Sj0UbNm3aFA8++GBccsklgjzHnHjiiTF79ux49dVXIyLixRdfjHnz5sVZZ52VeDK2evfdd2PLli2x7777tlrv169fzJs3L9FUncexaLCbmpub44orroiKior4u7/7u9TjsI2XXnopJkyYEO+8807st99+8cgjj8Thhx+eeiy289Of/jQWLlyYN58Dy0cnnHBC3HfffXHIIYfEG2+8ETfeeGOcdNJJ8fLLL8eAAQNSj0dELF26NO6+++646qqr4ktf+lIsWLAgpk2bFvvss09cdNFFqcdjJx599NFYv359XHzxxalHYTvXXnttNDQ0xKGHHhq9e/eOLVu2xC233BIXXHBB6tH4mwEDBsSECRPiK1/5Shx22GExZMiQmDVrVjz33HMxduzY1OPtMVEOu2nq1Knx8ssv58W/yuWbQw45JKqqqqK+vj7+8z//My666KKYO3euMM8hq1atin/7t3+LX/3qVzv8aze5Y9t3h8aNGxcnnHBCjBw5Mh566CEfB8kRzc3Nceyxx8ZXv/rViIgYP358vPzyy/Hd735XlOeoe+65J84666wYNmxY6lHYzkMPPRQ/+clPYubMmXHEEUdEVVVVXHHFFTFs2DCPpxzy4x//OC655JL4wAc+EL17946jjz46pkyZEi+88ELq0faYKIfdcNlll8UTTzwRlZWVceCBB6Yeh+3ss88+Lf9aeswxx8SCBQviW9/6Vnzve99LPBlbvfDCC7FmzZo4+uijW9a2bNkSlZWVceedd8bGjRujd+/eCSdkZ/bff/84+OCDY8mSJalH4W+GDh26wz84HnbYYfHzn/880US0ZcWKFfH000/Hf/3Xf6UehZ24+uqr49prr43zzz8/IiI++MEPxooVK+LWW28V5TnkoIMOirlz50ZjY2M0NDTE0KFD4xOf+ESMGTMm9Wh7zGfKoR2y2Wxcdtll8cgjj8Svf/3rGD16dOqRaIfm5ubYuHFj6jHYxoc+9KF46aWXoqqqquV27LHHxgUXXBBVVVWCPEdt2LAhXnvttRg6dGjqUfibioqKHS7N+eqrr8bIkSMTTURb7r333hg8eHCrE1SRO5qamqJXr9ZZ1Lt372hubk40EW3p379/DB06NNatWxdPPvlk/OM//mPqkfaYd8oT27BhQ6t3HpYtWxZVVVUxaNCgGDFiRMLJ2NbUqVNj5syZ8dhjj8WAAQPizTffjIiI4uLi6NevX+LpiIiYPn16nHXWWTFixIh4++23Y+bMmTFnzpx48sknU4/GNgYMGLDDuRj69+8fJSUlztGQQ77whS/E2WefHSNHjozXX389rr/++ujdu3dMmTIl9Wj8zZVXXhknnnhifPWrX41/+qd/it///vfx/e9/P77//e+nHo3tNDc3x7333hsXXXSRSwvmqLPPPjtuueWWGDFiRBxxxBGxaNGiuP3
22+OSSy5JPRrbePLJJyObzcYhhxwSS5YsiauvvjoOPfTQ+NSnPpV6tD3mmSGx559/Pk499dSWr6+66qqIiLjooovivvvuSzQV27v77rsjIuKUU05ptX7vvfc6YUuOWLNmTVx44YXxxhtvRHFxcYwbNy6efPLJ+PCHP5x6NOh2Vq9eHVOmTIm6urooKyuLiRMnxm9/+9soKytLPRp/c9xxx8UjjzwS06dPj5tuuilGjx4dd9xxhxNT5aCnn346Vq5cKfBy2IwZM+K6666L//W//lesWbMmhg0bFp/97Gfjy1/+curR2EZ9fX1Mnz49Vq9eHYMGDYrzzjsvbrnlligoKEg92h5znXIAAABIxGfKAQAAIBFRDgAAAImIcgAAAEhElAMAAEAiohwAAAASEeUAAACQiCgHAACAREQ5ALBXXHzxxTF58uTUYwBAThPlAAAAkIgoBwByUjabjXfffTf1GACwV4lyAMgBp5xySkybNi2uueaaGDRoUBxwwAFxww03RETE8uXLI5PJRFVVVcv3r1+/PjKZTMyZMyciIubMmROZTCaefPLJGD9+fPTr1y/+4R/+IdasWRO/+MUv4rDDDouioqL4n//zf0ZTU1O7Zmpubo6vf/3rMXbs2Ojbt2+MGDEibrnllpbtL730UvzDP/xD9OvXL0pKSuJf/uVfYsOGDbu8v40bN8a0adNi8ODBse+++8bEiRNjwYIFLdu3/h1+8YtfxDHHHBN9+/aNefPmxYsvvhinnnpqDBgwIIqKiuKYY46J559/vv3/cQEgh4lyAMgR999/f/Tv3z9+97vfxde//vW46aab4le/+tVu3ccNN9wQd955Z/zmN7+JVatWxT/90z/FHXfcETNnzoz/+3//bzz11FMxY8aMdt3X9OnT47bbbovrrrsuXnnllZg5c2YMGTIkIiIaGxvjjDPOiIEDB8aCBQvi4Ycfjqeffjouu+yyXd7fNddcEz//+c/j/vvvj4ULF8bYsWPjjDPOiLVr17b6vmuvvTZuu+22+NOf/hTjxo2LCy64IA488MBYsGBBvPDCC3HttddGQUHBbv13AYBc1Sf1AADAe8aNGxfXX399RESUl5fHnXfeGbNnz47y8vJ238fNN98cFRUVERFx6aWXxvTp0+O1116LMWPGRETExz72sXjmmWfii1/8Ypv38/bbb8e3vvWtuPPOO+Oiiy6KiIiDDjooJk6cGBERM2fOjHfeeSceeOCB6N+/f0RE3HnnnXH22WfH1772tZZ436qxsTHuvvvuuO++++Kss86KiIgf/OAH8atf/SruueeeuPrqq1u+96abbooPf/jDLV+vXLkyrr766jj00ENb/tsAQL7wTjkA5Ihx48a1+nro0KGxZs2aDt/HkCFDorCwsCXIt6615z7/9Kc/xcaNG+NDH/rQLrcfeeSRLUEeEVFRURHNzc2xePHiHb7/tddei82bN7f8g0FEREFBQRx//PHxpz/9qdX3Hnvssa2+vuqqq+LTn/50nHbaaXHbbbfFa6+99r7zA0B3IcoBIEdsf0h2JpOJ5ubm6NXrvZfrbDbbsm3z5s3vex+ZTGaX9/l++vXr1+65O9u2oR/x3iH5f/zjH+MjH/lI/PrXv47DDz88HnnkkUTTAUDnEuUAkOPKysoiIuKNN95oWdv2pG97Q3l5efTr1y9mz5690+2HHXZYvPjii9HY2NiyNn/+/OjVq1cccsghO3z/QQcdFPvss0/Mnz+/ZW3z5s2xYMGCOPzww993noMPPjiuvPLKeOqpp+Lcc8+Ne++9twN/KwDIPaIcAHJcv3794u///u9bTn42d+7c+Pd///e9+mfuu+++8cUvfjGuueaaeOCBB+K1116L3/72t3HPPfdERMQFF1wQ++67b1x00UXx8ssvxzPPPBOXX355fPKTn9zh8+QR7737/bnPfS6uvvrq+OUvfxmvvPJKfOYzn4mmpqa49NJLdznHX//617jssstizpw5sWLFipg/f34
sWLAgDjvssL32dweAruREbwDQDfzoRz+KSy+9NI455pg45JBD4utf/3qcfvrpe/XPvO6666JPnz7x5S9/OV5//fUYOnRo/Ou//mtERBQWFsaTTz4Z//Zv/xbHHXdcFBYWxnnnnRe33377Lu/vtttui+bm5vjkJz8Zb7/9dhx77LHx5JNPxsCBA3f5M7179466urq48MIL46233orS0tI499xz48Ybb+z0vy8ApJDJbvsBNQAAAKDLOHwdAAAAEhHlANADrVy5Mvbbb79d3lauXJl6RADoERy+DgA90LvvvhvLly/f5fZRo0ZFnz5OPQMAe5soBwAAgEQcvg4AAACJiHIAAABIRJQDAABAIqIcAAAAEhHlAAAAkIgoBwAAgEREOQAAACQiygEAACCR/x+Bl/b5ZsvORQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Pandas built-in plot tools (which use Matplotlib under the hood)\n", + "# This is convenient, but doesn't give as much control as using the Matplotlib API\n", + "data.plot.scatter('num_colors', 'num_words');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Matplotlib API" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Solarize_Light2', '_classic_test_patch', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']\n" + ] + } + ], + "source": [ + "# There are many pre-defined styles... 
view the available options\n", + "print(mpl.style.available)\n", + "# or use the default style\n", + "plt.style.use('default')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# Matplotlib scatter plot doesn't have built-in jitter option...\n", + "# but it's not too hard\n", + "\n", + "def jitterify(arr, factor=0.01):\n", + "    \"\"\"Return a copy of 1-d 'arr' with random Gaussian jitter added.\n", + "\n", + "    The jitter is drawn from a standard normal distribution and scaled by\n", + "    'factor' times the peak-to-peak range (max - min) of 'arr', so the\n", + "    amount of noise is relative to the spread of the data.\n", + "\n", + "    :param arr: 1-d array-like, eg: list, ndarray\n", + "    :param factor: float, 0.0 -> 1.0, fraction of the data range used as jitter scale\n", + "    :return: ndarray, arr with added jitter (empty input is returned unchanged)\n", + "    \"\"\"\n", + "    assert 0.0 <= factor <= 1.0, f\"Error, invalid factor {factor}\"\n", + "    arr = np.array(arr)\n", + "    assert arr.ndim == 1, f\"Expected 1-d array, got {arr.ndim}\"\n", + "    # Nothing to jitter; also avoids a reduction error on a zero-size array\n", + "    if arr.size == 0:\n", + "        return arr\n", + "    # Use the np.ptp function: the ndarray.ptp method was removed in NumPy 2.0\n", + "    ptp = np.ptp(arr)\n", + "    jitter = np.random.randn(arr.size) * factor * ptp\n", + "    return arr + jitter\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "code_folding": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, "output_type": "display_data" } ], "source": [ - "# Step 3 Visualize\n", - "data.plot.scatter('num_colors', 'num_words')" + "# Scatterplot docs:\n", + "# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html\n", + "\n", + "fontsize = 12\n", + "markersize = 75\n", + "color = '#424242'\n", + "alpha = 0.4\n", + "jitter = 0.01\n", + "\n", + "# Instantiate plot objects\n", + "fig, ax = plt.subplots()\n", + "\n", + "# Add labels to axes\n", + "ax.set_xlabel('Number of colors on sweater', fontsize=fontsize, color='k')\n", + "ax.set_ylabel('Number of words\\nin sweater description', fontsize=fontsize, color='k')\n", + "# Add figure title\n", + "fig.suptitle(\"Relationship between the number of colors and\\nlength of description for ugly holiday sweaters\",\n", + " color='k', fontsize=fontsize + 2)\n", + "\n", + "# Specify what data to plot\n", + "x = data.num_colors\n", + "y = data.num_words\n", + "# Add jitter to data so completely overlapping points are distinguishable\n", + "x = jitterify(x, jitter)\n", + "y = jitterify(y, jitter)\n", + "\n", + "# Plot\n", + "ax.scatter(x, y,\n", + " c=color,\n", + " s=markersize,\n", + " alpha=alpha,\n", + " edgecolors=color,\n", + " linewidths=1.\n", + " )\n", + "\n", + "# Add polyfit curve\n", + "coeffs = np.polyfit(data.num_colors, data.num_words, 1)\n", + "xlim = ax.get_xlim()\n", + "ax.plot(xlim, np.polyval(coeffs, xlim), color='k', alpha=0.9)\n", + "\n", + "# Set tick increment\n", + "incr_x = 2\n", + "incr_y = 5\n", + "ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(incr_x))\n", + "ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(incr_y))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Seaborn library\n", + "This library is designed with data-science and clean aesthetics in mind... check it out!
\n", + "https://seaborn.pydata.org/" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -175,7 +1870,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -189,7 +1884,20 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "My cool stuff", + "title_sidebar": "My cool stuff", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4,