From 4d314835ea0fe2f7f55cb5b750243d73f5aee4ef Mon Sep 17 00:00:00 2001 From: George Skoumas Date: Thu, 22 Jul 2021 12:27:49 +0300 Subject: [PATCH 1/3] notebook placeholdrs --- .../pytorch_tiledb_cloud_ml_model_array.ipynb | 36 +++++++++++++++++++ .../sklearn_tiledb_cloud_ml_model_array.ipynb | 36 +++++++++++++++++++ ...ow_keras_tiledb_cloud_ml_model_array.ipynb | 36 +++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb create mode 100644 machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb create mode 100644 machine_learning/models/tensorflow_keras_tiledb_cloud_ml_model_array.ipynb diff --git a/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb b/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb new file mode 100644 index 0000000..f574694 --- /dev/null +++ b/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb @@ -0,0 +1,36 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb b/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb new file mode 100644 index 0000000..f574694 --- /dev/null +++ b/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb @@ -0,0 +1,36 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + 
"outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/machine_learning/models/tensorflow_keras_tiledb_cloud_ml_model_array.ipynb b/machine_learning/models/tensorflow_keras_tiledb_cloud_ml_model_array.ipynb new file mode 100644 index 0000000..f574694 --- /dev/null +++ b/machine_learning/models/tensorflow_keras_tiledb_cloud_ml_model_array.ipynb @@ -0,0 +1,36 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 48a80b55be95aa550814677dc77aa749761426b9 Mon Sep 17 00:00:00 2001 From: George Skoumas Date: Thu, 22 Jul 2021 12:33:25 +0300 Subject: [PATCH 2/3] ml model cloud examples placeholders --- ...l_array.ipynb => tensorflow_tiledb_cloud_ml_model_array.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename machine_learning/models/{tensorflow_keras_tiledb_cloud_ml_model_array.ipynb => tensorflow_tiledb_cloud_ml_model_array.ipynb} (100%) diff --git a/machine_learning/models/tensorflow_keras_tiledb_cloud_ml_model_array.ipynb 
b/machine_learning/models/tensorflow_tiledb_cloud_ml_model_array.ipynb similarity index 100% rename from machine_learning/models/tensorflow_keras_tiledb_cloud_ml_model_array.ipynb rename to machine_learning/models/tensorflow_tiledb_cloud_ml_model_array.ipynb From ab9b05f325fc758b9796c58e959caf71166f0a53 Mon Sep 17 00:00:00 2001 From: George Skoumas Date: Thu, 22 Jul 2021 12:42:02 +0300 Subject: [PATCH 3/3] added notebooks --- .../pytorch_tiledb_cloud_ml_model_array.ipynb | 350 +++++++++++++++++- .../sklearn_tiledb_cloud_ml_model_array.ipynb | 210 ++++++++++- ...nsorflow_tiledb_cloud_ml_model_array.ipynb | 214 ++++++++++- 3 files changed, 762 insertions(+), 12 deletions(-) diff --git a/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb b/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb index f574694..42af3de 100644 --- a/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb +++ b/machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb @@ -1,15 +1,357 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "In this notebook, we show how we can train a model with PyTorch and save it as a TileDB array on TileDB-Cloud.\n", + "Firstly, let's import what we need and define some variables needed for training a model." 
+ ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": null, + "outputs": [], + "source": [ + "import tiledb.cloud\n", + "import os\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import torchvision\n", + "\n", + "from tiledb.ml.models.pytorch import PyTorchTileDBModel\n", + "\n", + "epochs = 1\n", + "batch_size_train = 128\n", + "batch_size_test = 1000\n", + "learning_rate = 0.01\n", + "momentum = 0.5\n", + "log_interval = 10\n", + "\n", + "# Set random seeds for anything using random number generation\n", + "random_seed = 1\n", + "\n", + "# Disable nondeterministic algorithms\n", + "torch.backends.cudnn.enabled = False\n", + "torch.manual_seed(random_seed)" + ], "metadata": { - "collapsed": true - }, + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We then have to export and load our TileDB-Cloud credentials. For TileDB cloud you can also use a token.\n", + "You have to also set up your AWS credentials on your TileDB-Cloud account." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# This is also our namespace on TileDB-Cloud.\n", + "TILEDB_USER_NAME = os.environ.get('TILEDB_USER_NAME')\n", + "TILEDB_PASSWD = os.environ.get('TILEDB_PASSWD')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We then create a TileDB-Cloud context and set up our communication with TileDB-Cloud." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "ctx = tiledb.cloud.Ctx()\n", + "tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We will also need the DataLoaders API for the dataset. We will also employ TorchVision which lets us load the MNIST\n", + "dataset in a handy way. We'll use the batch size defined above (batch_size_train) for training, while the values 0.1307 and 0.3081 used for\n", + "the Normalize() transformation below are the global mean and standard deviation of the MNIST dataset;\n", + "we'll take them as a given here." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import logging\n", + "logging.getLogger(\"lightning\").setLevel(logging.ERROR)\n", + "\n", + "train_loader = torch.utils.data.DataLoader(\n", + " torchvision.datasets.MNIST('', train=True, download=True,\n", + " transform=torchvision.transforms.Compose([\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize(\n", + " (0.1307,), (0.3081,))\n", + " ])),\n", + " batch_size=batch_size_train, shuffle=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Moving on, we build our network. We'll use two 2-D convolutional layers followed by two fully-connected\n", + "layers. As activation function we'll choose ReLUs and as a means of regularization we'll use two dropout layers." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n", + " self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n", + " self.conv2_drop = nn.Dropout2d()\n", + " self.fc1 = nn.Linear(320, 50)\n", + " self.fc2 = nn.Linear(50, 10)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", + " x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n", + " x = x.view(-1, 320)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.dropout(x, training=self.training)\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x, dim = 1)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We will now initialise our Neural Network and optimizer." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "model = Net()\n", + "optimizer = optim.SGD(model.parameters(), lr=learning_rate,\n", + " momentum=momentum)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We continue with the training loop and we iterate over all training data once per epoch. Loading the individual batches\n", + "is handled by the DataLoader. We need to set the gradients to zero using optimizer.zero_grad() since PyTorch by default\n", + "accumulates gradients. We then produce the output of the network (forward pass) and compute a negative log-likelihood\n", + "loss between the output and the ground truth label. With the backward() call we now collect a new set of gradients which we\n", + "propagate back into each of the network's parameters using optimizer.step()." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "train_losses = []\n", + "train_counter = []\n", + "\n", + "def train(epoch):\n", + " model.train()\n", + " for batch_idx, (data, target) in enumerate(train_loader):\n", + " optimizer.zero_grad()\n", + " output = model(data)\n", + " loss = F.nll_loss(output, target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " if batch_idx % log_interval == 0:\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " epoch, batch_idx * len(data), len(train_loader.dataset),\n", + " 100. * batch_idx / len(train_loader), loss.item()))\n", + " train_losses.append(loss.item())\n", + " train_counter.append(\n", + " (batch_idx*batch_size_train) + ((epoch-1)*len(train_loader.dataset)))\n", + "\n", + "for epoch in range(1, epochs + 1):\n", + " train(epoch)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We can move on by defining a TileDBPyTorch model and use model save functionality in order to save it directly to\n", + "our bucket on S3 (defined with AWS credentials in your TileDB-Cloud account) and register it on TileDB-Cloud." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, "outputs": [], "source": [ - "" - ] + "# Define array model uri.\n", + "uri = \"tiledb-pytorch-model\"\n", + "\n", + "print('Defining PyTorchTileDBModel model...')\n", + "# In order to save our model on S3 and register it on TileDB-Cloud we have to pass our Namespace and TileDB Context.\n", + "tiledb_model = PyTorchTileDBModel(uri=uri, namespace=TILEDB_USER_NAME, ctx=ctx, model=model)\n", + "\n", + "# We will need the uri that was created from our model class\n", + "# (and follows pattern tiledb://my_username/s3://my_bucket/my_array),\n", + "# in order to interact with our model on TileDB-Cloud.\n", + "tiledb_cloud_model_uri = tiledb_model.uri\n", + "\n", + "print('Saving model on S3 and registering on TileDB-Cloud...')\n", + "tiledb_model.save(meta={'epochs': epochs,\n", + " 'train_loss': train_losses})\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Finally, we can use TileDB-Cloud API as described in our [cloud documentation](https://docs.tiledb.com/cloud/), in order\n", + "to list our models, get information and deregister them." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# List all our models. Here, we filter with file_type = 'ml_model'. 
All machine learning model TileDB arrays are of type\n", + "# 'ml_model'\n", + "print(\n", + "tiledb.cloud.client.list_arrays(\n", + " file_type=['ml_model'],\n", + " namespace=TILEDB_USER_NAME))\n", + "\n", + "# Get model's info\n", + "print(tiledb.cloud.array.info(tiledb_cloud_model_uri))\n", + "\n", + "# Load our model for inference\n", + "# Place holder for the loaded model\n", + "loaded_model = Net()\n", + "loaded_optimizer = optim.SGD(loaded_model.parameters(), lr=learning_rate,\n", + " momentum=momentum)\n", + "\n", + "PyTorchTileDBModel(uri=tiledb_cloud_model_uri, ctx=ctx).load(model=loaded_model, optimizer=loaded_optimizer)\n", + "\n", + "\n", + "# Check model parameters\n", + "for key_item_1, key_item_2 in zip(\n", + " model.state_dict().items(), loaded_model.state_dict().items()\n", + "):\n", + " print(torch.equal(key_item_1[1], key_item_2[1]))\n", + "\n", + "# Check optimizer parameters\n", + "for key_item_1, key_item_2 in zip(\n", + " optimizer.state_dict().items(), loaded_optimizer.state_dict().items()\n", + "):\n", + " print(all([a == b for a, b in zip(key_item_1[1], key_item_2[1])]))\n", + "\n", + "# Deregister model\n", + "tiledb.cloud.deregister_array(tiledb_cloud_model_uri)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } } ], "metadata": { diff --git a/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb b/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb index f574694..fa1afaf 100644 --- a/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb +++ b/machine_learning/models/sklearn_tiledb_cloud_ml_model_array.ipynb @@ -1,15 +1,217 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "In this notebook, we show how we can train a model with Scikit-learn and save it as a TileDB array on 
TileDB-Cloud.\n", + "Firstly, let's import what we need." + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": null, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tiledb.cloud\n", + "import os\n", + "\n", + "from sklearn import preprocessing\n", + "from sklearn.linear_model import LogisticRegression\n", + "from tiledb.ml.models.sklearn import SklearnTileDBModel" + ], "metadata": { - "collapsed": true - }, + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We then have to export and load our TileDB-Cloud credentials. For TileDB cloud you can also use a token.\n", + "You have to also set up your AWS credentials on your TileDB-Cloud account." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# This is also our namespace on TileDB-Cloud.\n", + "TILEDB_USER_NAME = os.environ.get('TILEDB_USER_NAME')\n", + "TILEDB_PASSWD = os.environ.get('TILEDB_PASSWD')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We then create a TileDB-Cloud context and set up our communication with TileDB-Cloud." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "ctx = tiledb.cloud.Ctx()\n", + "tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "And move on with training a sklearn model with some random data." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, "outputs": [], "source": [ - "" - ] + "X_train = np.random.random((1000, 784))\n", + "y_train = np.random.randint(9, size=1000)\n", + "\n", + "X_test = np.random.random((500, 784))\n", + "y_test = np.random.randint(9, size=500)\n", + "\n", + "scaler = preprocessing.StandardScaler().fit(X_train)\n", + "\n", + "scaled_X_train = scaler.transform(X_train)\n", + "scaled_X_test = scaler.transform(X_test)\n", + "\n", + "print(\"Model fit...\")\n", + "model = LogisticRegression(random_state=0).fit(scaled_X_train, y_train)\n", + "\n", + "print(\"Model score...\")\n", + "sparsity = np.mean(model.coef_ == 0) * 100\n", + "score = model.score(scaled_X_test, y_test)\n", + "\n", + "print(\"Sparsity with L2 penalty: %.2f%%\" % sparsity)\n", + "print(\"Test score with L2 penalty: %.4f\" % score)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We can move on by defining a TileDB Sklearn model and use model save functionality in order to save it directly to\n", + "our bucket on S3 (defined with AWS credentials in your TileDB-Cloud account) and register it on TileDB-Cloud." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Define array model uri.\n", + "uri = \"tiledb-sklearn-model\"\n", + "\n", + "print('Defining SklearnTileDBModel model...')\n", + "# In order to save our model on S3 and register it on TileDB-Cloud we have to pass our Namespace and TileDB Context.\n", + "tiledb_model = SklearnTileDBModel(uri=uri, namespace=TILEDB_USER_NAME, ctx=ctx, model=model)\n", + "\n", + "print(tiledb_model.uri)\n", + "\n", + "# We will need the uri that was created from our model class\n", + "# (and follows pattern tiledb://my_username/s3://my_bucket/my_array),\n", + "# in order to interact with our model on TileDB-Cloud.\n", + "tiledb_cloud_model_uri = tiledb_model.uri\n", + "\n", + "print('Saving model on S3 and registering on TileDB-Cloud...')\n", + "tiledb_model.save(meta={\"Sparsity_with_L1_penalty\": sparsity, \"score\": score})\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Finally, we can use TileDB-Cloud API as described in our [cloud documentation](https://docs.tiledb.com/cloud/), in order\n", + "to list our models, get information and deregister them." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# List all our models. Here, we filter with file_type = 'ml_model'. 
All machine learning model TileDB arrays are of type\n", + "# 'ml_model'\n", + "print(\n", + "tiledb.cloud.client.list_arrays(\n", + " file_type=['ml_model'],\n", + " namespace=TILEDB_USER_NAME))\n", + "\n", + "# Get model's info\n", + "print(tiledb.cloud.array.info(tiledb_cloud_model_uri))\n", + "\n", + "# Load our model for inference\n", + "loaded_tiledb_model = SklearnTileDBModel(uri=tiledb_cloud_model_uri, ctx=ctx).load()\n", + "\n", + "print(score == loaded_tiledb_model.score(scaled_X_test, y_test))\n", + "\n", + "# Deregister model\n", + "tiledb.cloud.deregister_array(tiledb_cloud_model_uri)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } } ], "metadata": { diff --git a/machine_learning/models/tensorflow_tiledb_cloud_ml_model_array.ipynb b/machine_learning/models/tensorflow_tiledb_cloud_ml_model_array.ipynb index f574694..a57f54c 100644 --- a/machine_learning/models/tensorflow_tiledb_cloud_ml_model_array.ipynb +++ b/machine_learning/models/tensorflow_tiledb_cloud_ml_model_array.ipynb @@ -1,15 +1,221 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "In this notebook, we show how we can train a model with Tensorflow and save it as a TileDB array on TileDB-Cloud.\n", + "Firstly, let's import what we need." + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": null, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import tiledb.cloud\n", + "import os\n", + "\n", + "from tiledb.ml.models.tensorflow_keras import TensorflowKerasTileDBModel" + ], "metadata": { - "collapsed": true - }, + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We then have to export and load our TileDB-Cloud credentials. 
For TileDB cloud you can also use a token.\n", + "You have to also set up your AWS credentials on your TileDB-Cloud account." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# This is also our namespace on TileDB-Cloud.\n", + "TILEDB_USER_NAME = os.environ.get('TILEDB_USER_NAME')\n", + "TILEDB_PASSWD = os.environ.get('TILEDB_PASSWD')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We then create a TileDB-Cloud context and set up our communication with TileDB-Cloud." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, "outputs": [], "source": [ - "" - ] + "ctx = tiledb.cloud.Ctx()\n", + "tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "And move on with training a Tensorflow model with some random data." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "X_train = np.random.random((1000, 784))\n", + "y_train = np.random.randint(9, size=1000)\n", + "\n", + "X_test = np.random.random((500, 784))\n", + "y_test = np.random.randint(9, size=500)\n", + "\n", + "# Create a model\n", + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(784,)),\n", + " tf.keras.layers.Dense(128, activation='relu'),\n", + " tf.keras.layers.Dropout(0.2),\n", + " tf.keras.layers.Dense(10)\n", + " ])\n", + "\n", + "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", + "\n", + "model.compile(optimizer='adam',\n", + " loss=loss_fn,\n", + " metrics=['accuracy'])\n", + "\n", + "\n", + "print(\"Model fit...\")\n", + "model.fit(X_train, y_train, epochs=2)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We can move on by defining a TileDBTensorflow model and use model save functionality in order to save it directly to\n", + "our bucket on S3 (defined with AWS credentials in your TileDB-Cloud account) and register it on TileDB-Cloud." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Define array model uri.\n", + "uri = \"tiledb-tensorflow-model\"\n", + "\n", + "print('Defining TensorflowKerasTileDBModel model...')\n", + "# In order to save our model on S3 and register it on TileDB-Cloud we have to pass our Namespace and TileDB Context.\n", + "tiledb_model = TensorflowKerasTileDBModel(uri=uri, namespace=TILEDB_USER_NAME, ctx=ctx, model=model)\n", + "\n", + "print(tiledb_model.uri)\n", + "\n", + "# We will need the uri that was created from our model class\n", + "# (and follows pattern tiledb://my_username/s3://my_bucket/my_array),\n", + "# in order to interact with our model on TileDB-Cloud.\n", + "tiledb_cloud_model_uri = tiledb_model.uri\n", + "\n", + "print('Saving model on S3 and registering on TileDB-Cloud...')\n", + "tiledb_model.save()\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Finally, we can use TileDB-Cloud API as described in our [cloud documentation](https://docs.tiledb.com/cloud/), in order\n", + "to list our models, get information and deregister them." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# List all our models. Here, we filter with file_type = 'ml_model'. 
All machine learning model TileDB arrays are of type\n", + "# 'ml_model'\n", + "print(\n", + "tiledb.cloud.client.list_arrays(\n", + " file_type=['ml_model'],\n", + " namespace=TILEDB_USER_NAME))\n", + "\n", + "# Get model's info\n", + "print(tiledb.cloud.array.info(tiledb_cloud_model_uri))\n", + "\n", + "# Load our model for inference\n", + "loaded_model = TensorflowKerasTileDBModel(uri=tiledb_cloud_model_uri, ctx=ctx).load()\n", + "\n", + "print(np.array_equal(model.predict(X_test), loaded_model.predict(X_test)))\n", + "\n", + "# Deregister model\n", + "tiledb.cloud.deregister_array(tiledb_cloud_model_uri)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } } ], "metadata": {