#!/usr/bin/env python
# coding: utf-8
#
# Provenance (from the original git patch header):
#   Commit:  d4698935cdc9862d0fd19e963884174478c3d2db
#   From:    Deepesh Garg
#   Date:    Thu, 23 Jul 2020 04:03:50 +0530
#   Subject: [PATCH] Added The File For Splitting The Data Into Training
#            And Test Sets !
#   File:    project_2_image_captioning_project/Splitting _Data.py
#            (new file, 25 insertions)
"""Re-split the COCO data into training and test sets.

The script loads the train/test partitions provided by the local ``coco``
module, merges them back into a single pool, and then draws a fresh,
reproducible 80/20 train/test split with scikit-learn.

After the script runs, the module-level names ``x_train``, ``x_test``,
``y_train`` and ``y_test`` hold the new split.
"""

# In[ ]:


import os
import sys

ROOT_DIR = os.getcwd()

# Extend the module search path *before* importing anything that may live
# there: the COCO Python API and the local ``coco`` module.
sys.path.append('/opt/cocoapi/PythonAPI')
sys.path.append(os.path.join(ROOT_DIR, "samples/coco/"))  # To find local version

import numpy as np
from pycocotools.coco import COCO  # noqa: F401  (kept from the original script)
from sklearn.model_selection import train_test_split

import coco

dataset = coco.CocoDataset()

# NOTE(review): this call assumes the local ``coco.CocoDataset.load_coco``
# accepts no arguments and returns ``((x_train, y_train), (x_test, y_test))``.
# That module is not visible here - confirm against samples/coco/coco.py.
(x_train, y_train), (x_test, y_test) = dataset.load_coco()

# Pool the predefined partitions (joined along the first axis) so that the
# split below is drawn from the complete data set.
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))

# Fraction of the pooled samples assigned to the training set.
train = 0.8

# ``random_state`` (not ``random_seed``) is the keyword scikit-learn uses to
# seed the shuffle; fixing it makes the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=train, random_state=2019
)