{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Project work\n",
    "\n",
    "1. step:\n",
    "Kaggle registration: https://www.kaggle.com\n",
    "\n",
    "2. step:\n",
    "Join the competition: https://www.kaggle.com/t/b0fc1fc485b146a2887ab6ab8b71c2a8\n",
    "\n",
    "3. step:\n",
    "API key generation (My Account -> Create New API Token), then upload the downloaded `kaggle.json` to the cloud computer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the API token where the kaggle client looks for it; -p makes the cell re-runnable\n",
    "!mkdir -p ~/.kaggle\n",
    "!mv kaggle.json ~/.kaggle/kaggle.json\n",
    "# The kaggle client warns when the token is readable by other users\n",
    "!chmod 600 ~/.kaggle/kaggle.json\n",
    "!sudo pip3 install kaggle\n",
    "# The training cell uses TF 1.x-only APIs (tf.layers, tf.estimator.inputs), so stay below 2.0\n",
    "!sudo pip3 install -U 'tensorflow-gpu<2'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import kaggle\n",
    "\n",
    "kaggle.api.authenticate()\n",
    "\n",
    "!kaggle competitions download -c artificial-neural-networks-and-their-applications"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -p: do not fail when the directory is left over from an earlier run\n",
    "!mkdir -p project\n",
    "\n",
    "!mv *.tar.gz project/\n",
    "\n",
    "%cd project\n",
    "\n",
    "!ls .\n",
    "\n",
    "!tar -xzf train.tar.gz\n",
    "!tar -xzf test_images.tar.gz\n",
    "\n",
    "%cd .."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the training data\n",
    "The class name is the name of the sub-directory that holds its images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "from matplotlib.image import imread\n",
    "\n",
    "TRAIN_DIR = \"project/train\"\n",
    "IMG_SIZE = 64\n",
    "NUM_CHANNELS = 3\n",
    "NUM_FEATURES = IMG_SIZE * IMG_SIZE * NUM_CHANNELS\n",
    "MAX_TRAIN_IMAGES = 100000  # upper bound used to pre-allocate the training array\n",
    "\n",
    "\n",
    "def load_image(path):\n",
    "    \"\"\"Read one image file and return it as a flat, normalized float32 vector.\n",
    "\n",
    "    Pixel values are divided by 255.0. A 2-D (grayscale) image is copied into\n",
    "    all NUM_CHANNELS channels, so every image yields NUM_FEATURES values.\n",
    "    \"\"\"\n",
    "    image = np.float32(imread(path) / 255.0)  # normalize the image\n",
    "    # handle grayscale images\n",
    "    if image.ndim == 2:\n",
    "        image = np.stack([image] * NUM_CHANNELS, axis=-1)\n",
    "    # flatten the image\n",
    "    return image.reshape(NUM_FEATURES)\n",
    "\n",
    "\n",
    "# float32 matches the dtype produced by load_image and halves the memory of the default float64\n",
    "train_x = np.zeros((MAX_TRAIN_IMAGES, NUM_FEATURES), dtype=np.float32)\n",
    "train_labels = []   # collected in a list: np.append would copy the whole array per image\n",
    "class_mapping = {}  # class id -> category (sub-directory) name\n",
    "idx = 0\n",
    "# sorted() makes the class ids independent of the arbitrary os.listdir order\n",
    "for class_id, category in enumerate(sorted(os.listdir(TRAIN_DIR))):\n",
    "    print(category)\n",
    "    class_mapping[class_id] = category\n",
    "    image_dir = os.path.join(TRAIN_DIR, category, \"images\")\n",
    "    for img in sorted(os.listdir(image_dir)):\n",
    "        train_x[idx, :] = load_image(os.path.join(image_dir, img))\n",
    "        train_labels.append(class_id)\n",
    "        idx += 1\n",
    "\n",
    "# Drop unused pre-allocated rows so train_x and train_y always have the same length\n",
    "train_x = train_x[:idx]\n",
    "train_y = np.array(train_labels, dtype=np.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "print(train_x.shape, train_y.shape)\n",
    "# test if GPU is visible\n",
    "tf.test.is_gpu_available()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train a simple ANN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "\n",
    "# Parameters\n",
    "learning_rate = 0.01\n",
    "batch_size = 128\n",
    "train_epochs = 10\n",
    "# Floor division keeps num_steps an integer on Python 3 as well as Python 2\n",
    "num_steps = train_x.shape[0] // batch_size * train_epochs\n",
    "display_step = 100\n",
    "\n",
    "# Network Parameters\n",
    "n_hidden_1 = 256 # 1st layer number of neurons\n",
    "n_hidden_2 = 256 # 2nd layer number of neurons\n",
    "num_input = 64*64*3 # flattened image size (img shape: 64*64*3)\n",
    "num_classes = 200 # number of output classes\n",
    "\n",
    "# Define the input function for training\n",
    "input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "    x={'images': train_x}, y=train_y,\n",
    "    batch_size=batch_size, num_epochs=None, shuffle=True)\n",
    "\n",
    "\n",
    "# Define the neural network\n",
    "def neural_net(x_dict):\n",
    "    \"\"\"Build the fully connected network and return the class logits.\n",
    "\n",
    "    x_dict is the Estimator feature dict; the images are read from its 'images' key.\n",
    "    \"\"\"\n",
    "    # TF Estimator input is a dict, in case of multiple inputs\n",
    "    x = x_dict['images']\n",
    "    # NOTE(review): no activation is passed to the dense layers, so they are linear;\n",
    "    # consider activation=tf.nn.relu on the hidden layers when improving the baseline.\n",
    "    # Hidden fully connected layer with n_hidden_1 neurons\n",
    "    layer_1 = tf.layers.dense(x, n_hidden_1)\n",
    "    # Hidden fully connected layer with n_hidden_2 neurons\n",
    "    layer_2 = tf.layers.dense(layer_1, n_hidden_2)\n",
    "    # Output fully connected layer with a neuron for each class\n",
    "    out_layer = tf.layers.dense(layer_2, num_classes)\n",
    "    return out_layer\n",
    "\n",
    "\n",
    "# Define the model function (following TF Estimator Template)\n",
    "def model_fn(features, labels, mode):\n",
    "    \"\"\"Estimator model function: returns the EstimatorSpec for the given mode.\n",
    "\n",
    "    In PREDICT mode only the predicted class ids are returned; otherwise the spec\n",
    "    also carries the loss, the training op and an accuracy metric.\n",
    "    \"\"\"\n",
    "    # Build the neural network\n",
    "    logits = neural_net(features)\n",
    "\n",
    "    # Predictions\n",
    "    pred_classes = tf.argmax(logits, axis=1)\n",
    "\n",
    "    # If prediction mode, early return\n",
    "    if mode == tf.estimator.ModeKeys.PREDICT:\n",
    "        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)\n",
    "\n",
    "    # Define loss and optimizer\n",
    "    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "        logits=logits, labels=tf.cast(labels, dtype=tf.int32)))\n",
    "    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
    "    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())\n",
    "\n",
    "    # Evaluate the accuracy of the model\n",
    "    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)\n",
    "\n",
    "    # TF Estimators requires to return a EstimatorSpec, that specify\n",
    "    # the different ops for training, evaluating, ...\n",
    "    estim_specs = tf.estimator.EstimatorSpec(\n",
    "        mode=mode,\n",
    "        predictions=pred_classes,\n",
    "        loss=loss_op,\n",
    "        train_op=train_op,\n",
    "        eval_metric_ops={'accuracy': acc_op})\n",
    "\n",
    "    return estim_specs\n",
    "\n",
    "\n",
    "# Build the Estimator\n",
    "model = tf.estimator.Estimator(model_fn)\n",
    "\n",
    "# Train the Model\n",
    "model.train(input_fn, steps=num_steps)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "TEST_DIR = \"project/test_images\"\n",
    "\n",
    "# Row i of test_x always belongs to test_filenames[i]\n",
    "test_filenames = sorted(os.listdir(TEST_DIR))\n",
    "# Sized from the directory listing, so there are no unused all-zero rows;\n",
    "# float32 keeps the dtype identical to the training data the model was built with\n",
    "test_x = np.zeros((len(test_filenames), NUM_FEATURES), dtype=np.float32)\n",
    "for idx, img in enumerate(test_filenames):\n",
    "    test_x[idx, :] = load_image(os.path.join(TEST_DIR, img))\n",
    "print(test_x.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate submission"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shuffle=False keeps the predictions in the same order as test_filenames\n",
    "input_fn_test = tf.estimator.inputs.numpy_input_fn(\n",
    "    x={'images': test_x},\n",
    "    batch_size=batch_size, shuffle=False)\n",
    "\n",
    "preds = list(model.predict(input_fn_test))\n",
    "\n",
    "# The context manager closes the file even if writing a row fails\n",
    "with open('submission.csv', 'w') as f:\n",
    "    f.write('Id,Category\\n')  # write header\n",
    "    for filename, pred_label in zip(test_filenames, preds):\n",
    "        f.write('%s,%s\\n' % (filename, class_mapping[pred_label]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Don't forget to upload the submission.csv!**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}