#!/usr/bin/python3.6
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
#import tensorflow.compat.v1 as tf
import tensorflow as tf
from tensorflow.python.framework import ops
from tf_utils import load_dataset_rb_dots, random_mini_batches, convert_to_one_hot, predict

# eager execution
#tf.compat.v1.enable_eager_execution()

TEST_CASE = True
#TEST_CASE = False

#NUM_EPOCH = 3000
NUM_EPOCH = 10000
#NUM_EPOCH = 90000

np.random.seed(1)

########################################
### This pgm is copied from the course 2 week 2 pgm => course2/week2/test_cr2_wk2.py
### We are going to write the same pgm with tensorflow functions now.
### We implement it for batch gd only (not the other optimizers).
########################################


# GRADED FUNCTION: create_placeholders

def create_placeholders(n_x, n_y):
    """
    Creates the placeholders for the tensorflow session.

    Arguments:
    n_x -- scalar, size of an image vector (num_px * num_px = 64 * 64 * 3 = 12288)
    n_y -- scalar, number of classes (from 0 to 5, so -> 6)

    Returns:
    X -- placeholder for the data input, of shape [n_x, None] and dtype "tf.float32"
    Y -- placeholder for the input labels, of shape [n_y, None] and dtype "tf.float32"

    Tips:
    - You will use None because it lets us be flexible about the number of examples
      fed into the placeholders. In fact, the number of examples during test/train is different.
    """

    ### START CODE HERE ### (approx. 2 lines)
    X = tf.placeholder(tf.float32, shape=[n_x, None])
    Y = tf.placeholder(tf.float32, shape=[n_y, None])
    ### END CODE HERE ###

    return X, Y


if (TEST_CASE):
    X, Y = create_placeholders(12288, 6)
    print("X = " + str(X))
    print("Y = " + str(Y))


# GRADED FUNCTION: initialize_parameters

def initialize_parameters(layer_dims):
    """
    Initializes parameters to build a neural network with tensorflow. The shapes are:
                        W1 : [25, 12288]
                        b1 : [25, 1]
                        W2 : [12, 25]
                        b2 : [12, 1]
                        W3 : [6, 12]
                        b3 : [6, 1]

    Returns:
    parameters -- a dictionary of tensors containing W1, b1, W2, b2, W3, b3
    """

    tf.set_random_seed(1)               # so that your "random" numbers match ours

    ### START CODE HERE ### (approx. 6 lines of code)
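    # Note: Xavier/Glorot-uniform initialization scales the random weights by the layer's
    # fan-in and fan-out. The commented tf.initializers.glorot_uniform line below is the
    # equivalent spelling for newer TensorFlow releases where tf.contrib has been removed
    # (whether you need it depends on your installed TensorFlow version).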
    W1 = tf.get_variable("W1", [layer_dims[1], layer_dims[0]], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    #W1 = tf.get_variable("W1", [layer_dims[1], layer_dims[0]], initializer=tf.initializers.glorot_uniform(seed=1))
    b1 = tf.get_variable("b1", [layer_dims[1], 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [layer_dims[2], layer_dims[1]], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [layer_dims[2], 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [layer_dims[3], layer_dims[2]], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [layer_dims[3], 1], initializer=tf.zeros_initializer())
    ### END CODE HERE ###

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}

    return parameters


if (TEST_CASE):
    tf.reset_default_graph()
    with tf.Session() as sess:
        parameters = initialize_parameters([12288, 25, 12, 6])
        print("W1 = " + str(parameters["W1"]))
        print("b1 = " + str(parameters["b1"]))
        print("W2 = " + str(parameters["W2"]))
        print("b2 = " + str(parameters["b2"]))


# GRADED FUNCTION: forward_propagation

def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model:
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters

    Returns:
    Z3 -- the output of the last LINEAR unit
    """

    # Retrieve the parameters from the dictionary "parameters"
    # KA: These W1, b1, etc. are Tensors, and NOT numpy arrays
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    ### START CODE HERE ### (approx. 5 lines)    # Numpy Equivalents:
    Z1 = tf.add(tf.matmul(W1, X), b1)            # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                          # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)           # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                          # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)           # Z3 = np.dot(W3, A2) + b3
    ### END CODE HERE ###

    return Z3


if (TEST_CASE):
    tf.reset_default_graph()
    with tf.Session() as sess:
        X, Y = create_placeholders(12288, 6)
        parameters = initialize_parameters([12288, 25, 12, 6])
        Z3 = forward_propagation(X, parameters)
        print("Z3 = " + str(Z3))


# GRADED FUNCTION: compute_cost

def compute_cost(Z3, Y):
    """
    Computes the cost

    Arguments:
    Z3 -- output of forward propagation (output of the last LINEAR unit), of shape (6, number of examples)
    Y -- "true" labels vector placeholder, same shape as Z3

    Returns:
    cost -- Tensor of the cost function
    """

    # to fit the tensorflow requirement for tf.nn.softmax_cross_entropy_with_logits(...,...)
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)

    ### START CODE HERE ### (1 line of code)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    ### END CODE HERE ###

    return cost


if (TEST_CASE):
    tf.reset_default_graph()
    with tf.Session() as sess:
        X, Y = create_placeholders(12288, 6)
        parameters = initialize_parameters([12288, 25, 12, 6])
        Z3 = forward_propagation(X, parameters)
        cost = compute_cost(Z3, Y)
        print("cost = " + str(cost))


def model(X_train, Y_train, layers_dims, optimizer, learning_rate=0.0007,
          mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999,
          epsilon=1e-8, num_epochs=10000, print_cost=True):
    """
    3-layer neural network model which can be run in different optimizer modes.
    Arguments:
    X_train -- input data, of shape (2, number of examples)
    Y_train -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
    layers_dims -- python list, containing the size of each layer
    learning_rate -- the learning rate, scalar
    mini_batch_size -- the size of a mini batch
    beta -- Momentum hyperparameter
    beta1 -- Exponential decay hyperparameter for the past gradients estimates
    beta2 -- Exponential decay hyperparameter for the past squared gradients estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates
    num_epochs -- number of epochs
    print_cost -- True to print the cost every 1000 epochs

    Returns:
    parameters -- python dictionary containing your updated parameters
    """

    L = len(layers_dims)        # number of layers in the neural networks
    costs = []                  # to keep track of the cost
    t = 0                       # initializing the counter required for Adam update
    seed = 10                   # For grading purposes, so that your "random" minibatches are the same as ours
    m = X_train.shape[1]        # number of training examples

    ########## from test_cr2_wk3.py ###########
    ops.reset_default_graph()   # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)       # to keep consistent results
    seed = 3                    # to keep consistent results
    (n_x, m) = X_train.shape    # (n_x: input size, m: number of examples in the train set)
    n_y = Y_train.shape[0]      # n_y: output size
    costs = []                  # To keep track of the cost
    print("n_x = ", n_x, " n_y = ", n_y, " m = ", m)

    # Create Placeholders of shape (n_x, n_y)
    ### START CODE HERE ### (1 line)
    X, Y = create_placeholders(n_x, n_y)
    ### END CODE HERE ###

    # Initialize parameters
    ### START CODE HERE ### (1 line)
    parameters = initialize_parameters(layers_dims)
    ### END CODE HERE ###

    # Forward propagation: Build the forward propagation in the tensorflow graph
    ### START CODE HERE ### (1 line)
    Z3 = forward_propagation(X, parameters)
    ### END CODE HERE ###

    # Cost function: Add cost function to tensorflow graph
    ### START CODE HERE ### (1 line)
    cost = compute_cost(Z3, Y)
    ### END CODE HERE ###

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    ### START CODE HERE ### (1 line)
    optimizer_tf = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
    #optimizer_tf = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    ### END CODE HERE ###

    # Initialize all the variables
    init = tf.global_variables_initializer()
    #### end of test_cr2_wk3.py ###########

    # init param already done above
    # Initialize parameters
    #parameters = initialize_parameters(layers_dims)

    # init param for all algo done above. dW, db and any other needed param are all init by the tf optimizer.
    # Initialize the optimizer
    # Note: initialize_velocity/initialize_adam come from the course 2 week 2 pgm and are not
    # defined in this file; only optimizer == "gd" is actually used here.
    if optimizer == "gd":
        pass  # no initialization required for gradient descent
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Optimization loop
        for i in range(num_epochs):

            # Define the random minibatches.
            # We increment the seed to reshuffle the dataset differently after each epoch.
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, mini_batch_size, seed)
            cost_total = 0

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # Forward propagation
                #a3, caches = forward_propagation(minibatch_X, parameters)
                # Compute cost and add to the cost total
                #cost_total += compute_cost(a3, minibatch_Y)
                # Backward propagation
                #grads = backward_propagation(minibatch_X, minibatch_Y, caches)
                # Update parameters
                #if optimizer == "gd":
                #    parameters = update_parameters_with_gd(parameters, grads, learning_rate)
                #elif optimizer == "momentum":
                #    parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
                #elif optimizer == "adam":
                #    t = t + 1  # Adam counter
                #    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                #                                                   learning_rate, beta1, beta2, epsilon)

                #### Above code for fwd prop, compute_cost, back_prop and update_para is replaced by a single line in tf ###
                ### START CODE HERE ### (1 line)
                #para = sess.run(parameters)
                #print("tmp_W1 = " + str(para["W1"]))
                _, minibatch_cost = sess.run([optimizer_tf, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                ### END CODE HERE ###

                #print("cost tot = ", minibatch_cost)
                # compute_cost uses tf.reduce_mean, so minibatch_cost is already the average loss per
                # example in this minibatch; averaging over the minibatches gives the epoch cost.
                cost_total += minibatch_cost / len(minibatches)

            # The cost averaging below was done in test_cr2_wk2.py, where compute_cost did no /m.
            # Here each minibatch_cost is already a per-example mean, so no further /m is needed.
            #cost_avg = cost_total / m

            # Print the cost every 1000 epochs
            if print_cost and i % 1000 == 0:
                print("Cost after epoch %i: %f" % (i, cost_total))
                #parameters = sess.run(parameters)
                #print("W1 = " + str(parameters["W1"]))
                #print("b1 = " + str(parameters["b1"]))
                #print("W2 = " + str(parameters["W2"]))
                #print("b2 = " + str(parameters["b2"]))
                #print("W3 = " + str(parameters["W3"]))
                #print("b3 = " + str(parameters["b3"]))
            if print_cost and i % 100 == 0:
                costs.append(cost_total)

        # plot the cost
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

        #### below 3 lines added from test_cr2_wk3.py
        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the train set (the test set is not used here)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        #print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

    return parameters


########################################
########## Main pgm ####################
########################################

train_X, train_Y_orig = load_dataset_rb_dots()
train_Y = convert_to_one_hot(train_Y_orig, 2)

print("number of training examples = " + str(train_X.shape[1]))
print("X_train shape: " + str(train_X.shape))
print("Y_train shape: " + str(train_Y.shape))

### Mini batch gd #####
print("\n Running mini batch gd \n")

# train 3-layer model
layers_dims = [train_X.shape[0], 25, 12, 2]
parameters = model(train_X, train_Y, layers_dims, optimizer="gd", num_epochs=NUM_EPOCH)

# Predict => Prediction already done within model()
#predictions = predict(train_X, train_Y, parameters)

# Plot decision boundary => Doesn't work as predict, predict_dec error out.
# Don't need it anyway, as we got our accuracy numbers from model() above.
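# As noted above, the course predict/predict_dec utilities error out here. As an illustrative
# alternative (not part of the original course utilities), a plain numpy forward pass over the
# trained parameters can serve as a decision function. This is a minimal sketch: predict_dec_np
# is a hypothetical helper introduced here, and it assumes parameters holds numpy arrays, which
# is true after model() runs sess.run(parameters). It mirrors forward_propagation() above.
def predict_dec_np(parameters, X):
    """Hypothetical helper: numpy LINEAR->RELU->LINEAR->RELU->LINEAR pass, returning argmax per column."""
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]
    Z1 = np.dot(W1, X) + b1
    A1 = np.maximum(Z1, 0)            # relu
    Z2 = np.dot(W2, A1) + b2
    A2 = np.maximum(Z2, 0)            # relu
    Z3 = np.dot(W3, A2) + b3
    return np.argmax(Z3, axis=0)      # predicted class index for each example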
plt.title("Model with Gradient Descent optimization") axes = plt.gca() axes.set_xlim([-1.5,2.5]) axes.set_ylim([-1,1.5]) #plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y) #plot_decision_boundary(lambda x: predict(parameters, x.T), train_X, train_Y)