From 245b8b9edf53093e44cc5cd875904de556d10474 Mon Sep 17 00:00:00 2001
From: Petra
Date: Wed, 15 Jan 2020 14:48:23 +0100
Subject: [PATCH] Added a basic ANN example 8_neural_nets-perceptron.py

---
Review notes (free-form text in this position is not part of the commit):
 * 8_neural_nets-perceptron.py: removed the trailing blank inside the CSV
   path literal; the name would not resolve on filesystems that keep
   trailing spaces.
 * 8_neural_nets-perceptron.py: Dense() now uses the Keras 2 keyword names
   (units / kernel_initializer) instead of the Keras 1 names
   (output_dim / init), and input_dim follows len(feature_cols).
 * 8_neural_nets-perceptron.py: the prediction preview prints at most 10
   rows instead of assuming the test split holds at least 10.
 * All edits are line-for-line, so hunk sizes and the diffstat are unchanged;
   the blob id on the "index" line predates these edits.
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy. Runs of spaces inside string literals could not be recovered, so
   compare against the original commit before applying.
 * fix_random_seed.py is unchanged. NOTE(review): it calls tf.ConfigProto,
   tf.Session and tf.set_random_seed at the top level of the tf module;
   confirm this matches the TensorFlow version the course targets.

 8_neural_nets-perceptron.py    |  83 +++++++++++++++++++++++
 9_neural_nets-4-time_series.py | 117 ---------------------------------
 fix_random_seed.py             |  15 +++++
 3 files changed, 98 insertions(+), 117 deletions(-)
 create mode 100644 8_neural_nets-perceptron.py
 delete mode 100644 9_neural_nets-4-time_series.py
 create mode 100644 fix_random_seed.py

diff --git a/8_neural_nets-perceptron.py b/8_neural_nets-perceptron.py
new file mode 100644
index 0000000..af27623
--- /dev/null
+++ b/8_neural_nets-perceptron.py
@@ -0,0 +1,83 @@
+""" ---------------------------------------------------------------"""
+""" Neural networks introduction """
+""" ---------------------------------------------------------------"""
+import pandas as pd
+from sklearn.model_selection import train_test_split
+#import fix_random_seed
+
+
+# -------------------------------------------------------------------------------
+# For loading the data, the code is practically the same as in 2_classification.py
+print(""" --- Load the data ---""")
+csvFileName = r"./Datasets/A-greater-then-B.csv"
+df = pd.read_csv(csvFileName)
+print(df.head())
+print("data shape: ", df.shape)
+
+print(""" --- Set the features (independednt variables, attributes) and target ---""")
+feature_cols = ['A', 'B', 'C']
+target_var = 'A>B'
+
+print( """ --- transform from categorical target (True, False) into numeric (1, 0) --- """)
+df[target_var] = df[target_var].map(lambda x: 1 if x==True else 0)
+print(df.head())
+
+X = df[feature_cols].values
+y = df[target_var].values
+print("Features: ", feature_cols, "\nTarget:", target_var)
+
+
+# one hot encode outputs
+#y_train = np_utils.to_categorical(y_train)
+#y_validation = np_utils.to_categorical(y_validation)
+#y_test = np_utils.to_categorical(y_test)
+#num_classes = y_test.shape[1]
+
+
+print(""" --- Train-test split ---""")
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+print("train set X shape: ", X_train.shape, "train set y shape: ", y_train.shape)
+print("test set X shape: ", X_test.shape, "test set y shape: ", y_test.shape)
+
+# -------------------------------------------------------------------------------
+print (""" --- Introducing a validation set --- """)
+# train-validation split (carves the validation set out of the training part only)
+X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
+print("train set X shape: ", X_train.shape, "train set y shape: ", y_train.shape)
+print("validation set X shape: ", X_validation.shape, "validation set y shape: ", y_validation.shape)
+print("test set X shape: ", X_test.shape, "test set y shape: ", y_test.shape)
+
+
+# -------------------------------------------------------------------------------
+# -------------------------------------------------------------------------------
+print("""-----------------------------------------------------""")
+print("""\n ---- A Single Perceptron or Shallow network --- \n""")
+
+from keras.models import Sequential
+from keras.layers import Dense
+
+model = Sequential()
+model.add(Dense(units=1, input_dim=len(feature_cols), kernel_initializer='random_uniform', activation='sigmoid'))
+model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
+
+# Fit the model; the validation set is only used for monitoring during training
+model.fit(X_train, y_train, validation_data=(X_validation, y_validation), epochs=10, batch_size=64, verbose=0)
+
+print(""" --- Predict --- (10 examples from the test set)""")
+y_pred = model.predict(X_test)
+print(" Actual Predicted Difference")
+#for i in range(len(y_test)):
+for i in range(min(10, len(y_test))):
+    print("{0:6.2f} {1:8.2f} {2:8.2f}".format(y_test[i], y_pred[i][0], y_test[i]- y_pred[i][0]))
+
+# Model performance
+print(""" -- Model Performance ---""")
+# evaluate() returns the loss value & metrics values ([mse loss, mae]) for the model in test mode.
+scores = model.evaluate(X_train, y_train, verbose=0)
+print("Train set error: ", scores)
+
+scores = model.evaluate(X_validation, y_validation, verbose=0)
+print("Validation set error: ", scores)
+
+scores = model.evaluate(X_test, y_test, verbose=0)
+print("Test set error: ", scores)
diff --git a/9_neural_nets-4-time_series.py b/9_neural_nets-4-time_series.py
deleted file mode 100644
index e375a11..0000000
--- a/9_neural_nets-4-time_series.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from pandas import read_csv
-from matplotlib import pyplot
-from sklearn.preprocessing import LabelEncoder
-from sklearn.preprocessing import MinMaxScaler
-from pandas import DataFrame
-from pandas import concat
-from keras import Sequential
-from keras.layers import Dense
-from keras.layers import LSTM
-from sklearn.metrics import mean_squared_error
-from numpy import concatenate
-from math import sqrt
-
-#load and plot dataset
-
-# load dataset
-dataset = read_csv('./Datasets/pollution_clean.csv', header=0, index_col=0)
-values = dataset.values
-# specify columns to plot
-groups = [0, 1, 2, 3, 5, 6, 7]
-i = 1
-# plot each column
-pyplot.figure()
-for group in groups:
-    pyplot.subplot(len(groups), 1, i)
-    pyplot.plot(values[:, group])
-    pyplot.title(dataset.columns[group], y=0.5, loc='right')
-    i += 1
-pyplot.show()
-
-#---------------------------------------------------
-# convert series to supervised learning
-
-def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
-    n_vars = 1 if type(data) is list else data.shape[1]
-    df = DataFrame(data)
-    cols, names = list(), list()
-    # input sequence (t-n, ... t-1)
-    for i in range(n_in, 0, -1):
-        cols.append(df.shift(i))
-        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
-    # forecast sequence (t, t+1, ... t+n)
-    for i in range(0, n_out):
-        cols.append(df.shift(-i))
-        if i == 0:
-            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
-        else:
-            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
-    # put it all together
-    agg = concat(cols, axis=1)
-    agg.columns = names
-    # drop rows with NaN values
-    if dropnan:
-        agg.dropna(inplace=True)
-    return agg
-
-
-# load dataset
-#dataset = read_csv('.\Datasets\pollution_clean.csv', header=0, index_col=0)
-#values = dataset.values
-# integer encode direction
-encoder = LabelEncoder()
-values[:, 4] = encoder.fit_transform(values[:, 4])
-# ensure all data is float
-values = values.astype('float32')
-# normalize features
-scaler = MinMaxScaler(feature_range=(0, 1))
-scaled = scaler.fit_transform(values)
-# frame as supervised learning
-reframed = series_to_supervised(scaled, 1, 1)
-# drop columns we don't want to predict
-reframed.drop(reframed.columns[[9, 10, 11, 12, 13, 14, 15]], axis=1, inplace=True)
-print(reframed.head())
-
-
-# split into train and test sets
-values = reframed.values
-n_train_hours = 365 * 24
-train = values[:n_train_hours, :]
-test = values[n_train_hours:, :]
-# split into input and outputs
-train_X, train_y = train[:, :-1], train[:, -1]
-test_X, test_y = test[:, :-1], test[:, -1]
-# reshape input to be 3D [samples, timesteps, features]
-train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
-test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
-print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
-
-# design network
-model = Sequential()
-model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
-model.add(Dense(1))
-model.compile(loss='mae', optimizer='adam')
-# fit network
-history = model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
-# plot history
-pyplot.plot(history.history['loss'], label='train')
-pyplot.plot(history.history['val_loss'], label='test')
-pyplot.legend()
-pyplot.show()
-
-print ("make prediction")
-# make a prediction
-yhat = model.predict(test_X)
-test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
-# invert scaling for forecast
-inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
-inv_yhat = scaler.inverse_transform(inv_yhat)
-inv_yhat = inv_yhat[:,0]
-# invert scaling for actual
-test_y = test_y.reshape((len(test_y), 1))
-inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
-inv_y = scaler.inverse_transform(inv_y)
-inv_y = inv_y[:,0]
-# calculate RMSE
-rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
-print('Test RMSE: %.3f' % rmse)
\ No newline at end of file
diff --git a/fix_random_seed.py b/fix_random_seed.py
new file mode 100644
index 0000000..934014d
--- /dev/null
+++ b/fix_random_seed.py
@@ -0,0 +1,15 @@
+import numpy as np
+import tensorflow as tf
+import random as rn
+import os
+from keras import backend as K
+
+random_seed = 42
+
+os.environ['PYTHONHASHSEED'] = '0'
+np.random.seed(random_seed)
+rn.seed(random_seed)
+session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+tf.set_random_seed(random_seed)
+sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
+K.set_session(sess)
-- 
2.24.1