Commit 245b8b9e authored by Petra

Added a basic ANN example 8_neural_nets-perceptron.py

parent 6d2a1413
""" ---------------------------------------------------------------"""
""" Neural networks introduction """
""" ---------------------------------------------------------------"""
import pandas as pd
from sklearn.model_selection import train_test_split
#import fix_random_seed
# -------------------------------------------------------------------------------
# For loading the data, the code is practically the same as in 2_classification.py
print(""" --- Load the data ---""")
csvFileName = r"./Datasets/A-greater-then-B.csv"
df = pd.read_csv(csvFileName)
print(df.head())
print("data shape: ", df.shape)
print(""" --- Set the features (independednt variables, attributes) and target ---""")
feature_cols = ['A', 'B', 'C']
target_var = 'A>B'
print( """ --- transform from categorical target (True, False) into numeric (1, 0) --- """)
df[target_var] = df[target_var].map(lambda x: 1 if x==True else 0)
print(df.head())
X = df[feature_cols].values
y = df[target_var].values
print("Features: ", feature_cols, "\nTarget:", target_var)
# one hot encode outputs
#y_train = np_utils.to_categorical(y_train)
#y_validation = np_utils.to_categorical(y_validation)
#y_test = np_utils.to_categorical(y_test)
#num_classes = y_test.shape[1]
print(""" --- Train-test split ---""")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
print("train set X shape: ", X_train.shape, "train set y shape: ", y_train.shape)
print("test set X shape: ", X_test.shape, "test set y shape: ", y_test.shape)
# -------------------------------------------------------------------------------
print (""" --- Introducing a validation set --- """)
# train-validation split
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
print("train set X shape: ", X_train.shape, "train set y shape: ", y_train.shape)
print("validation set X shape: ", X_validation.shape, "validation set y shape: ", y_validation.shape)
print("test set X shape: ", X_test.shape, "test set y shape: ", y_test.shape)
# -------------------------------------------------------------------------------
# -------------------------------------------------------------------------------
print("""-----------------------------------------------------""")
print("""\n ---- A Single Perceptron or Shallow network --- \n""")
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(1, input_dim=3, kernel_initializer='uniform', activation='sigmoid'))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
# Fit the model
model.fit(X_train, y_train, validation_data=(X_validation, y_validation), epochs=10, batch_size=64, verbose=0)
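# The validation set is only monitored during fitting (it never updates the
# weights); watching its loss alongside the training loss is how overfitting
# is spotted epoch by epoch.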
print(""" --- Predict --- (10 examples from the test set)""")
y_pred = model.predict(X_test)
print(" Actual Predicted Difference")
#for i in range(len(y_test)):
for i in range(10):
    print("{0:6.2f} {1:8.2f} {2:8.2f}".format(y_test[i], y_pred[i][0], y_test[i] - y_pred[i][0]))
# Model performance
print(""" --- Model performance ---""")
# Returns the loss value & metrics values for the model in test mode.
scores = model.evaluate(X_train, y_train, verbose=0)
print("Train set error: ", scores)
scores = model.evaluate(X_validation, y_validation, verbose=0)
print("Validation set error: ", scores)
scores = model.evaluate(X_test, y_test, verbose=0)
print("Test set error: ", scores)
# -------------------------------------------------------------------------------
# -------------------------------------------------------------------------------
print("""-----------------------------------------------------""")
print("""\n ---- An LSTM network for multivariate time series forecasting --- \n""")
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from numpy import concatenate
from math import sqrt
# load and plot the dataset
dataset = read_csv('./Datasets/pollution_clean.csv', header=0, index_col=0)
values = dataset.values
# specify columns to plot
groups = [0, 1, 2, 3, 5, 6, 7]
i = 1
# plot each column
pyplot.figure()
for group in groups:
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
    i += 1
pyplot.show()
#---------------------------------------------------
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
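# A quick illustration of series_to_supervised on toy data (illustrative only):
# with one lag, the series [1, 2, 3, 4] becomes rows (var1(t-1), var1(t)) =
# (1,2), (2,3), (3,4) once the NaN row is dropped.
print(series_to_supervised([1, 2, 3, 4]))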
# load dataset
#dataset = read_csv('.\Datasets\pollution_clean.csv', header=0, index_col=0)
#values = dataset.values
# integer encode direction
encoder = LabelEncoder()
values[:, 4] = encoder.fit_transform(values[:, 4])
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
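# Note: MinMaxScaler stores per-column min/max from `values`, so
# scaler.inverse_transform later expects arrays with the same 8-column layout;
# that is why the forecast is concatenated with the remaining test features
# below before the scaling is inverted.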
# frame as supervised learning
reframed = series_to_supervised(scaled, 1, 1)
# drop the time-t columns we don't want to predict, keeping only var1(t),
# the pollution value that serves as the target
reframed.drop(reframed.columns[[9, 10, 11, 12, 13, 14, 15]], axis=1, inplace=True)
print(reframed.head())
# split into train and test sets
values = reframed.values
n_train_hours = 365 * 24
train = values[:n_train_hours, :]
test = values[n_train_hours:, :]
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
# fit network
history = model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()
print ("make prediction")
# make a prediction
yhat = model.predict(test_X)
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for forecast
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
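# Optional complementary metric (illustrative addition): MAE on the original
# scale corresponds directly to the 'mae' loss the network was trained with.
from sklearn.metrics import mean_absolute_error
print('Test MAE: %.3f' % mean_absolute_error(inv_y, inv_yhat))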
# -------------------------------------------------------------------------------
# fix_random_seed helper (cf. the commented-out `import fix_random_seed` in the
# perceptron script): seeds every source of randomness for reproducible runs.
import numpy as np
import tensorflow as tf
import random as rn
import os
random_seed = 42
# seed every source of randomness: Python hashing, NumPy, Python's random, TF
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(random_seed)
rn.seed(random_seed)
tf.random.set_seed(random_seed)
# single-threaded execution avoids nondeterministic op ordering
# (assumes TensorFlow 2.x)
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)
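# Usage: import this module before building any model (e.g. uncomment the
# `import fix_random_seed` line near the top of the perceptron script) so the
# seeds are in place before Keras initialises any weights.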