Commit 3271b901 authored by Petra's avatar Petra

added code for neural networks from...

added code for neural networks from https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/
parent 05a47ae2
......@@ -16,40 +16,40 @@ df = pd.read_csv(csvFileName)
print(df.head())
print("data shape: ", df.shape)

# --- Regression: predict Height from Age -----------------------------------
# (a commented-out near-duplicate of this code was removed; see VCS history)

feature_cols = ['Age']   # predictor column(s)
target_var = 'Height'    # response column

X = df[feature_cols].values
y = df[target_var].values

# Train-test split (fixed random_state so runs are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Initialize the learners
# NOTE(review): this rebinds the imported `dummy` module name to an instance,
# making the module unreachable afterwards -- rename one of them if the
# module is needed further down the file.
dummy = dummy.DummyRegressor()
regr = linear_model.LinearRegression()
reg_tree = tree.DecisionTreeRegressor(min_samples_leaf=8)
knn = KNeighborsRegressor(n_neighbors=2)

learner = reg_tree  # choose which model is trained and applied below

# Train and apply
learner.fit(X_train, y_train)
y_pred = learner.predict(X_test)

print("\n Actual Predicted")
for i in range(len(y_test)):
    print("{0:6.2f} {1:8.2f}".format(y_test[i], y_pred[i]))

# Report standard regression metrics on the held-out test set.
print("Performance:")
print("MAE \t{0:5.2f}".format( metrics.mean_absolute_error(y_test,y_pred)))
print("MSE \t{0:5.2f}".format( metrics.mean_squared_error(y_test,y_pred)))
print("R2 \t{0:5.2f}".format( metrics.r2_score(y_test,y_pred)))
# Visualize the tree
# https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html
treeFileName = 'reg_tree.dot'
# BUG FIX: out_file was the quoted literal 'treeFileName.dot', so the export
# went to the wrong path; pass the variable so the file is reg_tree.dot,
# which the GraphViz commands below expect.
tree.export_graphviz(learner, out_file=treeFileName)

# Render with GraphViz (install it first):
#   $ dot -Tps  reg_tree.dot -o tree.ps   (PostScript format)
#   $ dot -Tpng reg_tree.dot -o tree.png  (PNG format)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
......@@ -25,34 +26,34 @@ frame = plt.gca()
# Hide both axes of the current frame -- the scatter plot needs no ticks.
for axis in (frame.axes.get_xaxis(), frame.axes.get_yaxis()):
    axis.set_visible(False)
#
# def plot_clusters(data, algorithm, args, kwds):
# start_time = time.time()
# labels = algorithm(*args, **kwds).fit_predict(data)
# end_time = time.time()
# palette = sns.color_palette('deep', np.unique(labels).max() + 1)
# colors = [palette[x] if x >= 0 else (0.0, 0.0, 0.0) for x in labels]
# plt.scatter(data.T[0], data.T[1], c=colors, **plot_kwds)
# frame = plt.gca()
# frame.axes.get_xaxis().set_visible(False)
# frame.axes.get_yaxis().set_visible(False)
# plt.title('Clusters found by {}'.format(str(algorithm.__name__)), fontsize=24)
# plt.text(-0.55, 0.65, 'Clustering took {:.2f} s'.format(end_time - start_time), fontsize=14)
#
#
#
# f2 = plt.figure(2)
# plot_clusters(data, cluster.KMeans, (), {'n_clusters':6})
#
# # add a plot with a different number of clusters
#
# f3 = plt.figure(3)
# plot_clusters(data, cluster.AgglomerativeClustering, (), {'n_clusters':6, 'linkage':'ward'})
#
# f4 = plt.figure(4)
# plot_clusters(data, cluster.SpectralClustering, (), {'n_clusters':6})
#
# f5 = plt.figure(5)
# plot_clusters(data, cluster.DBSCAN, (), {'eps':0.025})
def plot_clusters(data, algorithm, args, kwds):
    """Fit algorithm(*args, **kwds) on data, scatter-plot the points coloured
    by cluster label (noise points, label < 0, in black), and annotate the
    figure with the wall-clock fit time."""
    t0 = time.time()
    labels = algorithm(*args, **kwds).fit_predict(data)
    elapsed = time.time() - t0
    # One palette entry per cluster id; negative labels (noise) get black.
    palette = sns.color_palette('deep', np.unique(labels).max() + 1)
    point_colors = [(0.0, 0.0, 0.0) if lbl < 0 else palette[lbl] for lbl in labels]
    plt.scatter(data.T[0], data.T[1], c=point_colors, **plot_kwds)
    axes_frame = plt.gca()
    axes_frame.axes.get_xaxis().set_visible(False)
    axes_frame.axes.get_yaxis().set_visible(False)
    plt.title('Clusters found by {}'.format(str(algorithm.__name__)), fontsize=24)
    plt.text(-0.55, 0.65, 'Clustering took {:.2f} s'.format(elapsed), fontsize=14)
# Run each clustering algorithm in its own numbered figure.
experiments = [
    (2, cluster.KMeans, {'n_clusters': 6}),
    # add a plot with a different number of clusters
    (3, cluster.AgglomerativeClustering, {'n_clusters': 6, 'linkage': 'ward'}),
    (4, cluster.SpectralClustering, {'n_clusters': 6}),
    (5, cluster.DBSCAN, {'eps': 0.025}),
]
for figure_number, algorithm, kwargs in experiments:
    plt.figure(figure_number)
    plot_clusters(data, algorithm, (), kwargs)
plt.show()
\ No newline at end of file
from keras.datasets import mnist
import matplotlib.pyplot as plt
""" ---------------------------------------"""
""" Neural networks 1 """
""" ---------------------------------------"""
"""https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/"""
# Plot ad hoc mnist instances
(X_train, y_train), (X_test, y_test) = mnist.load_data() # Dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images.
# plot 4 images as gray scale
plt.subplot(221)
plt.imshow(X_train[0], cmap=plt.get_cmap('gray'))
plt.subplot(222)
plt.imshow(X_train[1], cmap=plt.get_cmap('gray'))
plt.subplot(223)
plt.imshow(X_train[2], cmap=plt.get_cmap('gray'))
plt.subplot(224)
plt.imshow(X_train[3], cmap=plt.get_cmap('gray'))
# show the plot
plt.show()
""" ---------------------------------------------------------------"""
""" Neural networks 1 - Multilayer perceptron """
""" ---------------------------------------------------------------"""
"""https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/"""
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten each 28x28 image into a 784-long float vector, and
# normalize the 0-255 pixel values into the 0-1 range in the same step.
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32') / 255

# One-hot encode the digit labels; num_classes is read by baseline_model().
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
# define baseline model
def baseline_model():
    """Build and compile the baseline MLP: one hidden ReLU layer as wide as
    the input (num_pixels, module global) and a softmax output over
    num_classes digits."""
    network = Sequential([
        Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'),
        Dense(num_classes, kernel_initializer='normal', activation='softmax'),
    ])
    # Targets are one-hot encoded, hence categorical crossentropy.
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network
# build the model
model = baseline_model()
# Fit the model; the test set doubles as per-epoch validation data.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)  # [loss, accuracy]
error_pct = 100 - scores[1] * 100
print("Baseline Error: %.2f%%" % error_pct)
""" ---------------------------------------------------------------"""
""" Neural networks 2 - Convolutinal NN small """
""" ---------------------------------------------------------------"""
"""https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/"""
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use Theano-style "channels first" image ordering: (channels, rows, cols).
# FIX: set_image_dim_ordering('th') is the removed Keras 1 spelling; this
# file already uses the Keras 2 API (Conv2D, epochs=...), whose call is:
K.set_image_data_format('channels_first')

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# reshape to be [samples][channels][rows][cols] (single gray channel)
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# One-hot encode the labels; num_classes is read by baseline_model() below.
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
def baseline_model():
    """Build and compile the small CNN:
    conv -> max-pool -> dropout -> flatten -> dense ReLU -> softmax."""
    cnn = Sequential([
        # 32 feature maps from 5x5 kernels over a single-channel 28x28 input.
        Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),  # regularization: drop 20% of activations
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    # Targets are one-hot encoded, hence categorical crossentropy.
    cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return cnn
# build the model
model = baseline_model()
# Fit the model; the test set doubles as per-epoch validation data.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)  # [loss, accuracy]
error_rate = 100 - scores[1] * 100
print("CNN Error: %.2f%%" % error_rate)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment