from pandas import read_csv
from matplotlib import pyplot
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from pandas import concat
from keras import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.metrics import mean_squared_error
from numpy import concatenate
from math import sqrt
# Train a single-layer LSTM on a CSV of numeric columns and report the RMSE on
# a held-out tail of the data.  The last CSV column is the prediction target;
# all other columns are input features (see the [:, :-1] / [:, -1] split).


def _invert_target_scaling(scaler, features, target):
    """Return `target` mapped back to its original (unscaled) units.

    The scaler is fitted on rows laid out as [features..., target], so the
    same layout is rebuilt here before calling ``inverse_transform``; only the
    last (target) column of the result is returned.

    features: 2D array of scaled feature columns, one row per sample.
    target:   scaled target values, either 1D or shaped (samples, 1).
    """
    target = target.reshape((len(target), 1))
    restored = scaler.inverse_transform(concatenate((features, target), axis=1))
    return restored[:, -1]


# load dataset
#file = r'd:\tmp\technical_analysis_test_eth_usd_bitstamp_900_2018-12-01T00-00-00_2019-02-15T00-00-00_5minfuture.csv'
file = r"d:\tmp\technical_tmp.csv"
dataset = read_csv(file, header=0, index_col=0)
values = dataset.values

# plot every column in its own subplot, stacked vertically
groups = range(len(dataset.columns))
pyplot.figure()
for i, group in enumerate(groups, start=1):
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
pyplot.show()
print(dataset.head())

# ensure all data is float
values = values.astype('float32')
# normalize every column (features and target) to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
print ("shape ", scaled.shape)

# split into train and test sets: the first n_train_hours rows train the
# model, the remainder is held out for evaluation
n_train_hours = 5000
if len(scaled) <= n_train_hours:
    raise ValueError(
        "dataset has %d rows; more than %d are required to leave a test set"
        % (len(scaled), n_train_hours)
    )
# the split must use the *scaled* data, otherwise the scaler is never applied
# and the inverse transform below would be meaningless
train = scaled[:n_train_hours, :]
print ("tainn ", train.shape)
test = scaled[n_train_hours:, :]
print ("test ", test.shape)
# split into inputs (all but last column) and output (last column)
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features] with a single timestep
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

# design network: one LSTM layer followed by a single-unit regression output
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
# fit network; shuffle=False keeps the rows in their original order
history = model.fit(
    train_X,
    train_y,
    epochs=50,
    batch_size=72,
    validation_data=(test_X, test_y),
    verbose=2,
    shuffle=False,
)
# plot training vs. validation loss per epoch
pyplot.figure()
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()

print ("make prediction")
# make a prediction
yhat = model.predict(test_X)
print ("yhat", yhat.shape)
# flatten the timestep axis so features are 2D again: [samples, features]
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
print ("test_X", test_X.shape)
# invert scaling for actual
inv_y = _invert_target_scaling(scaler, test_X, test_y)
print ("inv_y", inv_y.shape)
# invert scaling for forecast
inv_yhat = _invert_target_scaling(scaler, test_X, yhat)
print ("inv_yhat conc", inv_yhat.shape)
# calculate RMSE in the target's original units
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
