Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Petra
DM_course
Commits
8cab96ea
Commit
8cab96ea
authored
Mar 21, 2019
by
Petra
Browse files
minor updates
parent
33e24499
Changes
3
Hide whitespace changes
Inline
Side-by-side
7_text_classification.py
View file @
8cab96ea
# Large Movie Review Dataset
# First download the data from the following URL and unzip it.
# using http://ai.stanford.edu/~amaas/data/sentiment/
# Change four paths to the data in lines 35 - 43:
# positive_reviews_dir, negative_reviews_dir, test_positive_reviews_dir, test_negative_reviews_dir
# When you first use the stopwords and the wordnet lemmatizer, there will be an error (resource not found).
# The following two lines of code will download the resources.
# nltk.download('stopwords')
# nltk.download('wordnet')
...
...
9_neural_nets-4-time_series.py
0 → 100644
View file @
8cab96ea
from
pandas
import
read_csv
from
matplotlib
import
pyplot
from
sklearn.preprocessing
import
LabelEncoder
from
sklearn.preprocessing
import
MinMaxScaler
from
pandas
import
DataFrame
from
pandas
import
concat
from
keras
import
Sequential
from
keras.layers
import
Dense
from
keras.layers
import
LSTM
from
sklearn.metrics
import
mean_squared_error
from
numpy
import
concatenate
from
math
import
sqrt
# Load the pollution dataset and visualise selected columns.
dataset = read_csv('./Datasets/pollution_clean.csv', header=0, index_col=0)
values = dataset.values
# columns to visualise (column 4 — categorical, label-encoded later — is skipped)
groups = [0, 1, 2, 3, 5, 6, 7]
# one stacked subplot per selected column
pyplot.figure()
for i, group in enumerate(groups, start=1):
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
pyplot.show()
#---------------------------------------------------
# convert series to supervised learning
def
series_to_supervised
(
data
,
n_in
=
1
,
n_out
=
1
,
dropnan
=
True
):
n_vars
=
1
if
type
(
data
)
is
list
else
data
.
shape
[
1
]
df
=
DataFrame
(
data
)
cols
,
names
=
list
(),
list
()
# input sequence (t-n, ... t-1)
for
i
in
range
(
n_in
,
0
,
-
1
):
cols
.
append
(
df
.
shift
(
i
))
names
+=
[(
'var%d(t-%d)'
%
(
j
+
1
,
i
))
for
j
in
range
(
n_vars
)]
# forecast sequence (t, t+1, ... t+n)
for
i
in
range
(
0
,
n_out
):
cols
.
append
(
df
.
shift
(
-
i
))
if
i
==
0
:
names
+=
[(
'var%d(t)'
%
(
j
+
1
))
for
j
in
range
(
n_vars
)]
else
:
names
+=
[(
'var%d(t+%d)'
%
(
j
+
1
,
i
))
for
j
in
range
(
n_vars
)]
# put it all together
agg
=
concat
(
cols
,
axis
=
1
)
agg
.
columns
=
names
# drop rows with NaN values
if
dropnan
:
agg
.
dropna
(
inplace
=
True
)
return
agg
# load dataset
#dataset = read_csv('.\Datasets\pollution_clean.csv', header=0, index_col=0)
#values = dataset.values
# integer-encode the categorical column at index 4
encoder = LabelEncoder()
values[:, 4] = encoder.fit_transform(values[:, 4])
# ensure all data is float
values = values.astype('float32')
# normalise every feature into [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning: one lag step in, one step out
reframed = series_to_supervised(scaled, 1, 1)
# drop the forecast columns we do not want to predict (keep only column 8, the target)
reframed.drop(reframed.columns[[9, 10, 11, 12, 13, 14, 15]], axis=1, inplace=True)
print(reframed.head())
# split into train and test sets: the first year (365 * 24 hours) trains the model
values = reframed.values
n_train_hours = 365 * 24
train, test = values[:n_train_hours, :], values[n_train_hours:, :]
# the last column is the target, everything before it is input
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to the 3-D layout [samples, timesteps, features] expected by the LSTM
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network: one 50-unit LSTM layer feeding a single linear output
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
# fit network; shuffle=False preserves the temporal order of the batches
history = model.fit(train_X, train_y,
                    epochs=50, batch_size=72,
                    validation_data=(test_X, test_y),
                    verbose=2, shuffle=False)
# plot history: training vs. validation loss per epoch
for key, lbl in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[key], label=lbl)
pyplot.legend()
pyplot.show()
print("make prediction")
# predict on the held-out hours
yhat = model.predict(test_X)
# flatten test_X back to 2-D so it can be stitched to the predictions
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for the forecast: rebuild a full feature row, inverse-transform,
# then keep only the first column (the predicted variable)
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:, 0]
# invert scaling for the actual values the same way
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:, 0]
# report RMSE in the original units
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
\ No newline at end of file
9_neural_nets-4-time_series_Metod.py
0 → 100644
View file @
8cab96ea
from
pandas
import
read_csv
from
matplotlib
import
pyplot
from
sklearn.preprocessing
import
LabelEncoder
from
sklearn.preprocessing
import
MinMaxScaler
from
pandas
import
DataFrame
from
pandas
import
concat
from
keras
import
Sequential
from
keras.layers
import
Dense
from
keras.layers
import
LSTM
from
sklearn.metrics
import
mean_squared_error
from
numpy
import
concatenate
from
math
import
sqrt
# Load and plot the dataset.
#file = r'd:\tmp\technical_analysis_test_eth_usd_bitstamp_900_2018-12-01T00-00-00_2019-02-15T00-00-00_5minfuture.csv'
# NOTE(review): the path literal ends with a trailing space — confirm the file
# name on disk really contains it.
file = r"d:\tmp\technical_tmp.csv "
dataset = read_csv(file, header=0, index_col=0)
values = dataset.values
# plot every column in its own stacked subplot
groups = range(len(dataset.columns))
pyplot.figure()
for i, group in enumerate(groups, start=1):
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
pyplot.show()
#---------------------------------------------------
# show the first rows of the data frame
# (bug fix: `print(dataset.head)` without parentheses printed the bound
# method object instead of the data — call it)
print(dataset.head())
# load dataset
#dataset = read_csv('.\Datasets\pollution_clean.csv', header=0, index_col=0)
#values = dataset.values
# ensure all data is float
values
=
values
.
astype
(
'float32'
)
# normalize features
scaler
=
MinMaxScaler
(
feature_range
=
(
0
,
1
))
scaled
=
scaler
.
fit_transform
(
values
)
print
(
"shape "
,
scaled
.
shape
)
# split into train and test sets
# (bug fix: the original sliced the raw `values` array, so the network was
# trained on unscaled data even though `scaler` was fitted above and
# `scaler.inverse_transform` is applied to the outputs later; split the
# MinMax-scaled array instead, matching the pollution script's flow)
#values = reframed.values
n_train_hours = 5000
train = scaled[:n_train_hours, :]
print("train ", train.shape)  # typo "tainn" fixed
test = scaled[n_train_hours:, :]
print("test ", test.shape)
# split into inputs and outputs: last column is the prediction target
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to the 3-D layout [samples, timesteps, features] expected by the LSTM
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network: one 50-unit LSTM layer feeding a single linear output
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
# fit network; shuffle=False preserves the temporal order of the batches
history = model.fit(train_X, train_y,
                    epochs=50, batch_size=72,
                    validation_data=(test_X, test_y),
                    verbose=2, shuffle=False)
# plot history: training vs. validation loss per epoch
for key, lbl in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[key], label=lbl)
pyplot.legend()
pyplot.show()
print("make prediction")
# predict on the held-out rows
yhat = model.predict(test_X)
print("yhat", yhat.shape)
# flatten test_X back to 2-D for the inverse-scaling step
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
print("test_X", test_X.shape)
# invert scaling for the actual values
# (bug fix: `test_y` is 1-D after `test[:, -1]`, and numpy.concatenate along
# axis=1 with the 2-D `test_X` raises ValueError; the reshape that was
# commented out here is required — the pollution script does the same)
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
print("inv_y", inv_y.shape)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:, 0]
# invert scaling for the forecast: rebuild a full feature row, inverse-transform,
# then keep only the first column (the predicted variable)
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
print("inv_yhat conc", inv_yhat.shape)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:, 0]
# report RMSE in the original units
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment