Skip to content

Commit 588b3d2

Browse files
committed
第六章代码及详解
1 parent 6bbee99 commit 588b3d2

15 files changed

+421245
-0
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
chapter6/aclImdb*
2+
chapter6/glove.6B*
3+
14
# Byte-compiled / optimized / DLL files
25
__pycache__/
36
*.py[cod]

chapter6/RNN_naive_implement.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import numpy as np
2+
3+
# Naive NumPy forward pass of a simple RNN over one random input sequence.

# Number of timesteps in the input sequence.
timesteps = 100
# Dimensionality of the input feature space.
inputs_features = 32
# Dimensionality of the output feature space.
output_features = 64

# Input data: one feature vector per timestep.
inputs = np.random.random((timesteps, inputs_features))

# Initial state: an all-zero vector.
state_t = np.zeros((output_features,))

# Random weight matrices and bias.
W = np.random.random((inputs_features, output_features))
U = np.random.random((output_features, output_features))
b = np.random.random((output_features,))

successive_outputs = []
for input_t in inputs:
    # Combine the current input with the current state (the previous output)
    # to obtain the current output.  W has shape
    # (inputs_features, output_features), so the input vector must be the
    # left operand; np.dot(W, input_t) raises a shape-mismatch ValueError.
    output_t = np.tanh(np.dot(input_t, W) + np.dot(U, state_t) + b)

    # Store the output of this timestep.
    successive_outputs.append(output_t)

    # The output becomes the state for the next timestep.
    state_t = output_t

# Final output of shape (timesteps, output_features).  np.stack adds the new
# leading axis; np.concatenate on 1-D vectors would flatten everything into a
# single vector of length timesteps * output_features.
final_output_sequence = np.stack(successive_outputs, axis=0)

chapter6/character_level_one_hot.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import string
2+
import numpy as np
3+
4+
samples = ['The cat sat on the mat.', 'The dog ate my homework.']

# All printable ASCII characters.
characters = string.printable

# Character -> index mapping.  Indices start at 1, so index 0 is never
# assigned to any character.
token_index = dict(zip(characters, range(1, len(characters) + 1)))

max_length = 50


def one_hot_encode_characters(samples, token_index, max_length):
    """One-hot encode each sample character by character.

    Args:
        samples: sequence of strings to encode.
        token_index: mapping from character to its integer index.
        max_length: number of leading characters of each sample to encode;
            shorter samples leave the remaining positions all-zero.

    Returns:
        Array of shape (len(samples), max_length, max index + 1) holding 1.0
        at [sample, position, index of the character] and 0.0 elsewhere.
        Characters missing from ``token_index`` are encoded as an all-zero
        vector.
    """
    vector_size = max(token_index.values()) + 1
    encoded = np.zeros((len(samples), max_length, vector_size))
    for i, sample in enumerate(samples):
        for j, character in enumerate(sample[:max_length]):
            index = token_index.get(character)
            if index is None:
                # Indexing with None would be interpreted by NumPy as a new
                # axis and set the *whole* vector to 1, so unknown characters
                # are skipped instead.
                continue
            encoded[i, j, index] = 1.
    return encoded


results = one_hot_encode_characters(samples, token_index, max_length)

print(results)

chapter6/conv1D_imdb.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from keras.datasets import imdb
2+
from keras.preprocessing import sequence
3+
from keras.models import Sequential
4+
from keras import layers
5+
from keras.optimizers import RMSprop
6+
import matplotlib.pyplot as plt
7+
8+
# Preprocess the IMDB data.
max_features = 10000  # vocabulary size: number of words considered as features
maxlen = 500  # every review is padded / truncated to this many tokens

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# Train a simple 1D convolutional network.
model = Sequential()
model.add(layers.Embedding(max_features, 128, input_length=maxlen))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPool1D())
# binary_crossentropy (with its default from_logits=False) expects the model
# to output a probability, so the final unit needs a sigmoid activation; a
# linear output would only be clipped into range by the loss.
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(optimizer=RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=['acc'])
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_split=0.2)

# Plot how the loss and accuracy evolve during training.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
print('end')
64+
65+
+194
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import os
2+
import numpy as np
3+
from matplotlib import pyplot as plt
4+
from keras.models import Sequential
5+
from keras import layers
6+
from keras.optimizers import RMSprop
7+
8+
data_dir = ''
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

# Read the whole CSV; the with-block closes the file even if reading fails.
with open(fname) as f:
    data = f.read()

# Drop blank lines (for example the empty string left by a trailing newline):
# an empty row would yield no values and break the row assignment below.
lines = [line for line in data.splitlines() if line.strip()]
header = lines[0].split(',')
lines = lines[1:]

print(header)
print(len(lines))

# Parse the data: every column except the first one is converted to float.
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[1:]]
    float_data[i, :] = values

# Plot the whole temperature series.
temp = float_data[:, 1]  # temperature column
plt.plot(range(len(temp)), temp)
plt.show()

# Plot the first 1440 temperature readings (the first 10 days, per the
# original comment).  The x range follows the slice length so a shorter
# series still plots instead of raising a length mismatch.
first_days = temp[:1440]
plt.plot(range(len(first_days)), first_days)
plt.show()

# Normalise every column using statistics of the first 200000 rows only
# (the rows used as training data).
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std
42+
43+
44+
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
    """Endlessly yield (samples, targets) batches cut from a 2-D series.

    Args:
        data: 2-D array of (normalised) observations, one row per timestep.
        lookback: how many timesteps back each input window reaches.
        delay: how many timesteps after the window's end the target lies.
        min_index: lower bound of the rows this generator draws from.
        max_index: upper bound (exclusive) of the window end rows; ``None``
            means ``len(data) - delay - 1``.
        shuffle: draw window end rows at random instead of in order.
        batch_size: number of samples per batch.
        step: stride between the timesteps sampled inside a window (the
            original comment notes that 6 means one data point per hour).

    Yields:
        ``(samples, targets)`` where ``samples`` has shape
        (batch, lookback // step, data.shape[-1]) and ``targets`` has shape
        (batch,) and holds column 1 of the row ``delay`` steps after each
        window end.
    """
    if max_index is None:
        max_index = len(data) - delay - 1

    first_row = min_index + lookback
    cursor = first_row
    window_len = lookback // step
    n_features = data.shape[-1]

    while True:
        if shuffle:
            rows = np.random.randint(first_row, max_index, size=batch_size)
        else:
            # Start over once a full batch no longer fits before max_index.
            if cursor + batch_size >= max_index:
                cursor = first_row
            rows = np.arange(cursor, min(cursor + batch_size, max_index))
            cursor += len(rows)

        n_rows = len(rows)
        samples = np.zeros((n_rows, window_len, n_features))
        targets = np.zeros((n_rows,))
        for k, end_row in enumerate(rows):
            window = np.arange(end_row - lookback, end_row, step)
            samples[k] = data[window]
            targets[k] = data[end_row + delay][1]
        yield samples, targets
72+
73+
74+
# Prepare the training, validation and test generators.
lookback = 1440
step = 6
delay = 144
batch_size = 128

# Training batches are drawn at random from the first 200000 rows.
train_gen = generator(float_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)
# Validation and test batches are read sequentially (shuffle left at False):
# val_steps / test_steps below are sized to walk each split once, which only
# covers the whole split when batches are consecutive, and it keeps the
# evaluation reproducible from epoch to epoch.
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    step=step,
                    batch_size=batch_size)
test_gen = generator(float_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=300001,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)

# Number of batches to draw from val_gen to see the whole validation split.
val_steps = (300000 - 200001 - lookback) // batch_size

# Number of batches to draw from test_gen to see the whole test split.
test_steps = (len(float_data) - 300001 - lookback) // batch_size
110+
111+
# The experiments below were originally kept as disabled code inside string
# literals.  Each one is now a builder function so it can be selected by name
# without editing the script body.


def build_dense_model():
    """Baseline that only uses two fully connected layers."""
    net = Sequential()
    net.add(layers.Flatten(input_shape=(lookback // step, float_data.shape[-1])))
    net.add(layers.Dense(32, activation='relu'))
    net.add(layers.Dense(1))
    return net


def build_gru_model():
    """Single GRU layer followed by a one-unit output layer."""
    net = Sequential()
    net.add(layers.GRU(32, input_shape=(None, float_data.shape[-1])))
    net.add(layers.Dense(1))
    return net


def build_dropout_gru_model():
    """GRU regularised with input dropout and recurrent dropout."""
    net = Sequential()
    net.add(layers.GRU(32,
                       dropout=0.2,
                       recurrent_dropout=0.2,
                       input_shape=(None, float_data.shape[-1])))
    net.add(layers.Dense(1))
    return net


def build_stacked_gru_model():
    """Two stacked, dropout-regularised GRU layers."""
    net = Sequential()
    # return_sequences=True so the second GRU receives the full sequence.
    net.add(layers.GRU(32,
                       dropout=0.1,
                       recurrent_dropout=0.5,
                       return_sequences=True,
                       input_shape=(None, float_data.shape[-1])))
    net.add(layers.GRU(64, activation='relu', dropout=0.1, recurrent_dropout=0.5))
    net.add(layers.Dense(1))
    return net


def build_bidirectional_gru_model():
    """GRU wrapped in a Bidirectional layer."""
    net = Sequential()
    net.add(layers.Bidirectional(layers.GRU(32), input_shape=(None, float_data.shape[-1])))
    net.add(layers.Dense(1))
    return net


# Experiment name -> (model builder, number of training epochs), using the
# epoch counts each experiment had in the original script.
EXPERIMENTS = {
    'dense': (build_dense_model, 20),
    'gru': (build_gru_model, 20),
    'gru_dropout': (build_dropout_gru_model, 40),
    'gru_stacked': (build_stacked_gru_model, 40),
    'gru_bidirectional': (build_bidirectional_gru_model, 40),
}

# The bidirectional GRU is the experiment the original script left enabled.
experiment_name = 'gru_bidirectional'
build_model, num_epochs = EXPERIMENTS[experiment_name]

model = build_model()

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500,
                              epochs=num_epochs,
                              validation_data=val_gen,
                              validation_steps=val_steps)

# Plot how the training and validation loss evolve during training.
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(loss) + 1)

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
print('end')

0 commit comments

Comments
 (0)