|
| 1 | +import os |
| 2 | +import numpy as np |
| 3 | +from matplotlib import pyplot as plt |
| 4 | +from keras.models import Sequential |
| 5 | +from keras import layers |
| 6 | +from keras.optimizers import RMSprop |
| 7 | + |
data_dir = ''
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

# Read the whole CSV into memory; a context manager guarantees the file
# handle is closed even if reading raises.
with open(fname) as f:
    data = f.read()

# Split into rows, dropping any empty entries (a trailing newline in the
# file would otherwise leave an empty last row that breaks float parsing).
lines = [line for line in data.split('\n') if line]
header = lines[0].split(',')
lines = lines[1:]

print(header)
print(len(lines))

# Parse every row into a float matrix, skipping the first column
# (the "Date Time" string, which is not numeric).
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[1:]]
    float_data[i, :] = values
| 27 | + |
# Quick look at the data: plot the full temperature series.
temp = float_data[:, 1]  # temperature column
plt.plot(range(len(temp)), temp)
plt.show()

# Same series restricted to the first 10 days
# (10 days * 24 h * 6 samples/h = 1440 ten-minute readings).
plt.plot(range(1440), temp[:1440])
plt.show()

# Standardize each feature using statistics from the first 200000
# timesteps only (the training split), so validation/test information
# does not leak into the scaling.  Note the order: the mean is
# subtracted in place first, then std is taken from the shifted data
# (mathematically equivalent for std, which is translation-invariant).
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std
| 42 | + |
| 43 | + |
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
    """Yield endless batches of (samples, targets) windows from a timeseries.

    Args:
        data: normalized float array of shape (timesteps, features).
        lookback: how many timesteps back each input window reaches.
        delay: how many timesteps into the future the target lies.
        min_index, max_index: bounds delimiting which rows of `data`
            may be drawn from; `max_index=None` means "up to the last
            row that still has a target `delay` steps ahead".
        shuffle: draw window end-points at random instead of
            chronologically.
        batch_size: number of windows per yielded batch.
        step: sampling period within a window, in timesteps
            (6 = one point per hour for 10-minute data).

    Yields:
        samples: array of shape (batch, lookback // step, features).
        targets: array of shape (batch,) holding the value of feature
            column 1 (temperature) `delay` steps after each window.
    """
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index + lookback
    while True:
        if shuffle:
            rows = np.random.randint(min_index + lookback, max_index, size=batch_size)
        else:
            # Sequential mode: wrap the cursor around once it would
            # run past max_index, so the generator never stops.
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        samples = np.zeros((len(rows), lookback // step, data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            indices = range(row - lookback, row, step)
            samples[j] = data[indices]
            targets[j] = data[row + delay][1]
        yield samples, targets
| 72 | + |
| 73 | + |
# Build the train/validation/test generators.
# lookback: 10 days of 10-minute readings; step: one sample per hour;
# delay: predict the temperature 24 hours ahead.
lookback = 1440
step = 6
delay = 144
batch_size = 128

train_gen = generator(float_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)
# Validation and test data must be traversed sequentially
# (shuffle=False): the step counts below assume each batch covers new,
# non-overlapping rows so the whole split is seen exactly once, which
# random sampling would not guarantee.
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    shuffle=False,
                    step=step,
                    batch_size=batch_size)
test_gen = generator(float_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=300001,
                     max_index=None,
                     shuffle=False,
                     step=step,
                     batch_size=batch_size)

# Number of batches needed to see the entire validation split once.
val_steps = (300000 - 200001 - lookback) // batch_size

# Number of batches needed to see the entire test split once.
test_steps = (len(float_data) - 300001 - lookback) // batch_size
| 110 | + |
# ---------------------------------------------------------------------------
# Earlier experiments, disabled by wrapping them in bare triple-quoted
# string literals (they are parsed but never executed): a flattened dense
# baseline, a plain GRU, a dropout-regularized GRU, and a stacked
# regularized GRU — all trained on the same generators as the active model.
# NOTE(review): prefer real comments or version control over string-literal
# "commenting"; these literals are still evaluated at import time.
# ---------------------------------------------------------------------------
'''
# 用一个只使用两个全连接层的模型去训练
model = Sequential()
model.add(layers.Flatten(input_shape=(lookback // step, float_data.shape[-1])))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500,
                              epochs=20,
                              validation_data=val_gen,
                              validation_steps=val_steps)
'''
'''
# 使用一个GRU模型
model = Sequential()
model.add(layers.GRU(32, input_shape=(None, float_data.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500,
                              epochs=20,
                              validation_data=val_gen,
                              validation_steps=val_steps)
'''
'''
# 使用一个正则化后的GRU模型
model = Sequential()
model.add(layers.GRU(32,
                     dropout=0.2,
                     recurrent_dropout=0.2,
                     input_shape=(None, float_data.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500,
                              epochs=40,
                              validation_data=val_gen,
                              validation_steps=val_steps)
'''
'''
# 使用一个正则化后多层的GRU模型
model = Sequential()
model.add(layers.GRU(32,
                     dropout=0.1,
                     recurrent_dropout=0.5,
                     return_sequences=True,
                     input_shape=(None, float_data.shape[-1])))
model.add(layers.GRU(64, activation='relu', dropout=0.1, recurrent_dropout=0.5))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500,
                              epochs=40,
                              validation_data=val_gen,
                              validation_steps=val_steps)
'''
| 168 | + |
# Train a bidirectional GRU model: one GRU reads the sequence forward and
# a second reads it backward; their features are merged before the final
# single-unit regression layer (MAE loss on the normalized temperature).
model = Sequential()
model.add(layers.Bidirectional(layers.GRU(32), input_shape=(None, float_data.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
# NOTE(review): fit_generator is deprecated in modern Keras — model.fit
# accepts generators directly; confirm against the installed version.
history = model.fit_generator(train_gen, steps_per_epoch=500,
                              epochs=40,
                              validation_data=val_gen,
                              validation_steps=val_steps)

# Plot the training vs. validation loss curves over the epochs.
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(loss) + 1)

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
print('end')
0 commit comments