#coding=utf-8
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
data_dir = "/home/spica/mnt_device/aqi/dev_data/timesub/"
log_dir = "/home/spica/mnt_device/aqi/result"
def process(x):
    if x == '?':
        return 0.0
    return float(x)
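# Example: process('?') -> 0.0, process('12.5') -> 12.5
# (process() is a helper for missing readings; note that the parsing loop
# below currently calls float() directly and assumes clean numeric fields)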
hop = 71
timestep_size = 71       # Hours of look-back per prediction window
output_parameters = 3    # Number of predicted parameters
num_stations = 3         # Number of monitoring stations
atm_dim = 6              # Number of atmospheric feature dimensions
aqi_dim = 6              # Number of monitored air pollutants
# Hyperparameters
dataset = []
split = 200
leap = 6
length = 16
lr = 0.0001
hidden_size = 256
layer_num = 3
# hop / timestep_size above define how many hours are used to predict
print("Processing target set")
start = 1395691200
end = 1448564400
cur_start = start
cur_end = start+hop*3600
# --------------------------------------------
# Data Dictionary
# --------------------------------------------
# 0 RecordID
# 1 LocatID Location ID generated by unique latitude-longitude pairs for each monitor location
# 2 StationName Station Name as defined by the monitor owner (might change, might not be unique)
# 3 ChName Chinese language station name (UTF8 encoding)
# 4 Latitude Latitude North in decimal degrees WGS84
# 5 Longitude Longitude East in decimal degrees WGS84
# 6 PM2.5 Particulate Matter 2.5 micron diameter µg/m3 (micrograms per cubic metre)
# 7 PM10 Particulate Matter 10 micron diameter µg/m3 (micrograms per cubic metre)
# 8 O3 Ozone pphm (parts per hundred million)
# 9 NO2 Nitrogen Dioxide (NOx) pphm (parts per hundred million)
# 10 SO2 Sulfur Dioxide (SOx) pphm (parts per hundred million)
# 11 CO CarbonMonoxide ppm (parts per million)
# 12 Temperature degrees Celsius
# 13 DewPoint degrees Celsius
# 14 Pressure millibars
# 15 Humidity absolute humidity in grams/meter3
# 16 Wind km / hour
# 17 UMT_time data collection time, Greenwich Mean Time
# --------------------------------------------
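# A raw record line is assumed to be '#'-separated in the field order above.
# The example below is hypothetical, for illustration only:
#   1024#7#Dongcheng#东城#39.93#116.42#85.0#120.0#3.2#2.1#0.8#1.1#22.5#12.0#1013.2#9.8#14.0#1438387200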
# Data Preparation
# --------------------------------------------
# from UNIX time 1395691200
# to UNIX time 1448564400
# Read the training set from the hourly data files
data = []
atm_data = []
aqi_data = []
target_set = []
# TODO: change latitude and longitude to a unified encoding
while cur_end < end - (120 + 288) * 3600:
    buff = []
    for i in range(hop):
        hour = []
        hourly_atm = []
        hourly_aqi = []
        with open(data_dir + str(cur_start + i * 3600), 'r') as f1:
            for line in f1:
                ls = line.split('#')
                # 12 fields per station (latitude through humidity), 3 stations -> 36
                hour = hour + [float(v) for v in ls[4:16]]
                # Atmospheric features: latitude, longitude, temperature,
                # dew point, pressure, humidity (atm_dim = 6)
                hourly_atm = hourly_atm + [float(v) for v in ls[4:6] + ls[12:16]]
                # Pollutant readings: PM2.5, PM10, O3, NO2, SO2, CO (aqi_dim = 6)
                hourly_aqi.append([float(v) for v in ls[6:12]])
        buff.append(hour)
        # Per-hour atmospheric/pollutant features (collected but unused below)
        atm_data.append(hourly_atm)
        aqi_data.append(hourly_aqi)
    data.append(buff)
    # Target: PM10, O3 and NO2 at 120 hours after the window start,
    # taken from the first station's line only
    with open(data_dir + str(cur_start + 120 * 3600), 'r') as f1:
        for line in f1:
            ls = line.split('#')
            target_set.append([float(v) for v in ls[7:10]])
            break
    cur_start = cur_start + 3600
    cur_end = cur_end + 3600
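# Resulting shapes, assuming each hourly file holds num_stations = 3 lines:
#   data       -> [num_windows, hop, 36]   (12 fields x 3 stations per hour)
#   target_set -> [num_windows, 3]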
# s_target = random.shuffle(target_set)
print(len(target_set))
np_data = np.asarray(data)
np_target = np.asarray(target_set)
print("Target shape :",np_target.shape)
print("Data shape : :",np_data.shape)
# training_data = np.hstack((np_data,np_target))
# np.random.shuffle(training_data)
# X = training_data[:, :-1]
# y = training_data[:, -1]
X = np_data
y = np_target
train_set = np.array(X[1920:])
train_target = np.array(y[1920:])
val_set = np.array(X[:1920])
val_target = np.array(y[:1920])
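# The first 1920 windows are held out for validation; the rest are for
# training (the random-batch indices in the training loop below skip them)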
sess = tf.InteractiveSession()
batch_size = tf.placeholder(tf.int32)
_X = tf.placeholder(tf.float32, [None, timestep_size, 36])  # TODO: change this to the divided version
_y = tf.placeholder(tf.float32, [None, 3])
atm_x = tf.placeholder(tf.float32, [None, timestep_size, atm_dim])  # declared but unused below
aqi_x = tf.placeholder(tf.float32, [None, timestep_size, aqi_dim])  # declared but unused below
keep_prob = tf.placeholder(tf.float32)
# --------------------------------------------
# Construct LSTM cells
# --------------------------------------------
# Each layer needs its own cell instance: reusing a single cell object
# across layers raises an error in TF >= 1.1
def make_lstm_cell():
    cell = rnn.LSTMCell(num_units=hidden_size,
                        forget_bias=1.0,
                        state_is_tuple=True)
    return rnn.DropoutWrapper(cell=cell,
                              input_keep_prob=1.0,
                              output_keep_prob=keep_prob)
mlstm_cell = rnn.MultiRNNCell([make_lstm_cell() for _ in range(layer_num)],
                              state_is_tuple=True)
init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)
# ** When time_major == False, outputs.shape = [batch_size, timestep_size, hidden_size],
# ** so we can take h_state = outputs[:, -1, :] as the final output.
# ** state.shape = [layer_num, 2, batch_size, hidden_size],
# ** so alternatively h_state = state[-1][1] works as the final output.
# ** Either way, the final output has shape [batch_size, hidden_size].
outputs, state = tf.nn.dynamic_rnn(mlstm_cell,
                                   inputs=_X,
                                   initial_state=init_state)
h_state = outputs[:, -1, :]
# alternatively: h_state = state[-1][1]
# *************** To better understand how the LSTM works, we can implement the step above ourselves ***************
# The documentation shows that every RNNCell provides a __call__() method (see the appendix at the end),
# which lets us unroll the LSTM over the time steps manually.
# ** Step 6, method two: unroll the computation time step by time step
# outputs = list()
# state = init_state
# with tf.variable_scope('RNN'):
#     for timestep in range(timestep_size):
#         if timestep > 0:
#             tf.get_variable_scope().reuse_variables()
#         # state here holds the state of every LSTM layer
#         (cell_output, state) = mlstm_cell(_X[:, timestep, :], state)
#         outputs.append(cell_output)
# h_state = outputs[-1]
# --------------------------------------------
# Convert LSTM output to a tensor of three values
# --------------------------------------------
W = tf.Variable(tf.truncated_normal([hidden_size, output_parameters],
                                    stddev=0.1),
                dtype=tf.float32)
bias = tf.Variable(tf.constant(0.1, shape=[output_parameters]),
                   dtype=tf.float32)
y_pre = tf.matmul(h_state, W) + bias
# Train on mean squared error; tf.log of the unbounded regression output
# y_pre would be undefined for non-positive predictions
mse = tf.reduce_mean(tf.square(y_pre - _y))
train_op = tf.train.AdamOptimizer(lr).minimize(mse)
loss = tf.reduce_mean(tf.abs(y_pre - _y), 0)  # per-parameter mean absolute error
# argmax agreement only checks which of the three parameters is largest
correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
sess.run(tf.global_variables_initializer())
count = 0
for i in range(6000):
    _batch_size = 384
    # Pick a random training batch; indices 5..36 skip the first 1920
    # windows, which are held out for validation above
    batch = random.randint(5, 36)
    batch_start = batch * _batch_size
    batch_end = (batch + 1) * _batch_size
    sess.run(train_op,
             feed_dict={_X: data[batch_start:batch_end],
                        _y: target_set[batch_start:batch_end],
                        keep_prob: 0.5,
                        batch_size: _batch_size})
    # print("========Iter:" + str(i) + ", Accuracy:========", acc)
    if i % 21 == 0:
        acc = sess.run(loss, feed_dict={_X: data[1152:1536],
                                        _y: target_set[1152:1536],
                                        batch_size: 384,
                                        keep_prob: 1.0})
        print("Epoch: " + str(count) + ", MAE: " + str(acc))
        count = count + 1
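# A minimal final-evaluation sketch, assuming the graph above; val_set and
# val_target are built earlier but otherwise unused in this script
val_mae = sess.run(loss, feed_dict={_X: val_set,
                                    _y: val_target,
                                    batch_size: len(val_set),
                                    keep_prob: 1.0})
print("Validation MAE per output parameter:", val_mae)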