import torch
import argparse
import numpy as np
from torch import nn, optim
from numpy import random as rd
from torch.utils.data import DataLoader
from nets import SiameseNetwork, NETA, NETB, Classifier
from util import init_weights, SiameseNetworkDataset, tes_vec, val_classifier
torch.cuda.manual_seed_all(1)
torch.manual_seed(1)
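
# The feature inputs are expected to be .npy matrices of shape (num_nodes, feature_dim),
# with the anchor users occupying the first total_anchor row indices in both networks.
# A minimal smoke-test sketch (hypothetical file names, not part of the original pipeline):
#   np.save('feature_A.npy', np.random.rand(500, 128).astype('float32'))
#   np.save('feature_B.npy', np.random.rand(600, 128).astype('float32'))
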
def aup(paras):
    # Unpack hyper-parameters from the parsed command-line arguments.
    total_anchor = paras.total_anchor
    train_ratio = paras.train_ratio
    load_path_a = paras.feature_A
    load_path_b = paras.feature_B
    cuda = torch.device('cuda:%d' % paras.gpu_id)
    dim = paras.represent_dim
    lr = paras.lr
    lr_step = paras.lr_step
    lr_prob = paras.lr_prob
    N = paras.N
    stop_P = paras.stop_P
    is_classification = paras.is_classification
    represent_epoch = paras.represent_epoch
    classification_epoch = paras.classification_epoch
    # Load the node feature matrices of the two networks.
    a_array_load = np.load(load_path_a)
    b_array_load = np.load(load_path_b)
    a_array_tensor = torch.Tensor(a_array_load)
    b_array_tensor = torch.Tensor(b_array_load)
    len_f = a_array_load.shape[0]
    len_t = b_array_load.shape[0]
    print('nodes in A: %d, nodes in B: %d' % (len_f, len_t))
    node_f = list(range(len_f))
    node_t = list(range(len_t))
    anchor_all = list(range(total_anchor))
    rd.seed(80)
    # Sample without replacement so the training anchors are unique; the original
    # call sampled with replacement and could draw duplicate anchors.
    anchor_train = rd.choice(anchor_all, int(train_ratio * total_anchor), replace=False)
    anchor_test = list(set(anchor_all) - set(anchor_train))
    # Siamese auto-encoder mapping both networks into a shared embedding space;
    # NETA / NETB are the two encoder branches, used stand-alone for evaluation.
    model = SiameseNetwork(dim, len_f, len_t).to(device=cuda)
    init_weights(model)
    neta = NETA(len_f, dim).to(device=cuda)
    netb = NETB(len_t, dim).to(device=cuda)
    a_array_tensor = a_array_tensor.to(device=cuda)
    b_array_tensor = b_array_tensor.to(device=cuda)
    mse = nn.MSELoss()
    cos = nn.CosineEmbeddingLoss(margin=0)
    optimizer = optim.Adadelta(model.parameters(), lr=lr, weight_decay=0.001)
    # StepLR decays the learning rate by lr_prob every lr_step epochs,
    # i.e. lr(epoch) = lr * lr_prob ** (epoch // lr_step).
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_prob)
    triplet_neg = 1  # negative pairs sampled per anchor, per direction
    anchor_flag = 1
    anchor_train_len = len(anchor_train)
    # Anchor i in network A aligns with anchor i in network B.
    anchor_train_a_list = anchor_train
    anchor_train_b_list = anchor_train
    input_a = []
    input_b = []
    classifier_target = torch.empty(0).to(device=cuda)
    np.random.seed(5)
    # Build the training pairs: each anchor contributes one positive pair plus
    # triplet_neg negatives in each direction (A -> B and B -> A).
    for index in range(anchor_train_len):
        a = anchor_train_a_list[index]
        b = anchor_train_b_list[index]
        # Positive pair (a, b) with target 1.
        input_a.append(a)
        input_b.append(b)
        an_target = torch.ones(anchor_flag).to(device=cuda)
        classifier_target = torch.cat((classifier_target, an_target), dim=0)
        # Negative pairs (a, b') with b' sampled from network B, excluding b.
        an_negs_index = list(set(node_t) - {b})
        an_negs_index_sampled = list(np.random.choice(an_negs_index, triplet_neg, replace=False))
        input_a += triplet_neg * [a]
        input_b += an_negs_index_sampled
        # Negative pairs (a', b) with a' sampled from network A, excluding a.
        an_negs_index1 = list(set(node_f) - {a})
        an_negs_index_sampled1 = list(np.random.choice(an_negs_index1, triplet_neg, replace=False))
        input_b += triplet_neg * [b]
        input_a += an_negs_index_sampled1
        un_an_target = torch.zeros(triplet_neg * 2).to(device=cuda)
        classifier_target = torch.cat((classifier_target, un_an_target), dim=0)
    # CosineEmbeddingLoss expects targets in {-1, +1}; the classifier uses {0, 1}.
    cosine_target = torch.unsqueeze(2 * classifier_target - 1, dim=1)
    classifier_target = torch.unsqueeze(classifier_target, dim=1)
    ina = torch.Tensor(a_array_load[input_a]).to(device=cuda)
    inb = torch.Tensor(b_array_load[input_b]).to(device=cuda)
    tensor_dataset = SiameseNetworkDataset(ina, inb, classifier_target, cosine_target)
    data_loader = DataLoader(tensor_dataset, batch_size=56, shuffle=False)
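    # For triplet_neg = 1, each anchor contributes three consecutive rows to the
    # dataset above; e.g. for anchor pair (7, 7) with sampled negatives 19 and 42
    # (indices hypothetical):
    #   input_a: [7, 7, 42]    input_b: [7, 19, 7]
    #   targets: [1, 0,  0]    (one positive pair, one negative per direction)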
    hidden_a_for_c = None
    hidden_b_for_c = None
    # Stage one, representation learning: train the Siamese auto-encoder, then copy
    # the encoder weights into neta/netb after every epoch to evaluate the embeddings.
    for epoch in range(represent_epoch):
        model.train()
        train_loss = 0
        loss_rec_a = 0
        loss_rec_b = 0
        loss_reg = 0
        loss_anchor = 0
        for data_batch in data_loader:
            in_a, in_b, c, cosine = data_batch
            cosine = torch.squeeze(cosine, dim=1)
            in_a = torch.unsqueeze(in_a, dim=1).to(device=cuda)
            in_b = torch.unsqueeze(in_b, dim=1).to(device=cuda)
            h_a, h_b, re_a, re_b = model(in_a, in_b)
            # Reconstruction losses of the two auto-encoder branches.
            loss_rec_a_batch = 100 * mse(re_a, in_a)
            loss_rec_b_batch = 100 * mse(re_b, in_b)
            # The cosine loss pulls anchor pairs together (target +1)
            # and pushes negative pairs apart (target -1).
            loss_anchor_batch = 1 * cos(h_a, h_b, cosine)
            loss_reg_batch = 0.001 * (h_a.norm() + h_b.norm())
            loss = loss_reg_batch + loss_rec_a_batch + loss_rec_b_batch + loss_anchor_batch
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            loss_rec_a += loss_rec_a_batch.item()
            loss_rec_b += loss_rec_b_batch.item()
            loss_reg += loss_reg_batch.item()
            loss_anchor += loss_anchor_batch.item()
        # Step the scheduler once per epoch, after the optimizer updates
        # (stepping it before optimizer.step() is deprecated in recent PyTorch).
        scheduler.step()
        # Copy the encoder weights that SiameseNetwork shares with NETA/NETB.
        neta_dict = neta.state_dict()
        netb_dict = netb.state_dict()
        model.cpu()
        trainmodel_dict = model.state_dict()
        trainmodel_dict_a = {k: v for k, v in trainmodel_dict.items() if k in neta_dict}
        trainmodel_dict_b = {k: v for k, v in trainmodel_dict.items() if k in netb_dict}
        neta_dict.update(trainmodel_dict_a)
        netb_dict.update(trainmodel_dict_b)
        neta.load_state_dict(neta_dict)
        netb.load_state_dict(netb_dict)
        # Evaluate Precision@N and MAP@N on the training (validation) and test anchors.
        neta.eval()
        netb.eval()
        with torch.no_grad():
            hidden_a = neta(torch.unsqueeze(a_array_tensor, dim=1))
            hidden_b = netb(torch.unsqueeze(b_array_tensor, dim=1))
        PatN_v, MatN_v = tes_vec(hidden_a, hidden_b, anchor_train, anchor_train, N, node_t)
        PatN_t, MatN_t = tes_vec(hidden_a, hidden_b, anchor_train, anchor_test, N, node_t)
        print('epoch:%d, loss:%.3f, rec_a:%.3f, rec_b:%.3f, anchor:%.3f, reg:%.3f, '
              'at%d, Val(P=%.3f, M=%.3f), Tes(P=%.3f, M=%.3f)' %
              (epoch, train_loss, loss_rec_a, loss_rec_b, loss_anchor, loss_reg, N, PatN_v, MatN_v, PatN_t, MatN_t))
        if is_classification and PatN_t > stop_P:
            # Freeze the current embeddings for the classification stage.
            hidden_a_for_c = hidden_a.detach()
            hidden_b_for_c = hidden_b.detach()
            break
        model.to(device=cuda)
    if is_classification:
        # Stage two: train a binary classifier on concatenated pair embeddings.
        classifier = Classifier().to(device=cuda)
        cel = nn.CrossEntropyLoss()
        if hidden_a_for_c is None:
            # stop_P was never reached; fall back to the last epoch's embeddings
            # (the original code would fail on None here).
            hidden_a_for_c = hidden_a.detach()
            hidden_b_for_c = hidden_b.detach()
        hidden_a_for_c = hidden_a_for_c.cpu().numpy()
        hidden_b_for_c = hidden_b_for_c.cpu().numpy()
        # Reuse the pair indices and targets built for stage one.
        ina_for_c = torch.Tensor(hidden_a_for_c[input_a]).to(device=cuda)
        inb_for_c = torch.Tensor(hidden_b_for_c[input_b]).to(device=cuda)
        tensor_dataset_for_c = SiameseNetworkDataset(ina_for_c, inb_for_c, classifier_target, cosine_target)
        data_loader_for_c = DataLoader(tensor_dataset_for_c, batch_size=dim, shuffle=False)
        optimizer_for_c = optim.Adadelta(classifier.parameters(), lr=lr, weight_decay=0.0001)
        scheduler_c = torch.optim.lr_scheduler.StepLR(optimizer_for_c, step_size=lr_step, gamma=lr_prob)
        for epoch in range(classification_epoch):
            classifier.train()
            loss_c = 0
            for data_batch in data_loader_for_c:
                in_a, in_b, c, cosine = data_batch
                in_a, in_b = in_a.to(device=cuda), in_b.to(device=cuda)
                # The classifier scores the concatenation of the two embeddings.
                in_class = torch.cat((in_a, in_b), dim=1)
                class_out = classifier(in_class)
                c = torch.squeeze(c, dim=1)
                loss_classifier = cel(class_out, c.long())
                optimizer_for_c.zero_grad()
                loss_classifier.backward()
                optimizer_for_c.step()
                loss_c += loss_classifier.item()
            scheduler_c.step()
            classifier.eval()
            hidden_a_for_c1 = torch.Tensor(hidden_a_for_c).to(device=cuda)
            hidden_b_for_c1 = torch.Tensor(hidden_b_for_c).to(device=cuda)
            PatN_v, MatN_v = val_classifier(hidden_a_for_c1, hidden_b_for_c1, anchor_train, anchor_train, paras,
                                            node_t, classifier)
            PatN_t, MatN_t = val_classifier(hidden_a_for_c1, hidden_b_for_c1, anchor_train, anchor_test, paras,
                                            node_t, classifier)
            print('epoch %d, loss %.3f, at%d, Val(P=%.3f, M=%.3f), Tes(P=%.3f, M=%.3f)' %
                  (epoch, loss_c, N, PatN_v, MatN_v, PatN_t, MatN_t))
if __name__ == '__main__':
    parser = argparse.ArgumentParser('')
    parser.add_argument('--feature_A', required=True, type=str, help='feature file (.npy) of network A')
    parser.add_argument('--feature_B', required=True, type=str, help='feature file (.npy) of network B')
    parser.add_argument('--total_anchor', required=True, type=int, help='total number of anchor users')
    parser.add_argument('--train_ratio', type=float, default=0.3, help='training ratio of anchors')
    parser.add_argument('--represent_dim', type=int, default=56, help='dimension of the representation vector')
    parser.add_argument('--represent_epoch', type=int, default=500, help='epochs for user representation')
    parser.add_argument('--classification_epoch', type=int, default=200, help='epochs for classification')
    parser.add_argument('--N', type=int, default=30, help='top N for Precision and MAP')
    parser.add_argument('--gpu_id', required=True, type=int, help='GPU ID')
    parser.add_argument('--lr', type=float, default=3, help='initial representation learning rate')
    parser.add_argument('--lr_step', type=int, default=10, help='epochs between learning-rate decays')
    parser.add_argument('--lr_prob', type=float, default=0.8, help='decay factor for the dynamic learning rate')
    # store_true avoids the argparse type=bool pitfall, where any non-empty
    # string (including "False") parses as True.
    parser.add_argument('--is_classification', action='store_true', help='run the classification stage')
    # stop_P should be chosen according to train_ratio.
    parser.add_argument('--stop_P', type=float, default=0.9,
                        help='if is_classification is set, representation training stops '
                             'once test P@N exceeds stop_P')
    args = parser.parse_args()
    aup(args)
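
# Example invocation; file names and values are hypothetical placeholders:
#   python model.py --feature_A data/feature_A.npy --feature_B data/feature_B.npy \
#       --total_anchor 1000 --gpu_id 0 --train_ratio 0.3 --N 30 --is_classification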