Skip to content

Commit df94525

Browse files
Add files via upload
1 parent 4f6768b commit df94525

File tree

3 files changed

+176
-0
lines changed

3 files changed

+176
-0
lines changed

src-tf/__init__.py

Whitespace-only changes.

src-tf/main.py

+104
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import shutil
3+
4+
import networkx as nx
5+
import numpy as np
6+
import pandas as pd
7+
import spektral
8+
import tensorflow as tf
9+
from sklearn.model_selection import StratifiedKFold, train_test_split
10+
11+
from model import MSDMT
12+
13+
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
# Random state handed to the K-fold splitter and the train/validation split.
seed_value = 2021

# Optimisation settings: Adam learning rate and the upper bound on epochs.
lr = 1e-4
epochs = 500

# Loss weights for the two model outputs ('output_1' and 'output_2').
alpha = 0.5
beta = 0.5

# Input geometry: rows per player and padded length of a behavior sequence.
timestep = 10
maxlen = 64
# ---------------------------------------------------------------------------
22+
23+
24+
def data_process(timestep=10, maxlen=64):
    """Load the sample CSV files and build the model inputs and labels.

    Args:
        timestep: Number of consecutive rows that belong to one player in the
            portrait and behavior tables; both tables are reshaped into
            blocks of this size.
        maxlen: Length every behavior sequence is padded (or truncated) to.

    Returns:
        Tuple ``(U, B, A, y1, y2)``:
            U:  portrait features reshaped to ``(-1, timestep, n_features)``.
            B:  padded behavior ids reshaped to ``(-1, timestep, maxlen)``.
            A:  adjacency matrix of the social graph after
                ``GCNConv.preprocess``, cast to float32. Row/column ``i``
                corresponds to the ``i``-th block of ``U``.
            y1: ``churn_label`` as a column vector.
            y2: ``log(payment_label + 1)`` as a column vector.
    """
    df_U = pd.read_csv('../data/sample_data_player_portrait.csv')
    df_B = pd.read_csv('../data/sample_data_behavior_sequence.csv')
    df_G = pd.read_csv('../data/sample_data_social_network.csv')
    df_Y = pd.read_csv('../data/sample_data_label.csv')

    # The reshape below groups every `timestep` consecutive rows into one
    # player, so the uid of the first row of each group identifies that block.
    # NOTE(review): df_B and df_Y are assumed to follow the same player order;
    # confirm against the data export.
    uids = df_U['uid'].iloc[::timestep].tolist()

    U = df_U.drop(['uid', 'ds'], axis=1).values
    U = U.reshape(-1, timestep, U.shape[-1])

    # Missing sequences become empty lists and are therefore fully padded.
    B = df_B['seq'].apply(lambda x: x.split(',') if pd.notna(x) else []).values
    B = tf.keras.preprocessing.sequence.pad_sequences(sequences=B,
                                                      maxlen=maxlen,
                                                      padding='post')
    B = B.reshape(-1, timestep, maxlen)

    G = nx.from_pandas_edgelist(df=df_G,
                                source='src_uid',
                                target='dst_uid',
                                edge_attr=['weight'])
    # Without an explicit node list networkx orders rows by first appearance
    # in the edge list and omits players that have no edges, which silently
    # misaligns A with U/B/y1/y2. Register every player and pin the order.
    G.add_nodes_from(uids)
    A = nx.adjacency_matrix(G, nodelist=uids)
    A = spektral.layers.GCNConv.preprocess(A).astype('f4')

    y1 = df_Y['churn_label'].values.reshape(-1, 1)
    y2 = np.log(df_Y['payment_label'].values + 1).reshape(-1, 1)

    print('U:', U.shape)
    print('B:', B.shape)
    print('G:', A.shape)
    print('y1:', y1.shape, 'y2:', y2.shape)

    return U, B, A, y1, y2
53+
54+
55+
# Load everything once; each fold trains on the full graph and only the
# sample-weight masks decide which nodes contribute to the loss.
U, B, A, y1, y2 = data_process(timestep=timestep, maxlen=maxlen)
N = A.shape[0]

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed_value)

for fold, (train_index, test_index) in enumerate(kfold.split(U, y1), start=1):

    # Carve a validation subset out of the training indices of this fold.
    train_index, val_index = train_test_split(train_index, test_size=0.1, random_state=seed_value)

    # Boolean node masks, passed to Keras as per-sample weights.
    mask_train = np.zeros(N, dtype=bool)
    mask_val = np.zeros(N, dtype=bool)
    mask_test = np.zeros(N, dtype=bool)
    mask_train[train_index] = True
    mask_val[val_index] = True
    mask_test[test_index] = True

    checkpoint_path = './model/checkpoint-{epoch:04d}.ckpt'
    checkpoint_dir = os.path.dirname(checkpoint_path)

    # Start every fold from an empty checkpoint directory so that the newest
    # checkpoint found afterwards belongs to this fold.
    if os.path.exists(checkpoint_dir):
        shutil.rmtree(checkpoint_dir)

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=5,
                                                      mode='min')

    best_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                         monitor='val_loss',
                                                         verbose=1,
                                                         save_best_only=True,
                                                         save_weights_only=True,
                                                         mode='min')

    model = MSDMT(timestep=timestep, behavior_maxlen=maxlen)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss={'output_1': tf.keras.losses.BinaryCrossentropy(),
                        'output_2': tf.keras.losses.MeanSquaredError()},
                  loss_weights={'output_1': alpha, 'output_2': beta},
                  metrics={'output_1': tf.keras.metrics.AUC(),
                           'output_2': 'mae'})

    # batch_size=N and shuffle=False keep the whole graph in a single batch,
    # which the adjacency matrix requires.
    model.fit([U, B, A], [y1, y2],
              validation_data=([U, B, A], [y1, y2], mask_val),
              sample_weight=mask_train,
              batch_size=N,
              epochs=epochs,
              shuffle=False,
              callbacks=[early_stopping, best_checkpoint],
              verbose=1)

    # The held-out fold was previously never scored. With save_best_only=True
    # the newest checkpoint on disk is the best one by val_loss, so restore it
    # (early stopping leaves the model a few epochs past its best state).
    best_weights = tf.train.latest_checkpoint(checkpoint_dir)
    if best_weights is not None:
        model.load_weights(best_weights)

    test_scores = model.evaluate([U, B, A], [y1, y2],
                                 sample_weight=mask_test,
                                 batch_size=N,
                                 verbose=0)
    print('fold %d test:' % fold, dict(zip(model.metrics_names, test_scores)))

src-tf/model.py

+72
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,72 @@
1+
import spektral
2+
import tensorflow as tf
3+
4+
5+
class MSDMT(tf.keras.Model):
    """Keras model with three sub-networks feeding two output heads.

    ``portrait_net`` and ``behavior_net`` each encode one input into a vector
    per sample; the two vectors are concatenated and passed, together with an
    adjacency matrix, through ``network_net``. Two single-unit Dense heads then
    produce a sigmoid output and a linear output.

    Args:
        timestep: Number of steps the behavior features are regrouped into
            before the behavior LSTM.
        portrait_dim: Units of the portrait LSTM and its Dense layer.
        behavior_num: Vocabulary size of the behavior embedding.
        behavior_emb_dim: Embedding width for behavior ids.
        behavior_maxlen: Length of one (padded) behavior sequence.
        behavior_dim: Conv1D filters and units of the behavior LSTM/Dense.
        network_dim: Channels of the GCN layer and units of its Dense layer.
        dropout: Dropout rate applied after the GCN layer.
    """

    def __init__(self,
                 timestep=10,
                 portrait_dim=32,
                 behavior_num=100 + 1,
                 behavior_emb_dim=16,
                 behavior_maxlen=64,
                 behavior_dim=32,
                 network_dim=32,
                 dropout=0.5):
        super().__init__()

        # Keep the hyper-parameters on the instance; the reshape closures
        # below read them from ``self`` at call time.
        self.timestep = timestep
        self.dropout = dropout
        self.portrait_dim = portrait_dim
        self.behavior_num = behavior_num
        self.behavior_emb_dim = behavior_emb_dim
        self.behavior_maxlen = behavior_maxlen
        self.behavior_dim = behavior_dim
        self.network_dim = network_dim

        def merge_steps_into_batch(x):
            # Collapse every leading axis so each row is one embedded sequence.
            return tf.reshape(x, (-1, self.behavior_maxlen, self.behavior_emb_dim))

        def split_batch_into_steps(x):
            # Regroup pooled sequence features into blocks of `timestep` steps.
            return tf.reshape(x, (-1, self.timestep, self.behavior_dim))

        # --- portrait branch: LSTM -> LayerNorm -> Dense ---------------------
        portrait_layers = [
            tf.keras.layers.LSTM(units=self.portrait_dim, return_sequences=False),
            tf.keras.layers.LayerNormalization(),
            tf.keras.layers.Dense(units=self.portrait_dim,
                                  activation='relu',
                                  use_bias=False),
        ]
        self.portrait_net = tf.keras.Sequential(layers=portrait_layers,
                                                name='portrait_net')

        # --- behavior branch: Embedding -> Conv1D -> pooling -> LSTM ---------
        behavior_layers = [
            tf.keras.layers.Embedding(input_dim=self.behavior_num,
                                      output_dim=self.behavior_emb_dim,
                                      mask_zero=True),
            tf.keras.layers.Lambda(merge_steps_into_batch),
            tf.keras.layers.Conv1D(filters=self.behavior_dim,
                                   kernel_size=3,
                                   padding='same',
                                   activation='relu'),
            tf.keras.layers.GlobalAveragePooling1D(),
            tf.keras.layers.Lambda(split_batch_into_steps),
            tf.keras.layers.LSTM(units=self.behavior_dim, return_sequences=False),
            tf.keras.layers.LayerNormalization(),
            tf.keras.layers.Dense(units=self.behavior_dim,
                                  activation='relu',
                                  use_bias=False),
        ]
        self.behavior_net = tf.keras.Sequential(layers=behavior_layers,
                                                name='behavior_net')

        # --- graph branch: GCN -> Dropout -> Dense ---------------------------
        network_layers = [
            spektral.layers.GCNConv(channels=self.network_dim, activation='relu'),
            tf.keras.layers.Dropout(rate=self.dropout),
            tf.keras.layers.Dense(units=self.network_dim, activation='relu'),
        ]
        self.network_net = tf.keras.Sequential(layers=network_layers,
                                               name='network_net')

        # --- output heads: sigmoid head and linear head ----------------------
        self.output1 = tf.keras.layers.Dense(units=1,
                                             activation='sigmoid',
                                             name='output1')
        self.output2 = tf.keras.layers.Dense(units=1,
                                             activation=None,
                                             name='output2')

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Sequence ``(U, B, A)`` of portrait features, behavior ids
                and adjacency matrix.
                NOTE(review): presumably U is (N, timestep, features) and B is
                (N, timestep, behavior_maxlen) - confirm against the caller.

        Returns:
            Tuple ``(output1, output2)``: the sigmoid head and the linear head,
            both computed from the same graph representation.
        """
        portrait_in, behavior_in, adjacency = inputs

        portrait_repr = self.portrait_net(portrait_in)
        behavior_repr = self.behavior_net(behavior_in)

        # Per-node features for the GCN are both encodings side by side.
        node_features = tf.keras.layers.Concatenate()([portrait_repr, behavior_repr])
        node_repr = self.network_net([node_features, adjacency])

        output1 = self.output1(node_repr)
        output2 = self.output2(node_repr)
        return output1, output2

0 commit comments

Comments
 (0)