
Reproducing the DMCNN Event Extraction Model (3): The DMCNN Model

DMCNN Overview

DMCNN consists of the following components:

  • Embedding Learning: word embeddings pre-trained with the Skip-gram model.
  • Lexical Feature Representation: the context tokens of the trigger and the argument, concatenated to form the lexical-level feature vector L.
  • DMCNN structure:
    • The input has three parts: CWF (Context-Word Feature), PF (Position Feature), and EF (Event-type Feature; EF is not used in this reproduction).
    • Multiple filters perform the convolution, each producing one feature map. Every feature map is split into three parts, and max-pooling each part separately is the dynamic multi-pooling operation, which yields the vector P (see the numpy sketch after this list).
  • Classification output: P and L are concatenated and fed into a classifier.
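
Dynamic multi-pooling is the model's key idea, so here is a minimal numpy sketch of it (toy sizes and made-up positions, not the model code): a feature map is split into three parts at the candidate trigger and argument, and each part is max-pooled separately.

import numpy as np

# Toy feature map: 10 convolution outputs, 4 filters (all sizes assumed).
feature_map = np.random.rand(10, 4)
trigger_pos, argument_pos = 3, 7          # hypothetical candidate positions

# Split at the trigger and the argument, then max-pool each part separately.
parts = [feature_map[:trigger_pos + 1],
         feature_map[trigger_pos + 1:argument_pos + 1],
         feature_map[argument_pos + 1:]]
P = np.concatenate([part.max(axis=0) for part in parts])   # [3 * filters]
assert P.shape == (12,)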

(Figure: DMCNN model architecture)

Model Code Implementation

Full Code

from keras.models import Model
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.initializers import Constant
from keras import backend as K
from keras.layers.core import Lambda
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, TimeDistributed, Flatten
from keras.layers import Dropout, Dense, Concatenate, Reshape, Multiply


class DMCNN:
    def __init__(self, max_sequence_length, embedding_matrix,
                 window_size=3, filters_num=200, pf_dim=5, invalid_flag=-1,
                 output=8, l2_param=0.01, lr_param=0.001):
        self.steps = max_sequence_length
        self.embedding_matrix = embedding_matrix
        self.window = window_size
        self.filters = filters_num
        self.dim = embedding_matrix.shape[1]
        self.pf_dim = pf_dim
        self.invalid_flag = invalid_flag
        self.output = output

        self.l2_param = l2_param

        self.model = self.build()
        self.model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lr_param, beta_1=0.8),
                           metrics=['accuracy'])

    def build(self):
        # [n, steps]
        cwf_input = Input(shape=(self.steps,), name='Word2Vec')
        # [n, steps, steps, 2]
        pf_input = Input(shape=(self.steps, self.steps, 2), name='PositionFeatures')
        # [n, steps, 6]
        lexical_level_input = Input(shape=(self.steps, 6), name='LexicalLevelFeatures')

        # [n, steps, 3, steps]
        event_words_mask_input = Input(shape=(self.steps, 3, self.steps), name='EventWordsMask')

        # ----------------------------------------------------------------------- #

        # [n, steps, dim]
        cwf_emb = Embedding(self.embedding_matrix.shape[0],
                            self.embedding_matrix.shape[1],
                            embeddings_initializer=Constant(self.embedding_matrix),
                            input_length=self.steps,
                            trainable=False, name='cwf_embedding')(cwf_input)
        # [n, steps, steps, dim]
        cwf_repeat = Lambda(lambda x: K.repeat_elements(x[:, None, :, :], rep=self.steps, axis=1),
                            name='max_sequence_repeat')(cwf_emb)

        # [n, steps, steps, pf_dim]
        pf_emb = TimeDistributed(Dense(self.pf_dim), name='pf_embedding')(pf_input)

        # [n, steps, steps, dim + pf_dim]
        sentence_level = Concatenate(name='SentenceLevel')([cwf_repeat, pf_emb])

        sentence_masks = []
        for i in range(3):
            # mask: [n, steps, 3, steps] -> [n, steps, steps] -> [n, steps, steps, 1],
            # then broadcast-multiplied with sentence_level: [n, steps, steps, dim + pf_dim].
            # (i=i binds the loop variable so each Lambda keeps its own part index)
            sentence_mask = Lambda(
                lambda x, i=i: K.expand_dims(x[0][:, :, i, :], axis=-1) * x[1],
                name='mask{}'.format(i))([event_words_mask_input, sentence_level])

            # [n, steps, steps, dim + pf_dim] -> [n, steps, 1, steps, dim + pf_dim]
            sentence_mask_reshape = Lambda(lambda x: K.expand_dims(x, axis=2),
                                           name='sentence_mask_reshape{}'.format(i))(sentence_mask)

            sentence_masks.append(sentence_mask_reshape)

        # [n, steps, 3, steps, dim + pf_dim]
        sentence = Concatenate(name='SentenceLevelMask', axis=2)(sentence_masks)

        # [n, steps, 3, steps - window + 1, filters]
        conv = TimeDistributed(
            TimeDistributed(Conv1D(filters=self.filters, kernel_size=self.window, activation='relu')),
            name='conv')(sentence)

        # [n, steps, 3, 1, filters]
        conv_pool = TimeDistributed(
            TimeDistributed(MaxPooling1D(self.steps - self.window + 1)),
            name='max_pooling')(conv)
        # [n, steps, 3 * filters]
        conv_flatten = TimeDistributed(Flatten(), name='flatten')(conv_pool)
        cnn = TimeDistributed(Dropout(0.5), name='dropout')(conv_flatten)

        # ----------------------------------------------------------------------- #

        lexical_level_embeddings = []
        for i in range(6):
            # [n, steps, dim]  (i=i again binds the loop variable)
            lexical_level_emb = TimeDistributed(
                Lambda(lambda x, i=i: self.get_embedding(x[:, i])),
                name='LexicalEmbedding{}'.format(i))(lexical_level_input)
            lexical_level_embeddings.append(lexical_level_emb)
        # [n, steps, 6 * dim]
        lexical_level = Concatenate(name='LexicalLevel')(lexical_level_embeddings)

        # ----------------------------------------------------------------------- #

        # [n, steps, 3 * filters + 6 * dim]
        fusion = Concatenate(name='LexicalAndSentence')([cnn, lexical_level])

        # [n, steps, 32]
        dense = TimeDistributed(
            Dense(32, activation='relu', kernel_regularizer=l2(self.l2_param)),
            name='fc')(fusion)
        # [n, steps, output]
        output = TimeDistributed(
            Dense(self.output, activation='softmax', kernel_regularizer=l2(self.l2_param)),
            name='output')(dense)

        model = Model(inputs=[cwf_input, pf_input, lexical_level_input, event_words_mask_input], outputs=output)
        return model

    def get_embedding(self, x):
        # x: [n, ] -> [n, 1] -> [n, 1, dim] -> [n, dim]
        x = K.expand_dims(x, axis=-1)
        emb = Embedding(self.embedding_matrix.shape[0],
                        self.embedding_matrix.shape[1],
                        embeddings_initializer=Constant(self.embedding_matrix),
                        input_length=1,
                        trainable=False)(x)
        flat = Flatten()(emb)
        return flat

Inputs

The model takes four inputs: CWF, PF, lexical-level features, and the mask. Before looking at each, recall the definitions of embedding_matrix, position, lexical, and mask:

import numpy as np

# [number of words, embedding dim]
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))

# [sentence i, (trigger) word j, position vector, relative to trigger or argument]
position = np.zeros((len(dataset), MAX_SEQUENCE_LENGTH, MAX_SEQUENCE_LENGTH, 2))

# [sentence i, (trigger) word j, token ids of the left/middle/right tokens of the trigger and the argument]
lexical = np.zeros((len(dataset), MAX_SEQUENCE_LENGTH, 6))

# [sentence i, (trigger) word j, one of 3 parts, mask vector]
mask = np.ones((len(dataset), MAX_SEQUENCE_LENGTH, 3, MAX_SEQUENCE_LENGTH))
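
To make the mask concrete, here is a hypothetical filling for a single sentence (the real preprocessing is covered in the previous post of this series; the exact segment boundaries below are assumptions): suppose the trigger of sentence 0 sits at position 3 and the candidate argument at position 7, so each of the three mask rows selects one segment of the sentence.

import numpy as np

MAX_SEQUENCE_LENGTH = 10
t, a = 3, 7                                   # assumed trigger / argument positions
mask = np.zeros((1, MAX_SEQUENCE_LENGTH, 3, MAX_SEQUENCE_LENGTH))

mask[0, t, 0, :t + 1] = 1                     # part 1: up to and including the trigger
mask[0, t, 1, t + 1:a + 1] = 1                # part 2: after the trigger, up to the argument
mask[0, t, 2, a + 1:] = 1                     # part 3: after the argument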

The model's inputs are therefore defined as follows:

# [n, steps]
cwf_input = Input(shape=(self.steps,), name='Word2Vec')
# [n, steps, steps, 2]
pf_input = Input(shape=(self.steps, self.steps, 2), name='PositionFeatures')
# [n, steps, 6]
lexical_level_input = Input(shape=(self.steps, 6), name='LexicalLevelFeatures')

# [n, steps, 3, steps]
event_words_mask_input = Input(shape=(self.steps, 3, self.steps), name='EventWordsMask')

DMCNN Structure

Inputs

CWF

The CWF embedding is as follows, with the embedding weights given by embedding_matrix:

# [n, steps, dim]
cwf_emb = Embedding(self.embedding_matrix.shape[0],
                    self.embedding_matrix.shape[1],
                    embeddings_initializer=Constant(self.embedding_matrix),
                    input_length=self.steps,
                    trainable=False, name='cwf_embedding')(cwf_input)

Then cwf_emb is expanded to [n, steps, steps, dim], so that its first three dimensions match those of pf_emb:

cwf_repeat = Lambda(lambda x: K.repeat_elements(x[:, None, :, :], rep=self.steps, axis=1),
                    name='max_sequence_repeat')(cwf_emb)
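
What the repeat does, sketched in numpy (toy sizes): every candidate position along the new axis gets its own copy of the full sentence embedding, so each copy can later be masked independently.

import numpy as np

steps, dim = 4, 3                                        # toy sizes
cwf_emb = np.random.rand(1, steps, dim)                  # [n, steps, dim]
cwf_repeat = np.repeat(cwf_emb[:, None, :, :], steps, axis=1)
assert cwf_repeat.shape == (1, steps, steps, dim)        # [n, steps, steps, dim]
assert np.allclose(cwf_repeat[0, 0], cwf_repeat[0, 1])   # identical copies per row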

PF

pf_emb is produced by passing pf_input through a linear transformation (a Dense layer), with pf_dim=5:

# [n, steps, steps, pf_dim]
pf_emb = TimeDistributed(Dense(self.pf_dim), name='pf_embedding')(pf_input)
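
Since Dense acts on the last axis, the same 2 → pf_dim linear map is applied to every (candidate position, token) pair. Sketched in numpy with random weights (shapes assumed):

import numpy as np

steps, pf_dim = 4, 5
pf = np.random.rand(1, steps, steps, 2)          # relative distances to trigger / argument
W, b = np.random.rand(2, pf_dim), np.random.rand(pf_dim)
pf_emb = pf @ W + b                              # [n, steps, steps, pf_dim]
assert pf_emb.shape == (1, steps, steps, pf_dim)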

Concatenating CWF and PF

Finally, CWF and PF are concatenated (this is why cwf_emb was expanded above), giving the input to the DMCNN structure, i.e. the sentence-level word representation:

# [n, steps, steps, dim + pf_dim]
sentence_level = Concatenate(name='SentenceLevel')([cwf_repeat, pf_emb])

CNN

First apply the three-part mask: for each part i, the corresponding mask row zeroes out every token outside that part, and the three masked copies of the sentence-level representation are stacked along a new axis:

sentence_masks = []
for i in range(3):
    # mask: [n, steps, 3, steps] -> [n, steps, steps] -> [n, steps, steps, 1],
    # then broadcast-multiplied with sentence_level: [n, steps, steps, dim + pf_dim].
    # (i=i binds the loop variable so each Lambda keeps its own part index)
    sentence_mask = Lambda(
        lambda x, i=i: K.expand_dims(x[0][:, :, i, :], axis=-1) * x[1],
        name='mask{}'.format(i))([event_words_mask_input, sentence_level])

    # [n, steps, steps, dim + pf_dim] -> [n, steps, 1, steps, dim + pf_dim]
    sentence_mask_reshape = Lambda(lambda x: K.expand_dims(x, axis=2),
                                   name='sentence_mask_reshape{}'.format(i))(sentence_mask)

    sentence_masks.append(sentence_mask_reshape)

# [n, steps, 3, steps, dim + pf_dim]
sentence = Concatenate(name='SentenceLevelMask', axis=2)(sentence_masks)

Feed the stacked parts into the convolution and pooling layers:

# [n, steps, 3, steps - window + 1, filters]
conv = TimeDistributed(
    TimeDistributed(Conv1D(filters=self.filters, kernel_size=self.window, activation='relu')),
    name='conv')(sentence)

# [n, steps, 3, 1, filters]
conv_pool = TimeDistributed(
    TimeDistributed(MaxPooling1D(self.steps - self.window + 1)),
    name='max_pooling')(conv)
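
The pool size steps - window + 1 is exactly the length that a 'valid' convolution leaves, so the pooling collapses each (already masked) part to a single max per filter. A quick shape check with toy sizes:

from keras.layers import Input, Conv1D, MaxPooling1D
from keras.models import Model

steps, window, filters = 30, 3, 8                    # toy sizes
x = Input(shape=(steps, 16))
h = Conv1D(filters, window, activation='relu')(x)    # [n, steps - window + 1, filters]
p = MaxPooling1D(steps - window + 1)(h)              # [n, 1, filters]
m = Model(x, p)
assert m.output_shape == (None, 1, filters)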

Flatten the pooled result, then apply Dropout:

# [n, steps, 3 * filters]
conv_flatten = TimeDistributed(Flatten(), name='flatten')(conv_pool)
cnn = TimeDistributed(Dropout(0.5), name='dropout')(conv_flatten)

Lexical Level

Convert each of the six tokens (the trigger and the argument, each together with its left and right neighbors) into a word vector, then concatenate the six vectors:

lexical_level_embeddings = []
for i in range(6):
    # [n, steps, dim]  (i=i again binds the loop variable)
    lexical_level_emb = TimeDistributed(
        Lambda(lambda x, i=i: self.get_embedding(x[:, i])),
        name='LexicalEmbedding{}'.format(i))(lexical_level_input)
    lexical_level_embeddings.append(lexical_level_emb)
# [n, steps, 6 * dim]
lexical_level = Concatenate(name='LexicalLevel')(lexical_level_embeddings)

Classification Output

Concatenate the DMCNN output with the Lexical Level representation and feed the result through the classification layers to obtain the output (the fusion step is included below for completeness):

# [n, steps, 3 * filters + 6 * dim]
fusion = Concatenate(name='LexicalAndSentence')([cnn, lexical_level])

# [n, steps, 32]
dense = TimeDistributed(
    Dense(32, activation='relu', kernel_regularizer=l2(self.l2_param)),
    name='fc')(fusion)

# [n, steps, output]
output = TimeDistributed(
    Dense(self.output, activation='softmax', kernel_regularizer=l2(self.l2_param)),
    name='output')(dense)
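
Finally, a smoke test with random data to confirm the wiring, assuming the Keras version used in this series (toy sizes; the shapes follow the array definitions above, but all numbers are made up):

import numpy as np

steps, num_words, dim, n = 20, 100, 50, 4
embedding_matrix = np.random.rand(num_words, dim).astype('float32')
dmcnn = DMCNN(max_sequence_length=steps, embedding_matrix=embedding_matrix)

cwf = np.random.randint(0, num_words, size=(n, steps))
pf = np.random.rand(n, steps, steps, 2)
lexical = np.random.randint(0, num_words, size=(n, steps, 6))
mask = np.ones((n, steps, 3, steps))
preds = dmcnn.model.predict([cwf, pf, lexical, mask])
assert preds.shape == (n, steps, 8)        # 8 = default number of event classes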