Understanding the code on GitHub: Bert-BiLSTM-CRF-pytorch
Related GitHub link: link.

neg_log_likelihood_loss = forward_score - gold_score
The code below is what computes the score over all possible tag paths (forward_score), i.e. the log partition function of the CRF.
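Before diving into _forward_alg, a toy illustration (mine, not code from the repository) of what this loss means: forward_score is the log-sum-exp over the scores of every possible path, gold_score is the score of the annotated path, and their difference is the negative log-probability of the gold path.

import torch

# Toy example with three hypothetical path scores; the second path is the gold one.
path_scores = torch.tensor([2.0, 5.0, 1.0])
gold_score = path_scores[1]

forward_score = torch.logsumexp(path_scores, dim=0)      # log Z = log(sum over paths of exp(score))
neg_log_likelihood = forward_score - gold_score          # = -log p(gold path)

print(neg_log_likelihood)                                 # ~0.066
print(-torch.log(torch.softmax(path_scores, dim=0)[1]))   # same value, by definition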

    def _forward_alg(self, feats, mask=None):
        """Do the forward algorithm to compute the partition function (batched).

        Args:
            feats: size=(batch_size, seq_len, self.target_size+2)
            mask: size=(batch_size, seq_len)

        Returns:
            (final_partition.sum(), scores)
        """
        batch_size = feats.size(0)
        seq_len = feats.size(1)
        tag_size = feats.size(-1)
        # 1. After transposing, mask has shape (seq_len, batch).
        #    feats originally has shape (batch_size, seq_len, tag_size):
        #      transpose -> (seq_len, batch_size, tag_size)
        #      view      -> (seq_len*batch_size, 1, tag_size)
        #      expand along dim -2 -> (seq_len*batch_size, [tag_size], tag_size)
        mask = mask.transpose(1, 0).contiguous()
        ins_num = batch_size * seq_len
        feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
        # 2. scores: the LSTM emissions (feats) at every time step plus the transition scores
        scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
        scores = scores.view(seq_len, batch_size, tag_size, tag_size)
        seq_iter = enumerate(scores)  # seq_iter: iterates over the time steps of scores, starting at t=0
        # inivalues: the scores of the first time step (t=0)
        try:
            _, inivalues = seq_iter.__next__()
        except:
            _, inivalues = seq_iter.next()
        # 3. Initialize alpha (partition) at t=0
        partition = inivalues[:, self.START_TAG_IDX, :].clone().view(batch_size, tag_size, 1)
        # 4. Iteratively update alpha (i.e. partition) for t = 1, 2, ...
        for idx, cur_values in seq_iter:  # for idx = 1, 2, 3, ...; cur_values is the LSTM emission + transition score
            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
            cur_partition = log_sum_exp(cur_values, tag_size)
            mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)
            masked_cur_partition = cur_partition.masked_select(mask_idx.byte())
            if masked_cur_partition.dim() != 0:
                # Copy the elements of masked_cur_partition into partition at the positions
                # where mask_idx is 1; mask must have the same number of elements as partition.
                # In other words, the partition values of masked (padding) positions are not updated.
                mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)
                partition.masked_scatter_(mask_idx.byte(), masked_cur_partition)
        cur_values = self.transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
        cur_partition = log_sum_exp(cur_values, tag_size)
        final_partition = cur_partition[:, self.END_TAG_IDX]
        return final_partition.sum(), scores


def log_sum_exp(vec, m_size):
    """Same result as torch.log(torch.sum(torch.exp(vec), 1)).

    Computing that expression directly can overflow (e.g. exp(999) = INF),
    so the column-wise max from torch.max(vec, 1) is subtracted first.

    Args:
        vec: size=(batch_size, vanishing_dim, hidden_dim)
        m_size: hidden_dim

    Returns:
        size=(batch_size, hidden_dim)
    """
    _, idx = torch.max(vec, 1)  # B * 1 * M; subtract each column's max to avoid overflow in exp
    max_score = torch.gather(vec, 1, idx.view(-1, 1, m_size)).view(-1, 1, m_size)  # B * M
    return max_score.view(-1, m_size) + torch.log(torch.sum(torch.exp(vec - max_score.expand_as(vec)), 1)).view(-1, m_size)
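A quick sanity check I added (not part of the repository): log_sum_exp should agree with torch.logsumexp over dim=1, even for values large enough that a naive exp-then-sum would overflow.

import torch

vec = torch.randn(2, 4, 4) * 100           # values large enough to overflow a naive exp-sum
stable = log_sum_exp(vec, m_size=4)         # (batch, hidden) = (2, 4)
reference = torch.logsumexp(vec, dim=1)     # PyTorch's built-in numerically stable version
print(torch.allclose(stable, reference))    # True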

The _forward_alg procedure in class CRF(nn.Module)

0. Input setup


import torch

START_TAG_IDX, END_TAG_IDX = -2, -1
feats = torch.FloatTensor([[[ 0.1938, -0.0033, -0.0786,  0.1115],
                            [-0.0450, -0.1575,  0.0550, -0.1546],
                            [-0.0271, -0.0669, -0.0533, -0.1674]],
                           [[-0.0269, -0.1714, -0.0775, -0.0791],
                            [-0.0745, -0.2008, -0.1868,  0.2168],
                            [ 0.0703,  0.0196,  0.0457,  0.0400]]])
mask = torch.ByteTensor([[1, 1, 0], [1, 1, 1]])  # the mask printed below is uint8
tags = torch.FloatTensor([[5, 4, 0], [5, 1, 6]])
transitions = torch.Tensor([[    7,     3, -1000,     2],
                            [    2,     1, -1000,     5],
                            [    1,     3, -1000,     2],
                            [-1000, -1000, -1000, -1000]])
# transitions.shape: torch.Size([4, 4]); the 4 tags include the 2 start/end symbols
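A note on how I read the tag layout in this toy example: with tag_size = 4, index 2 (START_TAG_IDX = -2) is the START symbol and index 3 (END_TAG_IDX = -1) is the END symbol, and transitions[i, j] is the score of moving from tag i to tag j. The -1000 entries then act as forbidden transitions:

# Column START_TAG_IDX is -1000 everywhere: no tag may transition *into* START.
print(transitions[:, START_TAG_IDX])   # tensor([-1000., -1000., -1000., -1000.])
# Row END_TAG_IDX is -1000 everywhere: no tag may follow END.
print(transitions[END_TAG_IDX, :])     # tensor([-1000., -1000., -1000., -1000.])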

1. Transposing and expanding mask and feats

batch_size = feats.size(0)
seq_len = feats.size(1)
tag_size = feats.size(-1)
# 1. After transposing, mask has shape (seq_len, batch).
#    feats originally has shape (batch_size, seq_len, tag_size):
#      transpose -> (seq_len, batch_size, tag_size)
#      view      -> (seq_len*batch_size, 1, tag_size)
#      expand along dim -2 -> (seq_len*batch_size, [tag_size], tag_size)
mask = mask.transpose(1, 0).contiguous()
ins_num = batch_size * seq_len
feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
print ('batch_size,seq_len,tag_size:',batch_size,seq_len,tag_size)
print ('\nmask:',mask.shape,'\n',mask)
print ('\nins_num=',ins_num)
print ('\nfeats:',feats.shape,'\n',feats)
'''
batch_size,seq_len,tag_size: 2 3 4

mask: torch.Size([3, 2]) 
 tensor([[1, 1],
        [1, 1],
        [0, 1]], dtype=torch.uint8)

ins_num= 6

feats: torch.Size([6, 4, 4]) 
 tensor([[[ 0.1938, -0.0033, -0.0786,  0.1115],
         [ 0.1938, -0.0033, -0.0786,  0.1115],
         [ 0.1938, -0.0033, -0.0786,  0.1115],
         [ 0.1938, -0.0033, -0.0786,  0.1115]],

        [[-0.0269, -0.1714, -0.0775, -0.0791],
         [-0.0269, -0.1714, -0.0775, -0.0791],
         [-0.0269, -0.1714, -0.0775, -0.0791],
         [-0.0269, -0.1714, -0.0775, -0.0791]],

        [[-0.0450, -0.1575,  0.0550, -0.1546],
         [-0.0450, -0.1575,  0.0550, -0.1546],
         [-0.0450, -0.1575,  0.0550, -0.1546],
         [-0.0450, -0.1575,  0.0550, -0.1546]],

        [[-0.0745, -0.2008, -0.1868,  0.2168],
         [-0.0745, -0.2008, -0.1868,  0.2168],
         [-0.0745, -0.2008, -0.1868,  0.2168],
         [-0.0745, -0.2008, -0.1868,  0.2168]],

        [[-0.0271, -0.0669, -0.0533, -0.1674],
         [-0.0271, -0.0669, -0.0533, -0.1674],
         [-0.0271, -0.0669, -0.0533, -0.1674],
         [-0.0271, -0.0669, -0.0533, -0.1674]],

        [[ 0.0703,  0.0196,  0.0457,  0.0400],
         [ 0.0703,  0.0196,  0.0457,  0.0400],
         [ 0.0703,  0.0196,  0.0457,  0.0400],
         [ 0.0703,  0.0196,  0.0457,  0.0400]]])
'''
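To see why every 4×4 block of the expanded feats has identical rows, here is a standalone sketch with a toy tensor of my own (the same transpose → view → expand chain as above):

import torch

toy = torch.arange(6.).view(1, 2, 3)        # (batch=1, seq_len=2, tag_size=3)
out = (toy.transpose(1, 0).contiguous()      # (seq_len, batch, tag_size)
          .view(2, 1, 3)                     # (seq_len*batch, 1, tag_size)
          .expand(2, 3, 3))                  # each emission row repeated tag_size times
print(out[0])
# tensor([[0., 1., 2.],
#         [0., 1., 2.],
#         [0., 1., 2.]])
# Every row of a block is the same emission vector; the repeated rows line up with
# the "previous tag" dimension, so the transition matrix can be added element-wise.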

2. Computing scores: the LSTM emissions (feats) at every time step plus the transition scores

# transitions is expanded to seq_len * batch copies, giving shape [6, 4, 4], then added element-wise to feats
tr_ = transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
print ('tr_',tr_.shape,'\n',tr_)
scores = feats + tr_
print ('scores',scores.shape,'\n',scores)
'''
tr_:  torch.Size([6, 4, 4]) 
 tensor([[[    7.,     3., -1000.,     2.],
         [    2.,     1., -1000.,     5.],
         [    1.,     3., -1000.,     2.],
         [-1000., -1000., -1000., -1000.]],
        ............
        [[    7.,     3., -1000.,     2.],
         [    2.,     1., -1000.,     5.],
         [    1.,     3., -1000.,     2.],
         [-1000., -1000., -1000., -1000.]]])

scores: torch.Size([6, 4, 4]) 
 tensor([[[  7.1938,    2.9967,  -1000.1,     2.1115],
         [  2.1938,    0.9967,  -1000.1,     5.1115],
         [  1.1938,    2.9967,  -1000.1,     2.1115],
         [-999.81,    -1000,    -1000.1,  -999.89]],

        [[ 6.9731e+00,  2.8286e+00, -1.0001e+03,  1.9209e+00],
         [ 1.9731e+00,  8.2860e-01, -1.0001e+03,  4.9209e+00],
         [ 9.7310e-01,  2.8286e+00, -1.0001e+03,  1.9209e+00],
         [-1.0000e+03, -1.0002e+03, -1.0001e+03, -1.0001e+03]],

        [[ 6.9550e+00,  2.8425e+00, -9.9995e+02,  1.8454e+00],
         [ 1.9550e+00,  8.4250e-01, -9.9995e+02,  4.8454e+00],
         [ 9.5500e-01,  2.8425e+00, -9.9995e+02,  1.8454e+00],
         [-1.0000e+03, -1.0002e+03, -9.9995e+02, -1.0002e+03]],

        [[ 6.9255e+00,  2.7992e+00, -1.0002e+03,  2.2168e+00],
         [ 1.9255e+00,  7.9920e-01, -1.0002e+03,  5.2168e+00],
         [ 9.2550e-01,  2.7992e+00, -1.0002e+03,  2.2168e+00],
         [-1.0001e+03, -1.0002e+03, -1.0002e+03, -9.9978e+02]],

        [[ 6.9729e+00,  2.9331e+00, -1.0001e+03,  1.8326e+00],
         [ 1.9729e+00,  9.3310e-01, -1.0001e+03,  4.8326e+00],
         [ 9.7290e-01,  2.9331e+00, -1.0001e+03,  1.8326e+00],
         [-1.0000e+03, -1.0001e+03, -1.0001e+03, -1.0002e+03]],

        [[ 7.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
         [ 2.0703e+00,  1.0196e+00, -9.9995e+02,  5.0400e+00],
         [ 1.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
         [-9.9993e+02, -9.9998e+02, -9.9995e+02, -9.9996e+02]]])
'''

scores = scores.view(seq_len, batch_size, tag_size, tag_size)
print ('scores reshape:',scores.shape,'\n',scores)
'''
scores reshape: torch.Size([3, 2, 4, 4]) 
 tensor([[[[ 7.1938e+00,  2.9967e+00, -1.0001e+03,  2.1115e+00],
          [ 2.1938e+00,  9.9670e-01, -1.0001e+03,  5.1115e+00],
          [ 1.1938e+00,  2.9967e+00, -1.0001e+03,  2.1115e+00],
          [-9.9981e+02, -1.0000e+03, -1.0001e+03, -9.9989e+02]],

         [[ 6.9731e+00,  2.8286e+00, -1.0001e+03,  1.9209e+00],
          [ 1.9731e+00,  8.2860e-01, -1.0001e+03,  4.9209e+00],
          [ 9.7310e-01,  2.8286e+00, -1.0001e+03,  1.9209e+00],
          [-1.0000e+03, -1.0002e+03, -1.0001e+03, -1.0001e+03]]],


        [[[ 6.9550e+00,  2.8425e+00, -9.9995e+02,  1.8454e+00],
          [ 1.9550e+00,  8.4250e-01, -9.9995e+02,  4.8454e+00],
          [ 9.5500e-01,  2.8425e+00, -9.9995e+02,  1.8454e+00],
          [-1.0000e+03, -1.0002e+03, -9.9995e+02, -1.0002e+03]],

         [[ 6.9255e+00,  2.7992e+00, -1.0002e+03,  2.2168e+00],
          [ 1.9255e+00,  7.9920e-01, -1.0002e+03,  5.2168e+00],
          [ 9.2550e-01,  2.7992e+00, -1.0002e+03,  2.2168e+00],
          [-1.0001e+03, -1.0002e+03, -1.0002e+03, -9.9978e+02]]],


        [[[ 6.9729e+00,  2.9331e+00, -1.0001e+03,  1.8326e+00],
          [ 1.9729e+00,  9.3310e-01, -1.0001e+03,  4.8326e+00],
          [ 9.7290e-01,  2.9331e+00, -1.0001e+03,  1.8326e+00],
          [-1.0000e+03, -1.0001e+03, -1.0001e+03, -1.0002e+03]],

         [[ 7.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
          [ 2.0703e+00,  1.0196e+00, -9.9995e+02,  5.0400e+00],
          [ 1.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
          [-9.9993e+02, -9.9998e+02, -9.9995e+02, -9.9996e+02]]]])
'''
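Spelling out the indexing convention (my reading of the shapes above): scores[t, b, i, j] is the emission of tag j for sample b at step t plus transitions[i, j], where i is the previous tag and j the current tag. Two spot checks against the printed numbers:

# scores[0, 0, 0, 0] = feats[sample 0, step 0, tag 0] + transitions[0, 0]
print(0.1938 + 7.0)   # 7.1938, matches the first entry of the reshaped scores
# scores[1, 1, 1, 3] = feats[sample 1, step 1, tag 3] + transitions[1, 3]
print(0.2168 + 5.0)   # 5.2168, matches the printed value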

3. Setting up the time-step iterator seq_iter

4. inivalues:

This is the LSTM output of the first time step for the batch of 2 samples, shape (2, 4), expanded to (2, 4, 4) and then added to the transition matrix.

seq_iter = enumerate(scores)
# seq_iter: iterates over the LSTM outputs of all time steps, starting at t=0
# each iteration yields a (2, 4, 4) score tensor
try:
    _, inivalues = seq_iter.__next__()
except:
    _, inivalues = seq_iter.next()
print('inivalues',inivalues.shape,'\n',inivalues)
# inivalues = scores[0], i.e. the value produced by the first iteration of seq_iter
'''
inivalues torch.Size([2, 4, 4]) 
 tensor([[[ 7.1938e+00,  2.9967e+00, -1.0001e+03,  2.1115e+00],
         [ 2.1938e+00,  9.9670e-01, -1.0001e+03,  5.1115e+00],
         [ 1.1938e+00,  2.9967e+00, -1.0001e+03,  2.1115e+00],
         [-9.9981e+02, -1.0000e+03, -1.0001e+03, -9.9989e+02]],

        [[ 6.9731e+00,  2.8286e+00, -1.0001e+03,  1.9209e+00],
         [ 1.9731e+00,  8.2860e-01, -1.0001e+03,  4.9209e+00],
         [ 9.7310e-01,  2.8286e+00, -1.0001e+03,  1.9209e+00],
         [-1.0000e+03, -1.0002e+03, -1.0001e+03, -1.0001e+03]]])
'''

5. Computing the initial value of alpha (partition) at t=0

# At t=0 the partition (running score) is the row of inivalues indexed by START_TAG_IDX,
# i.e. transition(START -> tag) + emission of that tag at the first time step
partition = inivalues[:, START_TAG_IDX, :].clone().view(batch_size, tag_size, 1)
print ('partition',partition.shape,'\n',partition)
'''
partition torch.Size([2, 4, 1]) 
 tensor([[[ 1.1938e+00],
         [ 2.9967e+00],
         [-1.0001e+03],
         [ 2.1115e+00]],

        [[ 9.7310e-01],
         [ 2.8286e+00],
         [-1.0001e+03],
         [ 1.9209e+00]]])
'''
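A spot check of the initialization (mine, using the numbers printed above): partition[b, j] at t = 0 is transitions[START_TAG_IDX, j] plus the emission of tag j at the first time step.

# Sample 0: transitions row START = [1, 3, -1000, 2], first emission = [0.1938, -0.0033, -0.0786, 0.1115]
print(1.0 + 0.1938)      # 1.1938  -> partition[0, 0]
print(3.0 + (-0.0033))   # 2.9967  -> partition[0, 1]
print(2.0 + 0.1115)      # 2.1115  -> partition[0, 3]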

6. Iteratively updating alpha (i.e. partition) at t = 1, 2, ...

for idx, cur_values in seq_iter:  # for idx = 1, 2, 3, ...; cur_values is the LSTM emission + transition score
    print('\n\n', idx, cur_values.shape, '\n', cur_values)
    pa_ = partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
    print('\npa_', pa_.shape, '\n', pa_)
    cur_values = cur_values + pa_
    print('cur_values after the addition:', cur_values.shape, '\n', cur_values)
    cur_partition = log_sum_exp(cur_values, tag_size)
    print('cur_partition, the result of log-sum-exp:\n', cur_partition)
    mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)
    print('mask_idx', mask_idx)
    masked_cur_partition = cur_partition.masked_select(mask_idx.byte())
    print('masked_cur_partition:', masked_cur_partition)
    if masked_cur_partition.dim() != 0:
        # Copy the elements of masked_cur_partition into partition at the positions
        # where mask_idx is 1; mask must have the same number of elements as partition.
        # In other words, the partition values of masked (padding) positions are not updated.
        mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)
        print('mask_idx:', mask_idx)
        partition.masked_scatter_(mask_idx.byte(), masked_cur_partition)
        print('partition', partition)
'''
idx = 1 torch.Size([2, 4, 4]) 
 tensor([[[ 6.9550e+00,  2.8425e+00, -9.9995e+02,  1.8454e+00],
         [ 1.9550e+00,  8.4250e-01, -9.9995e+02,  4.8454e+00],
         [ 9.5500e-01,  2.8425e+00, -9.9995e+02,  1.8454e+00],
         [-1.0000e+03, -1.0002e+03, -9.9995e+02, -1.0002e+03]],

        [[ 6.9255e+00,  2.7992e+00, -1.0002e+03,  2.2168e+00],
         [ 1.9255e+00,  7.9920e-01, -1.0002e+03,  5.2168e+00],
         [ 9.2550e-01,  2.7992e+00, -1.0002e+03,  2.2168e+00],
         [-1.0001e+03, -1.0002e+03, -1.0002e+03, -9.9978e+02]]])

pa_ torch.Size([2, 4, 4]) 
 tensor([[[ 1.1938e+00,  1.1938e+00,  1.1938e+00,  1.1938e+00],
         [ 2.9967e+00,  2.9967e+00,  2.9967e+00,  2.9967e+00],
         [-1.0001e+03, -1.0001e+03, -1.0001e+03, -1.0001e+03],
         [ 2.1115e+00,  2.1115e+00,  2.1115e+00,  2.1115e+00]],

        [[ 9.7310e-01,  9.7310e-01,  9.7310e-01,  9.7310e-01],
         [ 2.8286e+00,  2.8286e+00,  2.8286e+00,  2.8286e+00],
         [-1.0001e+03, -1.0001e+03, -1.0001e+03, -1.0001e+03],
         [ 1.9209e+00,  1.9209e+00,  1.9209e+00,  1.9209e+00]]])

cur_values after the addition: torch.Size([2, 4, 4]) 
 tensor([[[    8.1488,     4.0363,  -998.7512,     3.0392],
         [    4.9517,     3.8392,  -996.9483,     7.8421],
         [ -999.1236,  -997.2361, -2000.0237,  -998.2332],
         [ -997.9335,  -998.0460,  -997.8335,  -998.0431]],

        [[    7.8986,     3.7723,  -999.2137,     3.1899],
         [    4.7541,     3.6278,  -997.3582,     8.0454],
         [ -999.1520,  -997.2783, -2000.2644,  -997.8607],
         [ -998.1536,  -998.2799,  -998.2659,  -997.8623]]])

cur_partition, the result of log-sum-exp:
 tensor([[   8.1889,    4.6357, -996.4925,    7.8503],
        [   7.9408,    4.3958, -996.9136,    8.0532]])

mask_idx tensor([[1, 1, 1, 1],
        [1, 1, 1, 1]], dtype=torch.uint8)

masked_cur_partition: tensor([   8.1889,    4.6357, -996.4925,    7.8503,    7.9408,    4.3958,
        -996.9136,    8.0532])

mask_idx: tensor([[[1],
         [1],
         [1],
         [1]],

        [[1],
         [1],
         [1],
         [1]]], dtype=torch.uint8)

partition tensor([[[   8.1889],
         [   4.6357],
         [-996.4925],
         [   7.8503]],

        [[   7.9408],
         [   4.3958],
         [-996.9136],
         [   8.0532]]])

#############################################################

idx = 2 torch.Size([2, 4, 4]) 
 tensor([[[ 6.9729e+00,  2.9331e+00, -1.0001e+03,  1.8326e+00],
         [ 1.9729e+00,  9.3310e-01, -1.0001e+03,  4.8326e+00],
         [ 9.7290e-01,  2.9331e+00, -1.0001e+03,  1.8326e+00],
         [-1.0000e+03, -1.0001e+03, -1.0001e+03, -1.0002e+03]],

        [[ 7.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
         [ 2.0703e+00,  1.0196e+00, -9.9995e+02,  5.0400e+00],
         [ 1.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
         [-9.9993e+02, -9.9998e+02, -9.9995e+02, -9.9996e+02]]])

pa_ torch.Size([2, 4, 4]) 
 tensor([[[   8.1889,    8.1889,    8.1889,    8.1889],
         [   4.6357,    4.6357,    4.6357,    4.6357],
         [-996.4925, -996.4925, -996.4925, -996.4925],
         [   7.8503,    7.8503,    7.8503,    7.8503]],

        [[   7.9408,    7.9408,    7.9408,    7.9408],
         [   4.3958,    4.3958,    4.3958,    4.3958],
         [-996.9136, -996.9136, -996.9136, -996.9136],
         [   8.0532,    8.0532,    8.0532,    8.0532]]])

cur_values after the addition: torch.Size([2, 4, 4]) 
 tensor([[[   15.1618,    11.1220,  -991.8644,    10.0215],
         [    6.6086,     5.5688,  -995.4175,     9.4683],
         [ -995.5196,  -993.5594, -1996.5458,  -994.6599],
         [ -992.1768,  -992.2166,  -992.2030,  -992.3171]],

        [[   15.0111,    10.9604,  -992.0135,     9.9808],
         [    6.4661,     5.4154,  -995.5585,     9.4358],
         [ -995.8433,  -993.8940, -1996.8679,  -994.8737],
         [ -991.8765,  -991.9272,  -991.9011,  -991.9069]]])

cur_partition, the result of log-sum-exp:
 tensor([[  15.1620,   11.1258, -991.3098,   10.4758],
        [  15.0113,   10.9643, -991.2490,   10.4381]])

mask_idx tensor([[0, 0, 0, 0],
        [1, 1, 1, 1]], dtype=torch.uint8)

masked_cur_partition: tensor([  15.0113,   10.9643, -991.2490,   10.4381])

mask_idx: tensor([[[0],
         [0],
         [0],
         [0]],

        [[1],
         [1],
         [1],
         [1]]], dtype=torch.uint8)

partition tensor([[[   8.1889],
         [   4.6357],
         [-996.4925],
         [   7.8503]],

        [[  15.0113],
         [  10.9643],
         [-991.2490],
         [  10.4381]]])
'''
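In equation form the loop implements the usual forward recurrence, alpha_t(j) = logsumexp_i( alpha_{t-1}(i) + transitions[i, j] + emission_t(j) ), and a step whose mask is 0 (padding) leaves alpha unchanged, which is why sample 0 keeps its idx = 1 values after idx = 2. One printed entry reproduced with torch.logsumexp (my own check):

import torch

# alpha at idx=1 for sample 0, tag 0: reduce column j=0 of "cur_values after the
# addition" over the previous-tag dimension.
col = torch.tensor([8.1488, 4.9517, -999.1236, -997.9335])
print(torch.logsumexp(col, dim=0))   # tensor(8.1889) -> matches cur_partition[0, 0] above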

7. Computing final_partition

v1 = transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size, tag_size)
print ('\nv1:',v1)
v2 = partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
print ('\nv2:',v2)
cur_values = v1 + v2
print('\ncur_values:', cur_values)
cur_partition = log_sum_exp(cur_values, tag_size)
print('\ncur_partition:', cur_partition)
final_partition = cur_partition[:, END_TAG_IDX]
print('\nfinal_partition:', final_partition)
return1 = final_partition.sum()
return2 = scores
print(return1, '\n', return2)
'''
v1: tensor([[[    7.,     3., -1000.,     2.],
         [    2.,     1., -1000.,     5.],
         [    1.,     3., -1000.,     2.],
         [-1000., -1000., -1000., -1000.]],

        [[    7.,     3., -1000.,     2.],
         [    2.,     1., -1000.,     5.],
         [    1.,     3., -1000.,     2.],
         [-1000., -1000., -1000., -1000.]]])

v2: tensor([[[   8.1889,    8.1889,    8.1889,    8.1889],
         [   4.6357,    4.6357,    4.6357,    4.6357],
         [-996.4925, -996.4925, -996.4925, -996.4925],
         [   7.8503,    7.8503,    7.8503,    7.8503]],

        [[  15.0113,   15.0113,   15.0113,   15.0113],
         [  10.9643,   10.9643,   10.9643,   10.9643],
         [-991.2490, -991.2490, -991.2490, -991.2490],
         [  10.4381,   10.4381,   10.4381,   10.4381]]])

cur_values: tensor([[[   15.1889,    11.1889,  -991.8112,    10.1889],
         [    6.6357,     5.6357,  -995.3643,     9.6357],
         [ -995.4925,  -993.4925, -1996.4924,  -994.4925],
         [ -992.1497,  -992.1497,  -992.1497,  -992.1497]],

        [[   22.0113,    18.0113,  -984.9887,    17.0113],
         [   12.9643,    11.9643,  -989.0357,    15.9643],
         [ -990.2490,  -988.2490, -1991.2490,  -989.2490],
         [ -989.5619,  -989.5619,  -989.5619,  -989.5619]]])

cur_partition: tensor([[  15.1891,   11.1927, -991.2565,   10.6432],
        [  22.0114,   18.0136, -984.9613,   17.3121]])

final_partition: tensor([10.6432, 17.3121])

return1: tensor(27.9553) 
return2: torch.Size([3, 2, 4, 4])
 tensor([[[[ 7.1938e+00,  2.9967e+00, -1.0001e+03,  2.1115e+00],
          [ 2.1938e+00,  9.9670e-01, -1.0001e+03,  5.1115e+00],
          [ 1.1938e+00,  2.9967e+00, -1.0001e+03,  2.1115e+00],
          [-9.9981e+02, -1.0000e+03, -1.0001e+03, -9.9989e+02]],

         [[ 6.9731e+00,  2.8286e+00, -1.0001e+03,  1.9209e+00],
          [ 1.9731e+00,  8.2860e-01, -1.0001e+03,  4.9209e+00],
          [ 9.7310e-01,  2.8286e+00, -1.0001e+03,  1.9209e+00],
          [-1.0000e+03, -1.0002e+03, -1.0001e+03, -1.0001e+03]]],


        [[[ 6.9550e+00,  2.8425e+00, -9.9995e+02,  1.8454e+00],
          [ 1.9550e+00,  8.4250e-01, -9.9995e+02,  4.8454e+00],
          [ 9.5500e-01,  2.8425e+00, -9.9995e+02,  1.8454e+00],
          [-1.0000e+03, -1.0002e+03, -9.9995e+02, -1.0002e+03]],

         [[ 6.9255e+00,  2.7992e+00, -1.0002e+03,  2.2168e+00],
          [ 1.9255e+00,  7.9920e-01, -1.0002e+03,  5.2168e+00],
          [ 9.2550e-01,  2.7992e+00, -1.0002e+03,  2.2168e+00],
          [-1.0001e+03, -1.0002e+03, -1.0002e+03, -9.9978e+02]]],


        [[[ 6.9729e+00,  2.9331e+00, -1.0001e+03,  1.8326e+00],
          [ 1.9729e+00,  9.3310e-01, -1.0001e+03,  4.8326e+00],
          [ 9.7290e-01,  2.9331e+00, -1.0001e+03,  1.8326e+00],
          [-1.0000e+03, -1.0001e+03, -1.0001e+03, -1.0002e+03]],

         [[ 7.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
          [ 2.0703e+00,  1.0196e+00, -9.9995e+02,  5.0400e+00],
          [ 1.0703e+00,  3.0196e+00, -9.9995e+02,  2.0400e+00],
          [-9.9993e+02, -9.9998e+02, -9.9995e+02, -9.9996e+02]]]])
'''
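As a final cross-check (added by me, not in the repository), the partition function can be brute-forced by enumerating every tag path. Sample 0 has mask [1, 1, 0], so only its first two steps count; scoring every path START → y1 → y2 → END and log-sum-exp-ing reproduces final_partition[0] ≈ 10.6432:

import torch
from itertools import product

emissions = torch.FloatTensor([[ 0.1938, -0.0033, -0.0786,  0.1115],    # feats[0, 0]
                               [-0.0450, -0.1575,  0.0550, -0.1546]])   # feats[0, 1]
T = transitions          # defined in section 0
START, END = 2, 3        # START_TAG_IDX = -2, END_TAG_IDX = -1 with tag_size = 4

path_scores = []
for y1, y2 in product(range(4), repeat=2):
    s = T[START, y1] + emissions[0, y1] + T[y1, y2] + emissions[1, y2] + T[y2, END]
    path_scores.append(s)

print(torch.logsumexp(torch.stack(path_scores), dim=0))   # tensor(10.6432) ~ final_partition[0]

Sample 1 works the same way over length-3 paths and gives final_partition[1] ≈ 17.3121; the sum of the two is the returned forward_score of 27.9553.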
