```python
class SAGEConv(nn.Module):
    r"""

    Parameters
    ----------
    in_feats : int, or pair of ints
        Input feature size; i.e., the number of dimensions of :math:`h_i^{(l)}`.
        If the aggregator type is ``gcn``, the source and destination feature
        sizes must be equal in the heterogeneous (bipartite) case, because the
        code later computes graph.dstdata['neigh'] + graph.dstdata['h'].
    out_feats : int
        Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`.
    feat_drop : float
        Dropout rate on features, default: ``0``.
    aggregator_type : str
        The aggregation function in Eq. (1).
        Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
    bias : bool
        If True, adds a learnable bias to the output. Default: ``True``.
    norm : callable activation function/layer or None, optional
        If not None, applies normalization to the updated node features.
    activation : callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node
        features. Default: ``None``.
    """
```
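To make the parameter list concrete, here is a minimal usage sketch; the toy graph, feature sizes, and aggregator choice are illustrative, not from the walkthrough:

```python
import dgl
import torch
from dgl.nn import SAGEConv

# Toy graph: 4 nodes, 3 directed edges (0->1, 1->2, 2->3).
g = dgl.graph(([0, 1, 2], [1, 2, 3]))
feat = torch.randn(4, 10)                  # (N, D_in)
conv = SAGEConv(in_feats=10, out_feats=5, aggregator_type='mean')
out = conv(g, feat)                        # (N, D_out) == (4, 5)
```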
```python
def forward(self, graph, feat, edge_weight=None):
    r"""

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, it represents the input feature of shape
        :math:`(N, D_{in})` where :math:`D_{in}` is the size of the input
        feature and :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, the pair must contain two tensors
        of shape :math:`(N_{in}, D_{in_{src}})` and
        :math:`(N_{out}, D_{in_{dst}})`.
    edge_weight : torch.Tensor, optional
        Optional tensor on the edges. If given, the convolution weights the
        messages by these values.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
        is the size of the output feature.
    """
    with graph.local_scope():
        if isinstance(feat, tuple):
            feat_src = self.feat_drop(feat[0])
            feat_dst = self.feat_drop(feat[1])
        else:
            feat_src = feat_dst = self.feat_drop(feat)
            # Blocks are not covered yet... In a block (message-flow graph),
            # the destination nodes are the first number_of_dst_nodes()
            # entries of the source nodes.
            if graph.is_block:
                feat_dst = feat_src[:graph.number_of_dst_nodes()]
        # Default: aggregate without edge weights.
        aggregate_fn = fn.copy_src('h', 'm')
        # With edge weights: multiply each message by its edge weight.
        if edge_weight is not None:
            assert edge_weight.shape[0] == graph.number_of_edges()
            graph.edata['_edge_weight'] = edge_weight
            aggregate_fn = fn.u_mul_e('h', '_edge_weight', 'm')

        h_self = feat_dst

        # Handle graphs without edges -- nothing to propagate, then!
        # Set the destination nodes' 'neigh' feature to zero.
        if graph.number_of_edges() == 0:
            # .to(feat_dst) casts to feat_dst's dtype and device.
            graph.dstdata['neigh'] = torch.zeros(
                feat_dst.shape[0], self._in_src_feats).to(feat_dst)
```
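The aggregator branches follow. The simplest, mean, is not walked through below, so here is a sketch of roughly how it looks in the DGL version annotated here (not verbatim source):

```python
if self._aggre_type == 'mean':
    graph.srcdata['h'] = feat_src
    # Average all incoming messages into dstdata['neigh'].
    graph.update_all(aggregate_fn, fn.mean('m', 'neigh'))
    h_neigh = graph.dstdata['neigh']
```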
GCN aggregator: since the model in the GCN paper is transductive, GraphSAGE derives an inductive form of GCN, shown in Eq. (6), noting "We call this modified mean-based aggregator convolutional since it is a rough, linear approximation of a localized spectral convolution." Its mean divides by the node's in-degree (plus one, since the node's own feature is included), which is one difference from the MEAN aggregator. The second difference: gcn directly sums the current node's feature with its neighbors' features, takes the average, and then applies a linear transformation, whereas mean first concatenates the current node's feature with the aggregated neighbor feature and then applies the linear transformation. In the implementation, mean realizes this equivalently by transforming first and adding afterwards, which requires two fully connected layers (fc_self and fc_neigh). Hence **gcn passes through a single fully connected layer, while mean uses two separate ones, fc_self for the node itself and fc_neigh for its neighbors**.
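A sketch of the gcn branch, following the DGL source of this era (check_eq_shape is DGL's internal helper enforcing the equal-feature-size requirement mentioned in the docstring); note the sum over messages, the added self feature, and the division by in-degree plus one:

```python
elif self._aggre_type == 'gcn':
    check_eq_shape(feat)  # src/dst feature sizes must match (see above)
    graph.srcdata['h'] = feat_src
    graph.dstdata['h'] = feat_dst
    graph.update_all(aggregate_fn, fn.sum('m', 'neigh'))
    # Divide by in-degree + 1: the node's own feature joins the average.
    degs = graph.in_degrees().to(feat_dst)
    h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) \
        / (degs.unsqueeze(-1) + 1)
```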
LSTM aggregator: it is more expressive than the mean aggregator, but an LSTM is not symmetric, i.e., it assumes an ordering over its inputs. The paper's authors adapt it to unordered neighbor sets by feeding the neighbors in a random permutation (DGL does not shuffle; it consumes messages in edge-ID order):
```python
# LSTM aggregator implementation.
def _lstm_reducer(self, nodes):
    """LSTM reducer

    NOTE(zihao): lstm reducer with default schedule (degree bucketing)
    is slow, we could accelerate this with degree padding in the future.
    """
    m = nodes.mailbox['m']  # (B, L, D)
    batch_size = m.shape[0]
    # Zero-filled initial hidden and cell states with the same dtype and
    # device as m, shaped (1, B, D).
    h = (m.new_zeros((1, batch_size, self._in_src_feats)),
         m.new_zeros((1, batch_size, self._in_src_feats)))
    _, (rst, _) = self.lstm(m, h)
    return {'neigh': rst.squeeze(0)}
```

The corresponding branch in `forward`:

```python
elif self._aggre_type == 'lstm':
    graph.srcdata['h'] = feat_src
    graph.update_all(aggregate_fn, self._lstm_reducer)
    h_neigh = graph.dstdata['neigh']
```
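For completeness, the pool aggregator (listed in the docstring but not walked through here) applies a learned transform plus ReLU before an element-wise max. A sketch following the same source; it assumes `F` is `torch.nn.functional` and that `self.fc_pool` is a linear layer defined in `__init__`:

```python
elif self._aggre_type == 'pool':
    # Transform each source feature, then take the element-wise max
    # over incoming messages.
    graph.srcdata['h'] = F.relu(self.fc_pool(feat_src))
    graph.update_all(aggregate_fn, fn.max('m', 'neigh'))
    h_neigh = graph.dstdata['neigh']
```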
```python
else:
    raise KeyError('Aggregator type {} not recognized.'
                   .format(self._aggre_type))

# GraphSAGE GCN does not require fc_self:
# the gcn aggregator has already folded h_self into h_neigh.
if self._aggre_type == 'gcn':
    rst = self.fc_neigh(h_neigh)
```
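The remainder of `forward` is not shown in this walkthrough. Roughly, following the same DGL source, the non-gcn aggregators sum the two linear transforms, and the optional activation and normalization are applied last:

```python
else:
    # mean / pool / lstm: separate transforms for self and neighbors.
    rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
# Optional activation, e.g. F.relu.
if self.activation is not None:
    rst = self.activation(rst)
# Optional normalization, e.g. an L2 norm.
if self.norm is not None:
    rst = self.norm(rst)
return rst
```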