import chainer
from chainer import functions
from chainer_chemistry.links.connection.graph_linear import GraphLinear
class RelGATUpdate(chainer.Chain):
"""RelGAT submodule for update part.
Args:
in_channels (int): dimension of input feature vector
out_channels (int): dimension of output feature vector
n_heads (int): number of multi-head-attentions.
n_edge_types (int): number of edge types.
dropout_ratio (float): dropout ratio of the normalized attention
coefficients
negative_slope (float): LeakyRELU angle of the negative slope
softmax_mode (str): take the softmax over the logits 'across' or
'within' relation. If you would like to know the detail discussion,
please refer Relational GAT paper.
concat_heads (bool) : Whether to concat or average multi-head
attentions
"""
    def __init__(self, in_channels, out_channels, n_heads=3, n_edge_types=4,
dropout_ratio=-1., negative_slope=0.2, softmax_mode='across',
concat_heads=False):
super(RelGATUpdate, self).__init__()
with self.init_scope():
self.message_layer = GraphLinear(
in_channels, out_channels * n_edge_types * n_heads)
self.attention_layer = GraphLinear(out_channels * 2, 1)
self.in_channels = in_channels
self.out_channels = out_channels
self.n_heads = n_heads
self.n_edge_types = n_edge_types
self.dropout_ratio = dropout_ratio
self.softmax_mode = softmax_mode
self.concat_heads = concat_heads
self.negative_slope = negative_slope
def __call__(self, h, adj):
xp = self.xp
# (minibatch, atom, channel)
mb, atom, ch = h.shape
# (minibatch, atom, EDGE_TYPE * heads * out_dim)
h = self.message_layer(h)
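        # message_layer projects each atom feature into
        # n_edge_types * n_heads separate out_channels-dimensional messages
        # in a single linear pass; the reshape below splits them apart.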
# (minibatch, atom, EDGE_TYPE, heads, out_dim)
h = functions.reshape(h, (mb, atom, self.n_edge_types, self.n_heads,
self.out_channels))
        # concatenate features of all atom pairs
        # (minibatch, 1, atom, EDGE_TYPE, heads, out_dim)
        h_i = functions.reshape(h, (mb, 1, atom, self.n_edge_types,
                                    self.n_heads, self.out_channels))
        # (minibatch, atom, atom, EDGE_TYPE, heads, out_dim)
        h_i = functions.broadcast_to(h_i, (mb, atom, atom, self.n_edge_types,
                                           self.n_heads, self.out_channels))
# (minibatch, atom, 1, EDGE_TYPE, heads, out_dim)
h_j = functions.reshape(h, (mb, atom, 1, self.n_edge_types,
self.n_heads, self.out_channels))
# (minibatch, atom, atom, EDGE_TYPE, heads, out_dim)
h_j = functions.broadcast_to(h_j, (mb, atom, atom, self.n_edge_types,
self.n_heads, self.out_channels))
# (minibatch, atom, atom, EDGE_TYPE, heads, out_dim * 2)
e = functions.concat([h_i, h_j], axis=5)
# (minibatch, EDGE_TYPE, heads, atom, atom, out_dim * 2)
e = functions.transpose(e, (0, 3, 4, 1, 2, 5))
# (minibatch * EDGE_TYPE * heads, atom * atom, out_dim * 2)
e = functions.reshape(e, (mb * self.n_edge_types * self.n_heads,
atom * atom, self.out_channels * 2))
# (minibatch * EDGE_TYPE * heads, atom * atom, 1)
e = self.attention_layer(e)
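        # attention_layer plays the role of the shared attention vector:
        # it maps each concatenated pair [Wh_i || Wh_j] to a scalar logit,
        # reusing the same weights for every edge type and head.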
# (minibatch, EDGE_TYPE, heads, atom, atom)
e = functions.reshape(e, (mb, self.n_edge_types, self.n_heads, atom,
atom))
e = functions.leaky_relu(e, self.negative_slope)
        # adj: (minibatch, EDGE_TYPE, atom, atom)
        if isinstance(adj, chainer.Variable):
            cond = adj.array.astype(xp.bool_)
        else:
            cond = adj.astype(xp.bool_)
# (minibatch, EDGE_TYPE, 1, atom, atom)
cond = xp.reshape(cond, (mb, self.n_edge_types, 1, atom, atom))
# (minibatch, EDGE_TYPE, heads, atom, atom)
cond = xp.broadcast_to(cond, e.array.shape)
# TODO(mottodora): find better way to ignore non connected
e = functions.where(cond, e,
xp.broadcast_to(xp.array(-10000), e.array.shape)
.astype(xp.float32))
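        # Non-connected pairs (and absent edge types) receive a large
        # negative logit, so the softmax below gives them near-zero weight.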
# In Relational Graph Attention Networks eq.(7)
# ARGAT: take the softmax over the logits across node neighborhoods
# irrespective of relation
if self.softmax_mode == 'across':
# (minibatch, heads, atom, EDGE_TYPE, atom)
e = functions.transpose(e, (0, 2, 3, 1, 4))
# (minibatch, heads, atom, EDGE_TYPE * atom)
e = functions.reshape(e, (mb, self.n_heads, atom,
self.n_edge_types * atom))
# (minibatch, heads, atom, EDGE_TYPE * atom)
alpha = functions.softmax(e, axis=3)
if self.dropout_ratio >= 0:
alpha = functions.dropout(alpha, ratio=self.dropout_ratio)
# (minibatch, heads, atom, EDGE_TYPE, atom)
alpha = functions.reshape(alpha, (mb, self.n_heads, atom,
self.n_edge_types, atom))
# (minibatch, EDGE_TYPE, heads, atom, atom)
alpha = functions.transpose(alpha, (0, 3, 1, 2, 4))
# In Relational Graph Attention Networks eq.(6)
# WIRGAT: take the softmax over the logits independently for each
# relation
elif self.softmax_mode == 'within':
alpha = functions.softmax(e, axis=4)
if self.dropout_ratio >= 0:
alpha = functions.dropout(alpha, ratio=self.dropout_ratio)
else:
raise ValueError("{} is invalid. Please use 'across' or 'within'"
.format(self.softmax_mode))
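        # Aggregate messages: for every relation and head, each atom's new
        # feature is the attention-weighted sum of its neighbours'
        # transformed features.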
# before: (minibatch, atom, EDGE_TYPE, heads, out_dim)
# after: (minibatch, EDGE_TYPE, heads, atom, out_dim)
h = functions.transpose(h, (0, 2, 3, 1, 4))
# (minibatch, EDGE_TYPE, heads, atom, out_dim)
h_new = functions.matmul(alpha, h)
# (minibatch, heads, atom, out_dim)
h_new = functions.sum(h_new, axis=1)
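        # summing over axis=1 pools the per-relation messages into one
        # tensor per head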
if self.concat_heads:
# (heads, minibatch, atom, out_dim)
h_new = functions.transpose(h_new, (1, 0, 2, 3))
# (minibatch, atom, heads * out_dim)
h_new = functions.concat(h_new, axis=2)
else:
# (minibatch, atom, out_dim)
h_new = functions.mean(h_new, axis=1)
return h_new
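

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original chainer-chemistry module).
# The toy feature and adjacency shapes below are assumptions chosen to match
# the shape comments in __call__: h is (minibatch, atom, in_channels) and
# adj is (minibatch, EDGE_TYPE, atom, atom).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy

    update = RelGATUpdate(in_channels=8, out_channels=16, n_heads=3,
                          n_edge_types=4)
    # atom features: (minibatch=2, atom=5, in_channels=8)
    h = numpy.random.rand(2, 5, 8).astype(numpy.float32)
    # relational adjacency: (minibatch=2, EDGE_TYPE=4, atom=5, atom=5)
    adj = numpy.random.randint(0, 2, (2, 4, 5, 5)).astype(numpy.float32)
    out = update(h, adj)
    # with concat_heads=False the heads are averaged
    print(out.shape)  # expected: (2, 5, 16)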