Module poptorch_experimental_addons.collectives
Primitives for collective communication across IPU clusters.
Expand source code
# Copyright (c) 2023 Graphcore Ltd. All rights reserved.
from ._impl.collectives import * # NOQA:F401,F403
from ._impl.collectives import __all__, __doc__ # NOQA:F401
Functions
def all_gather_cross_replica(x: torch.Tensor, replication_factor: int) ‑> Any
-
All-gather across IPU program replicas.
Gathers and stacks tensors occupying the same memory location across all IPUs
x – shape () returns – shape (replication_factor, )
Expand source code
def all_gather_cross_replica(x: torch.Tensor, replication_factor: int) -> Any: """ All-gather across IPU program replicas. Gathers and stacks tensors occupying the same memory location across all IPUs x -- shape (*) returns -- shape (replication_factor, *) """ x = all_gather_cross_replica_identical_grads_in(x, replication_factor) x = all_reduce_cross_replica_sum(x, replication_factor, insert_in_grad_graph=True) return x
def all_gather_cross_replica_identical_grads_in(x: torch.Tensor, replication_factor: int) ‑> Any
-
All-gather across IPU program replicas.
Gathers and stacks tensors occupying the same memory location across all IPUs
Gradient graph generated assumes gradient inputs are identical
x – shape () returns – shape (replication_factor, )
Expand source code
def all_gather_cross_replica_identical_grads_in( x: torch.Tensor, replication_factor: int ) -> Any: """ All-gather across IPU program replicas. Gathers and stacks tensors occupying the same memory location across all IPUs Gradient graph generated assumes gradient inputs are identical x -- shape (*) returns -- shape (replication_factor, *) """ x = _no_op_reshape(x) # ensures grad of ReplicatedAllGather is reshaped out = poptorch.custom_op( [x], name="ReplicatedAllGather", domain="ai.graphcore", domain_version=1, example_outputs=[ torch.zeros( dtype=x.dtype, size=(replication_factor, *x.shape), device=x.device ), ], )[0] out = out.reshape(replication_factor, *x.shape) return out
def all_reduce_cross_replica_sum(x: torch.Tensor, replication_factor: int, insert_in_grad_graph: bool = False) ‑> Any
-
All-reduce across IPU program replicas
Sums tensors occupying the same memory location across IPUs, resulting in replicated tensors.
insert_in_grad_graph is a boolean argument that inserts the all_reduce in the gradient graph (backward pass) rather than the forward graph.
x – shape () returns – shape ()
Expand source code
def all_reduce_cross_replica_sum( x: torch.Tensor, replication_factor: int, insert_in_grad_graph: bool = False ) -> Any: """ All-reduce across IPU program replicas Sums tensors occupying the same memory location across IPUs, resulting in replicated tensors. insert_in_grad_graph is a boolean argument that inserts the all_reduce in the gradient graph (backward pass) rather than the forward graph. x -- shape (*) returns -- shape (*) """ rg_info = [replication_factor, 1, replication_factor] out = poptorch.custom_op( [x], name="ReplicatedAllReduceTP", domain="ai.graphcore", domain_version=1, example_outputs=[x], attributes={ "op": "sum", "__collectiveReplicaGrouping": rg_info, "backwards": insert_in_grad_graph, }, )[0] return out
def all_to_all_single_cross_replica(x: torch.Tensor, replication_factor: int) ‑> Any
-
All-to-all across IPU program replicas
Splits input tensor over leading axis and scatters to IPU according to position.
Leading axis must be divisible by number of replicas.
Does not support uneven splits.
Also see docs for
torch.distributed.all_to_all_single
for similarx – shape () returns – shape ()
Expand source code
def all_to_all_single_cross_replica(x: torch.Tensor, replication_factor: int) -> Any: """ All-to-all across IPU program replicas Splits input tensor over leading axis and scatters to IPU according to position. Leading axis must be divisible by number of replicas. Does not support uneven splits. Also see docs for `torch.distributed.all_to_all_single` for similar x -- shape (*) returns -- shape (*) """ out = poptorch.custom_op( [x], name="ReplicatedAllToAll", domain="ai.graphcore", domain_version=1, example_outputs=[x], )[0] return out