/aosp_15_r20/external/pytorch/test/distributed/_tensor/debug/ |
H A D | test_comm_mode.py | 52 x = funcol.reduce_scatter_tensor(x, "sum", 0, world_pg) 66 self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 1) 78 x = funcol.reduce_scatter_tensor(x, "sum", 0, world_pg) 92 self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 1) 114 self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 0) 142 dist.reduce_scatter_tensor(inp, all_gather_out) 206 dist.reduce_scatter_tensor(all_gather_out, inp)
|
/aosp_15_r20/external/pytorch/torch/distributed/_tools/ |
H A D | fsdp2_mem_tracker.py | 68 reduce_scatter_tensor: Callable 454 dist.reduce_scatter_tensor, 493 @wraps(dist.reduce_scatter_tensor) 494 def reduce_scatter_tensor( function 512 return self._saved_collectives.reduce_scatter_tensor( 542 dist.reduce_scatter_tensor = reduce_scatter_tensor 548 dist.reduce_scatter_tensor = self._saved_collectives.reduce_scatter_tensor
|
/aosp_15_r20/external/pytorch/test/distributed/tensor/parallel/ |
H A D | test_micro_pipeline_tp.py | 19 reduce_scatter_tensor, 138 a = reduce_scatter_tensor(inp, "sum", scatter_dim=0, group=group.group_name) 139 b = reduce_scatter_tensor(inp, "avg", scatter_dim=1, group=group.group_name) 157 torch.ops._c10d_functional.reduce_scatter_tensor.default, 182 d = reduce_scatter_tensor(b, "avg", scatter_dim=0, group=group.group_name) 313 return reduce_scatter_tensor(A @ B, "avg", scatter_dim, group) 355 return reduce_scatter_tensor(C, "avg", scatter_dim, group)
|
H A D | test_tp_style.py | 79 comm_mode.get_comm_counts()[c10d_functional.reduce_scatter_tensor], 1 136 comm_mode.get_comm_counts()[c10d_functional.reduce_scatter_tensor], 1 184 comm_mode.get_comm_counts()[c10d_functional.reduce_scatter_tensor], 1
|
H A D | test_tp_examples.py | 48 c10d_functional.reduce_scatter_tensor, 132 comm_mode.get_comm_counts()[c10d_functional.reduce_scatter_tensor], 1
|
/aosp_15_r20/external/pytorch/test/distributed/_composable/fsdp/ |
H A D | test_fully_shard_mixed_precision.py | 78 orig_reduce_scatter = dist.reduce_scatter_tensor 111 dist.reduce_scatter_tensor(output, param.grad) 147 orig_reduce_scatter = dist.reduce_scatter_tensor 191 orig_reduce_scatter = dist.reduce_scatter_tensor 216 sharded_grad = funcol.reduce_scatter_tensor( 262 orig_reduce_scatter = dist.reduce_scatter_tensor
|
H A D | test_fully_shard_overlap.py | 56 orig_reduce_scatter_tensor = dist.reduce_scatter_tensor 126 dist.reduce_scatter_tensor(dummy_rs_output, dummy_rs_input)
|
H A D | test_fully_shard_frozen.py | 80 orig_reduce_scatter = dist.reduce_scatter_tensor
|
/aosp_15_r20/external/pytorch/test/distributed/fsdp/ |
H A D | test_fsdp_hybrid_shard.py | 73 orig_reduce_scatter = dist.reduce_scatter_tensor 74 dist.reduce_scatter_tensor = new_reduce_scatter 78 dist.reduce_scatter_tensor = orig_reduce_scatter 304 orig_rs = dist.reduce_scatter_tensor
|
H A D | test_fsdp_mixed_precision.py | 149 orig_reduce_scatter = dist.reduce_scatter_tensor 150 dist.reduce_scatter_tensor = new_reduce_scatter 156 dist.reduce_scatter_tensor = orig_reduce_scatter 422 orig_reduce_scatter = dist.reduce_scatter_tensor 591 orig_reduce_scatter = dist.reduce_scatter_tensor 973 orig_reduce_scatter = dist.reduce_scatter_tensor
|
H A D | test_fsdp_tp_integration.py | 395 self.assertEqual(comm_counts[funcol.reduce_scatter_tensor], 2)
|
/aosp_15_r20/external/pytorch/torch/distributed/ |
H A D | _functional_collectives.py | 251 def reduce_scatter_tensor( function 283 tensor = torch.ops._c10d_functional.reduce_scatter_tensor( 322 tensor = torch.ops._c10d_functional_autograd.reduce_scatter_tensor( 1039 return output.copy_(reduce_scatter_tensor(input, op, scatter_dim, group, tag)) 1136 reduce_scatter_tensor as legacy_reducescatter,
|
H A D | _functional_collectives_impl.py | 67 return torch.ops._c10d_functional.reduce_scatter_tensor(
|
H A D | distributed_c10d.py | 2300 elif op0 == reduce_scatter_tensor: 3766 def reduce_scatter_tensor(output, input, op=ReduceOp.SUM, group=None, async_op=False): function 3836 coll = _CollOp(reduce_scatter_tensor, input, output, op, None) 3876 return reduce_scatter_tensor(output, input, op, group, async_op) 4981 reduce_scatter_tensor,
|
/aosp_15_r20/external/pytorch/torch/csrc/distributed/c10d/ |
H A D | Functional.cpp | 247 at::Tensor reduce_scatter_tensor( in reduce_scatter_tensor() function 358 c10::DispatchKey::CompositeExplicitAutograd, ::reduce_scatter_tensor), in TORCH_LIBRARY() 473 .typed<decltype(reduce_scatter_tensor)>() in forward() 545 .typed<decltype(reduce_scatter_tensor)>() in backward()
|
/aosp_15_r20/external/pytorch/test/distributed/ |
H A D | test_c10d_functional_native.py | 18 reduce_scatter_tensor, 299 output = torch.ops._c10d_functional.reduce_scatter_tensor( 309 output = reduce_scatter_tensor( 711 rs0 = funcol.reduce_scatter_tensor(arg, "avg", 0, "0")
|
H A D | test_inductor_collectives.py | 374 ag = torch.ops.c10d_functional.reduce_scatter_tensor( 748 torch.distributed.reduce_scatter_tensor( 917 torch.distributed.reduce_scatter_tensor(out, inp, group=pg, async_op=False) 936 work = torch.distributed.reduce_scatter_tensor( 974 ar = _functional_collectives.reduce_scatter_tensor(inp, "sum", 0, "0")
|
/aosp_15_r20/external/pytorch/test/distributed/_tensor/ |
H A D | test_math_ops.py | 526 funcol.reduce_scatter_tensor: 1, 546 expected_bwd_comm = {funcol.reduce_scatter_tensor: 1} 554 funcol.reduce_scatter_tensor: 1,
|
/aosp_15_r20/external/pytorch/torch/testing/_internal/ |
H A D | common_fsdp.py | 636 orig_reduce_scatter = torch.distributed.reduce_scatter_tensor 965 orig_reduce_scatter = dist.reduce_scatter_tensor 967 dist.reduce_scatter_tensor = new_reduce_scatter_tensor 972 dist.reduce_scatter_tensor = orig_reduce_scatter
|
/aosp_15_r20/external/pytorch/torch/distributed/tensor/debug/ |
H A D | _comm_mode.py | 38 funcol_native.reduce_scatter_tensor: funcol_py.reduce_scatter_tensor,
|
/aosp_15_r20/external/pytorch/torch/distributed/algorithms/_comm_hooks/ |
H A D | default_hooks.py | 129 dist.reduce_scatter_tensor(output, grad, group=state.process_group)
|
/aosp_15_r20/external/pytorch/torch/_inductor/fx_passes/ |
H A D | micro_pipeline_tp.py | 225 c10d.reduce_scatter_tensor.default, 779 torch.ops._c10d_functional.reduce_scatter_tensor.default,
|
/aosp_15_r20/external/pytorch/torch/distributed/_symmetric_memory/ |
H A D | __init__.py | 608 res = funcol.reduce_scatter_tensor(A @ B, reduce_op, scatter_dim, group_name) 674 res = funcol.reduce_scatter_tensor(
|
/aosp_15_r20/external/pytorch/test/inductor/ |
H A D | test_snode_runtime.py | 297 rs = _c10d.reduce_scatter_tensor(
|
/aosp_15_r20/external/pytorch/torch/distributed/tensor/ |
H A D | placement_types.py | 210 output = funcol.reduce_scatter_tensor(
|