/aosp_15_r20/external/pytorch/torch/distributed/

  _functional_collectives.py
    447  input_split_sizes: Optional[List[int]],
    470  if input_split_sizes is not None:
    472  isinstance(size, (int, torch.SymInt)) for size in input_split_sizes
    473  ), input_split_sizes
    476  if output_split_sizes is None or input_split_sizes is None:
    477  assert output_split_sizes is None and input_split_sizes is None, (
    482  input_split_sizes = output_split_sizes
    486  input_split_sizes,
    495  input_split_sizes: Optional[List[int]],
    506  if input_split_sizes is not None:
    [all …]

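The _functional_collectives hits above check that every entry of input_split_sizes is an int or torch.SymInt and that the two split-size lists are either both supplied or both None (in which case an even split is used). A minimal usage sketch, assuming torch.distributed._functional_collectives.all_to_all_single(tensor, output_split_sizes, input_split_sizes, group), an already-initialized process group, and a made-up send pattern (rank r sends r + 1 elements to every peer):

import torch
import torch.distributed as dist
import torch.distributed._functional_collectives as funcol

# Sketch only: assumes dist.init_process_group(...) has already run and
# that a plain ProcessGroup is an accepted `group` argument here.
rank = dist.get_rank()
world_size = dist.get_world_size()

# Rank r sends (r + 1) elements to each peer, so it receives (peer + 1)
# elements from every peer.
input_split_sizes = [rank + 1] * world_size
output_split_sizes = [peer + 1 for peer in range(world_size)]

x = torch.full((sum(input_split_sizes),), float(rank))
y = funcol.all_to_all_single(x, output_split_sizes, input_split_sizes, group=dist.group.WORLD)
y = funcol.wait_tensor(y)  # resolve the async collective result
# Passing None for both size lists instead requests an even split.
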
  _functional_collectives_impl.py
    94   input_split_sizes: Optional[List[int]],
    99   if output_split_sizes is None or input_split_sizes is None:
    100  assert output_split_sizes is None and input_split_sizes is None, (
    105  input_split_sizes = output_split_sizes
    111  input_split_sizes,

  distributed_c10d.py
    3884  input_split_sizes=None,  (argument)
    3993  input_split_sizes = [] if input_split_sizes is None else input_split_sizes
    3997  output, input, output_split_sizes, input_split_sizes, opts

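The distributed_c10d.py hits show the public entry point, where a missing split-size list is normalized to an empty list before reaching the backend, which then performs an even split. A sketch of both calling modes, assuming torch.distributed.all_to_all_single(output, input, output_split_sizes=None, input_split_sizes=None, group=None, async_op=False) and an initialized process group:

import torch
import torch.distributed as dist

# Sketch only: assumes dist.init_process_group(...) has already been called.
rank = dist.get_rank()
world_size = dist.get_world_size()

# Even split: with both split-size lists left as None, the wrapper above
# substitutes empty lists and the input is divided equally across ranks.
inp = torch.arange(4 * world_size, dtype=torch.float) + 100 * rank
out = torch.empty_like(inp)
dist.all_to_all_single(out, inp)

# Uneven split: both lists must be supplied, and they must sum to the
# input / output tensor lengths respectively.
input_split_sizes = [rank + 1] * world_size
output_split_sizes = [peer + 1 for peer in range(world_size)]
inp = torch.full((sum(input_split_sizes),), float(rank))
out = torch.empty(sum(output_split_sizes))
dist.all_to_all_single(out, inp, output_split_sizes, input_split_sizes)
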
/aosp_15_r20/external/pytorch/torch/distributed/_shard/sharded_tensor/

  reshard.py
    123  input_split_sizes = [0] * world_size
    126  input_split_sizes[new_rank] = local_shard.size(reshard_dim)
    143  input_split_sizes=input_split_sizes,
    194  input_split_sizes = []
    196  input_split_sizes.append(metadata.shard_sizes[reshard_dim])
    222  output_tensor_size[reshard_dim] = input_split_sizes[current_rank]
    233  input_tensor_tuple = torch.split(local_tensor, input_split_sizes, dim=reshard_dim)

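In the reshard.py hits, the second code path collects every rank's shard size along the reshard dimension into input_split_sizes, sizes the output buffer from its own entry, and splits the local tensor so that piece i can be sent to rank i. A hedged, self-contained reconstruction of that bookkeeping (split_for_reshard is a hypothetical name, not the module's API):

import torch

def split_for_reshard(local_tensor, shard_sizes, reshard_dim, current_rank):
    # shard_sizes[i] is the size rank i will own along reshard_dim,
    # collected from the sharding metadata.
    input_split_sizes = list(shard_sizes)
    # This rank ends up owning shard_sizes[current_rank] entries along
    # reshard_dim, so that is the output size it must allocate.
    output_size = list(local_tensor.size())
    output_size[reshard_dim] = input_split_sizes[current_rank]
    # Piece i of the split is destined for rank i in the all-to-all.
    pieces = torch.split(local_tensor, input_split_sizes, dim=reshard_dim)
    return pieces, output_size, input_split_sizes

# Example: 3 ranks own 2/3/1 rows of a (6, 4) tensor after resharding.
pieces, out_size, splits = split_for_reshard(torch.randn(6, 4), [2, 3, 1], 0, current_rank=1)
assert [p.size(0) for p in pieces] == [2, 3, 1] and out_size == [3, 4]
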
/aosp_15_r20/external/pytorch/torch/distributed/nn/

  functional.py
    178  input_split_sizes=None,  (argument)
    201  group, output, output_split_sizes, input_split_sizes, input
    411  def forward(ctx, group, output, output_split_sizes, input_split_sizes, input):  (argument)
    414  ctx.output_split_sizes = input_split_sizes
    415  ctx.input_split_sizes = output_split_sizes
    420  input_split_sizes=input_split_sizes,
    435  ctx.input_split_sizes,

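The functional.py hits (lines 411-435 of that file) show the key autograd trick: the backward of an all-to-all is another all-to-all with output_split_sizes and input_split_sizes exchanged, because gradients travel the reverse of the forward communication pattern. A simplified 1-D sketch of that idea, not the module's actual class (which also takes a preallocated output buffer):

import torch
import torch.distributed as dist

class _AllToAllSingle(torch.autograd.Function):
    @staticmethod
    def forward(ctx, group, output_split_sizes, input_split_sizes, inp):
        ctx.group = group
        # Save the size lists with their roles exchanged for backward.
        ctx.output_split_sizes = input_split_sizes
        ctx.input_split_sizes = output_split_sizes
        out = torch.empty(sum(output_split_sizes), dtype=inp.dtype, device=inp.device)
        dist.all_to_all_single(out, inp, output_split_sizes, input_split_sizes, group=group)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        # grad_input has the shape of the forward input, i.e. the sum of
        # the saved (swapped) output_split_sizes.
        grad_input = torch.empty(
            sum(ctx.output_split_sizes), dtype=grad_output.dtype, device=grad_output.device
        )
        dist.all_to_all_single(
            grad_input,
            grad_output.contiguous(),
            ctx.output_split_sizes,
            ctx.input_split_sizes,
            group=ctx.group,
        )
        return None, None, None, grad_input
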
/aosp_15_r20/external/pytorch/torch/csrc/distributed/c10d/

  Functional.cpp
    260  std::vector<int64_t> input_split_sizes,  (in all_to_all_single(), argument)
    274  input_split_sizes);  (in all_to_all_single())
    405  std::vector<int64_t> input_split_sizes,  (in forward(), argument)
    409  ctx->saved_data["output_split_sizes"] = input_split_sizes;  (in forward())
    416  .call(input, output_split_sizes, input_split_sizes, group_name);  (in forward())
    424  const std::vector<int64_t>& input_split_sizes =  (in backward(), local)
    435  .call(grad_out, output_split_sizes, input_split_sizes, group_name);  (in backward())
    451  const std::vector<int64_t>& input_split_sizes,  (in all_to_all_single_autograd(), argument)
    454  input, output_split_sizes, input_split_sizes, group_name);  (in all_to_all_single_autograd())

  Ops.cpp
    415  std::vector<int64_t> input_split_sizes, \
    422  input_split_sizes, \

/aosp_15_r20/external/pytorch/test/distributed/

  test_c10d_functional_native.py
    355  input_split_sizes = send_sz_matrix[self.rank].tolist()
    357  input = torch.full((sum(input_split_sizes),), float(self.rank)).cuda()
    362  input_split_sizes,
    376  input, output_split_sizes, input_split_sizes, "default"
    781  input_split_sizes: torch.Tensor,
    786  _tolist_with_constrain_as_size(input_split_sizes),
    794  input_split_sizes = send_sz_matrix[self.rank]
    796  input = torch.full((input_split_sizes.sum().item(),), float(self.rank)).cuda()
    805  compiled, input, output_split_sizes, input_split_sizes

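This test builds a world_size x world_size matrix of send counts: row r gives rank r's input_split_sizes and column r gives its output_split_sizes, so every rank can size its buffers consistently. A hedged reconstruction of that bookkeeping with made-up example values:

import torch

world_size, rank = 4, 2  # example values
torch.manual_seed(42)    # every rank must derive the same matrix

# Entry [i, j] is how many elements rank i sends to rank j.
send_sz_matrix = torch.randint(0, 20, (world_size, world_size))

input_split_sizes = send_sz_matrix[rank].tolist()     # what this rank sends
output_split_sizes = send_sz_matrix[:, rank].tolist() # what this rank receives

inp = torch.full((sum(input_split_sizes),), float(rank))
out = torch.empty(sum(output_split_sizes))
# These buffers and size lists would then be handed to all_to_all_single.
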
  test_functional_api.py
    525  x, output_split_sizes=split_sizes, input_split_sizes=split_sizes, group=mesh
    543  x, output_split_sizes=split_sizes, input_split_sizes=split_sizes, group=mesh
    559  x, output_split_sizes=None, input_split_sizes=None, group=mesh

  test_c10d_spawn.py
    242  y, x, output_split_sizes=split_sizes, input_split_sizes=split_sizes

  test_inductor_collectives.py
    409  input_split_sizes = _tolist_with_constrain_as_size(input_split_sizes_tensor)
    416  input_split_sizes,

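Here _tolist_with_constrain_as_size converts a tensor of split sizes into a Python list while constraining each element to be a valid size, so torch.compile can trace all_to_all_single with data-dependent splits. A hypothetical equivalent of that test helper (its real implementation may differ), using torch._check_is_size:

import torch

def tolist_with_size_constraints(sizes_tensor):
    # Turn a tensor of split sizes into a Python list, telling the
    # compiler each element is a non-negative size so the resulting
    # unbacked symbolic ints can be used as tensor dimensions.
    sizes = sizes_tensor.tolist()
    for s in sizes:
        torch._check_is_size(s)
    return sizes
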
/aosp_15_r20/external/pytorch/torch/testing/_internal/distributed/

  multi_threaded_pg.py
    89   _, input_buffer, _, input_split_sizes = data[src_rank]
    90   … input_indexes = self._size_cumsum(input_buffer.size(0), input_split_sizes, world_size)
    322  input_split_sizes: Optional[List[int]],
    326  … res = coll.join(self._rank, (output_buffer, input_buffer, output_split_sizes, input_split_sizes))

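multi_threaded_pg.py turns each rank's split sizes into cumulative offsets into the flat input buffer (falling back to an even split when no sizes are given) so it can copy the right slice to each destination. A hypothetical equivalent of that _size_cumsum helper:

import torch
from typing import List, Optional

def size_cumsum(buf_size: int, sizes: Optional[List[int]], world_size: int) -> torch.Tensor:
    # Per-rank split sizes -> cumulative offsets; an empty/missing list
    # means the buffer is split evenly across ranks.
    if sizes is None or len(sizes) == 0:
        sizes = [buf_size // world_size] * world_size
    # Prepend 0 so that offsets[r] is where rank r's chunk begins and
    # offsets[r + 1] is where it ends.
    return torch.cumsum(torch.tensor([0] + list(sizes), dtype=torch.int64), dim=0)

offsets = size_cumsum(12, None, 4)      # tensor([0, 3, 6, 9, 12])
offsets = size_cumsum(6, [1, 2, 3], 3)  # tensor([0, 1, 3, 6])
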
  distributed_test.py
    2487  input_split_sizes = []
    2489  input_split_sizes.append(src + 1)
    2490  start_len = sum(input_split_sizes[:rank])
    2491  end_len = start_len + input_split_sizes[rank]
    2492  sum_len = sum(input_split_sizes)
    2501  input_split_sizes[rank], sum_len, sum_len, dtype=torch.float
    2509  list(torch.split(tensor, input_split_sizes)),
    2519  input_split_sizes[rank], sum_len, sum_len, dtype=torch.float

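The distributed_test.py hits size everything from a deterministic rule (rank src contributes src + 1 rows), so every rank can recompute the full split list and locate its own slice with prefix sums. A hedged rendering of that arithmetic with example values:

import torch

world_size, rank = 4, 2  # example values
input_split_sizes = [src + 1 for src in range(world_size)]  # [1, 2, 3, 4]
sum_len = sum(input_split_sizes)                            # 10
start_len = sum(input_split_sizes[:rank])                   # 3: where this rank's slice begins
end_len = start_len + input_split_sizes[rank]               # 6: where it ends

tensor = torch.ones(sum_len, sum_len) * rank
chunks = list(torch.split(tensor, input_split_sizes))       # dim-0 pieces of sizes 1, 2, 3, 4
assert [c.size(0) for c in chunks] == input_split_sizes
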
/aosp_15_r20/external/pytorch/torch/_C/

  _distributed_c10d.pyi
    463  input_split_sizes: list[int],
    472  input_split_sizes: list[int],

/aosp_15_r20/external/pytorch/torch/_inductor/

  lowering.py
    6412  def _all_to_all_single(inp, output_split_sizes, input_split_sizes, group_name):  (argument)
    6418  input_split_sizes,