/aosp_15_r20/external/executorch/extension/llm/modules/test/
  test_attention.py
    33: self.max_seq_len = 128
    54: max_seq_len=self.max_seq_len,
    70: max_seq_len=self.max_seq_len,
    84: max_seq_len=self.max_seq_len,
    99: size=(self.max_seq_len, self.max_seq_len),
    111: self.et_mha.setup_cache(1, dtype=torch.float32, max_seq_len=20)
    112: self.tt_mha.setup_cache(1, dtype=torch.float32, max_seq_len=20)
    145: self.et_mha.setup_cache(1, dtype=torch.float32, max_seq_len=100)
    146: self.tt_mha.setup_cache(1, dtype=torch.float32, max_seq_len=100)
    166: self.et_mha.setup_cache(1, dtype=torch.float32, max_seq_len=100)
    [all …]

/aosp_15_r20/external/executorch/extension/llm/custom_ops/
  test_sdpa_with_kv_cache.py
    376: (self.n_batch, self.max_seq_len, self.n_heads_kv, self.head_dim)
    379: (self.n_batch, self.max_seq_len, self.n_heads_kv, self.head_dim)
    382: (self.max_seq_len, self.max_seq_len),
    393: self.max_seq_len = 2048
    408: max_seq_len, argument
    419: self.max_seq_len = max_seq_len
    495: max_seq_len = 2048
    498: n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len, True
    505: max_seq_len = 8
    507: self._test_sdpa_common(n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len)
    [all …]
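
The hits above exercise ExecuTorch's sdpa_with_kv_cache custom op against caches pre-allocated to max_seq_len. As a rough eager-mode sketch of the math being tested (not the custom op itself; names, shapes, and the single-step flow are illustrative and assume query and KV head counts match):

    import torch
    import torch.nn.functional as F

    def sdpa_decode_step(q, k_cache, v_cache, k_new, v_new, start_pos):
        # q:       (batch, n_heads, seq_len, head_dim) for the new tokens only
        # k_cache: (batch, n_heads, max_seq_len, head_dim), pre-allocated up front
        seq_len = k_new.shape[2]
        k_cache[:, :, start_pos : start_pos + seq_len] = k_new  # write new keys into the cache
        v_cache[:, :, start_pos : start_pos + seq_len] = v_new  # write new values into the cache
        valid = start_pos + seq_len
        # Attend over everything written so far (single-token decode;
        # multi-token prefill would additionally need a causal mask).
        return F.scaled_dot_product_attention(q, k_cache[:, :, :valid], v_cache[:, :, :valid])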

/aosp_15_r20/external/pytorch/test/inductor/
  test_custom_lowering.py
    40: …"jagged_to_padded_dense(Tensor input, Tensor offsets, SymInt max_seq_len, Scalar pad_value) -> Ten…
    43: def j2pd_meta(inp, offsets, max_seq_len, pad_value): argument
    45: (offsets.shape[0] - 1, max_seq_len, inp.shape[1]),
    50: def j2pd_cuda(inp, offsets, max_seq_len, pad_value): argument
    52: (offsets.shape[0] - 1, max_seq_len, inp.shape[1]),
    62: def j2pd_lowering(inp, offsets, max_seq_len, pad_value): argument
    91: ranges=[offsets.get_size()[0] - 1, max_seq_len, inp.get_size()[1]],
    136: def fn(inp, offsets, max_seq_len): argument
    138: inp, offsets, max_seq_len, 60.0
    143: max_seq_len = 4
    [all …]
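
The custom jagged_to_padded_dense op registered in this test takes a flat values tensor plus row offsets and pads each sequence out to max_seq_len. A small eager reference of that behavior (illustrative only; the test's real implementations are the meta/CUDA/Inductor ones listed above):

    import torch

    def jagged_to_padded_dense_ref(inp, offsets, max_seq_len, pad_value):
        # Scatter each jagged sequence into a (batch, max_seq_len, dim) tensor.
        batch_size = offsets.shape[0] - 1
        out = inp.new_full((batch_size, max_seq_len, inp.shape[1]), pad_value)
        for b in range(batch_size):
            start, end = offsets[b].item(), offsets[b + 1].item()
            length = min(end - start, max_seq_len)  # sequences longer than max_seq_len are clipped
            out[b, :length] = inp[start : start + length]
        return out

    values = torch.arange(10, dtype=torch.float32).reshape(5, 2)  # 5 rows, dim=2
    offsets = torch.tensor([0, 2, 5])                             # two sequences: lengths 2 and 3
    print(jagged_to_padded_dense_ref(values, offsets, max_seq_len=4, pad_value=60.0).shape)
    # torch.Size([2, 4, 2])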

/aosp_15_r20/external/executorch/extension/llm/modules/
  kv_cache.py
    22: max_seq_len (int): maximum sequence length model will be run with
    32: max_seq_len: int,
    40: max_seq_len=max_seq_len,
    46: self.max_seq_len = max_seq_len
    48: cache_shape = (batch_size, num_kv_heads, max_seq_len, head_dim)
    50: cache_shape = (batch_size, max_seq_len, num_kv_heads, head_dim)
    59: "cache_pos", torch.arange(0, self.max_seq_len), persistent=False
    74: …>>> cache = KVCache(batch_size=2, max_seq_len=16, num_kv_heads=4, head_dim=32, dtype=torch.bfloat1…
    108: assert (self.cache_pos[0] + seq_len) <= self.max_seq_len
    139: max_seq_len=self.max_seq_len,
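
The hits above show how this KVCache pre-allocates its buffers to max_seq_len and tracks a cache_pos write cursor. A condensed sketch of that pattern (illustrative; the field names follow the hits above, but this class is not the real module):

    import torch
    from torch import nn

    class TinyKVCache(nn.Module):
        def __init__(self, batch_size, num_kv_heads, max_seq_len, head_dim, dtype=torch.float32):
            super().__init__()
            self.max_seq_len = max_seq_len
            cache_shape = (batch_size, num_kv_heads, max_seq_len, head_dim)
            self.register_buffer("k_cache", torch.zeros(cache_shape, dtype=dtype), persistent=False)
            self.register_buffer("v_cache", torch.zeros(cache_shape, dtype=dtype), persistent=False)
            self.register_buffer("cache_pos", torch.arange(0, max_seq_len), persistent=False)

        def update(self, k, v):
            # k, v: (batch, num_kv_heads, seq_len, head_dim) for the new tokens
            seq_len = k.shape[2]
            assert (self.cache_pos[0] + seq_len) <= self.max_seq_len  # never write past the cache
            pos = self.cache_pos[:seq_len]
            self.k_cache[:, :, pos] = k
            self.v_cache[:, :, pos] = v
            self.cache_pos.add_(seq_len)  # advance the write cursor
            return self.k_cache, self.v_cache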

  attention.py
    76: max_seq_len (int): maximum sequence length supported by the model.
    104: max_seq_len: int = 4096,
    133: self.max_seq_len = max_seq_len
    164: self, batch_size: int, dtype: torch.dtype, max_seq_len: int
    172: max_seq_len (int): maximum sequence length model will be run with.
    182: max_seq_len=max_seq_len,
    398: max_seq_len=child.max_seq_len,

/aosp_15_r20/external/executorch/examples/llm_pte_finetuning/
  runner.py
    51: max_seq_len = cfg.tokenizer.max_seq_len
    64: max_seq_len=max_seq_len,
    87: if token_size > max_seq_len:
    88: tokens = tokens[:, :max_seq_len]
    90: tokens = F.pad(tokens, (0, max_seq_len - token_size), value=0)
    92: if labels_size > max_seq_len:
    93: labels = labels[:, :max_seq_len]
    95: labels = F.pad(labels, (0, max_seq_len - labels_size), value=0)
    115: max_seq_len=max_seq_len,

  training_lib.py
    63: if tokenizer.max_seq_len is None:
    65: "PackedDataset requires a max_seq_len to be set on the tokenizer."
    67: return PackedDataset(ds, max_seq_len=tokenizer.max_seq_len, split_across_pack=False)
    85: max_seq_len: int,
    102: if token_size > max_seq_len:
    103: tokens = tokens[:, :max_seq_len]
    105: tokens = F.pad(tokens, (0, max_seq_len - token_size), value=0)
    107: if labels_size > max_seq_len:
    108: labels = labels[:, :max_seq_len]
    110: labels = F.pad(labels, (0, max_seq_len - labels_size), value=0)

  model_exporter.py
    54: max_seq_len = cfg.tokenizer.max_seq_len
    62: if token_size > max_seq_len:
    63: tokens = tokens[:, :max_seq_len]
    65: tokens = F.pad(tokens, (0, max_seq_len - token_size), value=0)
    67: if labels_size > max_seq_len:
    68: labels = labels[:, :max_seq_len]
    70: labels = F.pad(labels, (0, max_seq_len - labels_size), value=0)
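
runner.py, training_lib.py, and model_exporter.py all repeat the same truncate-or-pad step so every batch ends up with exactly max_seq_len columns. A standalone sketch of that step (the helper name is hypothetical; the real scripts inline it):

    import torch
    import torch.nn.functional as F

    def fit_to_max_seq_len(tokens: torch.Tensor, max_seq_len: int, pad_value: int = 0) -> torch.Tensor:
        # tokens: (batch, seq); clamp the sequence dimension to max_seq_len
        token_size = tokens.shape[1]
        if token_size > max_seq_len:
            return tokens[:, :max_seq_len]                                   # truncate long sequences
        return F.pad(tokens, (0, max_seq_len - token_size), value=pad_value)  # right-pad short ones

    tokens = torch.tensor([[5, 7, 9]])
    print(fit_to_max_seq_len(tokens, max_seq_len=5))  # tensor([[5, 7, 9, 0, 0]])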

/aosp_15_r20/external/executorch/backends/vulkan/test/op_tests/
  sdpa_test.cpp
    137: const int max_seq_len = k_cache.size(1); in construct_attention_mask() local
    220: const int max_seq_len, in test_reference_sdpa() argument
    227: {batch_size, max_seq_len, num_kv_heads, head_dim}, in test_reference_sdpa()
    234: for (int input_pos = start_input_pos; input_pos + sequence_len < max_seq_len; in test_reference_sdpa()
    289: const int max_seq_len, in test_vulkan_sdpa() argument
    294: const int init_seq_len = dynamic_seq_len ? max_seq_len : base_sequence_len; in test_vulkan_sdpa()
    298: {batch_size, max_seq_len, num_kv_heads, head_dim}, in test_vulkan_sdpa()
    387: input_pos + seq_len < max_seq_len; in test_vulkan_sdpa()
    447: const int max_seq_len = 7; in TEST() local
    456: max_seq_len, in TEST()
    [all …]

/aosp_15_r20/external/executorch/examples/models/llama/
  llama_transformer.py
    92: max_seq_len: int = 2048 variable in ModelArgs
    221: max_seq_len: int,
    229: self.max_seq_len = max_seq_len
    250: torch._check(start_pos < self.max_seq_len)
    277: self.max_seq_len = args.max_seq_len
    288: self.max_seq_len,
    289: self.max_seq_len,
    299: args.max_seq_len,
    310: max_seq_len=self.max_seq_len,
    465: self.max_seq_len = params.max_seq_len
    [all …]
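
Line 250 above guards a dynamic start_pos with torch._check so the exporter can prove the KV-cache index stays below max_seq_len. A minimal sketch of that guard pattern (the module and shapes here are made up for illustration):

    import torch

    class CacheWriter(torch.nn.Module):
        def __init__(self, max_seq_len: int = 2048, dim: int = 8):
            super().__init__()
            self.max_seq_len = max_seq_len
            self.register_buffer("cache", torch.zeros(max_seq_len, dim))

        def forward(self, x: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
            start_pos = input_pos.item()
            torch._check_is_size(start_pos)             # start_pos is a valid (non-negative) index
            torch._check(start_pos < self.max_seq_len)  # keeps the write provably in bounds
            self.cache[start_pos] = x
            return self.cache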

  export_llama_lib.py
    556: max_seq_len=args.max_seq_length,
    735: (atten.max_batch_size, atten.max_seq_len, atten.head_dim)
    741: atten.max_seq_len,
    820: max_seq_len: int,
    829: "get_max_seq_len": max_seq_len,
    863: max_seq_len: int = 128,
    907: max_seq_len=max_seq_len,
    944: max_seq_len=model.max_seq_len,
    965: model.max_seq_len,

  main.cpp
    35: …output). Defaults to max_seq_len. If the number of input tokens + seq_len > max_seq_len, the outpu…

/aosp_15_r20/external/executorch/examples/models/llama/runner/
  generation.py
    52: max_seq_len: int,
    63: max_seq_len: max length of the output sequence, after which the output will be clipped.
    69: self.max_seq_len = max_seq_len
    87: max_seq_len: int,
    107: while len(tokens) < max_seq_len:
    163: max_seq_len=self.max_seq_len,
    198: max_seq_len=self.max_seq_len,
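
The runner above caps generation with `while len(tokens) < max_seq_len` (line 107). A bare-bones version of that loop (greedy decoding only; the real runner's sampling, KV-cache handling, and stop conditions are richer):

    import torch

    def generate(model, prompt_tokens, max_seq_len, eos_id):
        # Keep appending tokens until the sequence reaches max_seq_len or EOS is produced.
        tokens = list(prompt_tokens)
        while len(tokens) < max_seq_len:
            logits = model(torch.tensor(tokens).unsqueeze(0))  # (1, seq, vocab)
            next_token = int(torch.argmax(logits[0, -1]))
            tokens.append(next_token)
            if next_token == eos_id:
                break
        return tokens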

/aosp_15_r20/external/pytorch/torch/_inductor/
  jagged_lowerings.py
    93: max_seq_len: Union[int, sympy.Expr],
    102: # check=False because there may be sequences longer than max_seq_len
    103: seq_idx = ops.indirect_indexing(seq, max_seq_len, check=False)
    145: max_seq_len = max_lengths[0]
    149: output_size = [batch_size, max_seq_len, embedding_len]
    210: max_seq_len = dense_size[1]
    233: max_seq_len=max_seq_len,
    239: ops.index_expr(max_seq_len, offsets_dtype),
    242: 0.0,  # jagged sequence longer than max_seq_len

/aosp_15_r20/external/executorch/examples/models/llama3_2_vision/text_decoder/
  model.py
    45: self.max_seq_len = kwargs.get(
    46: "max_seq_len", 8192
    72: size=(self.max_seq_len, self.max_seq_len),
    76: self.input_pos = torch.arange(self.max_seq_len, dtype=torch.int64)
    106: max_seq_len=self.max_seq_len,
    155: decoder_max_seq_len=self.max_seq_len,
    187: dim_seq_len = torch.export.Dim("token_dim", min=1, max=self.max_seq_len)
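
This text decoder builds a full (max_seq_len, max_seq_len) causal mask up front and exports with a token dimension bounded by max_seq_len (line 187). A toy reproduction of just those two pieces (shapes are scaled down; this is not the vision decoder itself):

    import torch
    from torch.export import Dim, export

    class MaskOnly(torch.nn.Module):
        def __init__(self, max_seq_len: int = 16):
            super().__init__()
            self.max_seq_len = max_seq_len
            # Lower-triangular mask built once at max_seq_len, sliced per call.
            self.register_buffer(
                "causal_mask",
                torch.tril(torch.ones(max_seq_len, max_seq_len, dtype=torch.bool)),
                persistent=False,
            )

        def forward(self, tokens: torch.Tensor) -> torch.Tensor:
            seq_len = tokens.shape[1]
            return self.causal_mask[:seq_len, :seq_len]

    model = MaskOnly(max_seq_len=16)
    token_dim = Dim("token_dim", min=1, max=model.max_seq_len)  # dynamic but capped at max_seq_len
    ep = export(model, (torch.zeros(1, 4, dtype=torch.long),), dynamic_shapes={"tokens": {1: token_dim}})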

/aosp_15_r20/external/executorch/examples/models/llava/
  export_llava.py
    87: max_seq_len=llava.text_model_args.max_seq_len,
    144: max_seq_len=llava.text_model_args.max_seq_len,  # This may not be right
    175: token_dim_1 = Dim("token_dim_1", min=2, max=llava.text_model_args.max_seq_len)
    298: …ExecuTorch with sdpa_with_kv_cache: {args.use_sdpa_with_kv_cache}, max_seq_len: {args.max_seq_len}"
    302: max_seq_len=args.max_seq_len,

  model.py
    41: max_seq_len: int = 768,
    60: max_seq_len=max_seq_len,
    261: def __init__(self, use_sdpa_with_kv_cache_op=True, max_seq_len=768): argument
    263: self.max_seq_len = max_seq_len
    288: self.max_seq_len,
    337: dim = torch.export.Dim("token_dim", min=2, max=self.max_seq_len)

/aosp_15_r20/external/executorch/examples/models/llama3_2_vision/runner/
  generation.py
    17: max_seq_len: int,
    25: max_seq_len,
    34: size=(max_seq_len, max_seq_len),
    38: self.input_pos = torch.arange(max_seq_len)
    43: max_seq_len: int,
    72: while len(tokens) < max_seq_len:

/aosp_15_r20/external/executorch/examples/qualcomm/oss_scripts/llama2/model/
  static_llama.py
    53: self.max_seq_len = config.max_seq_len
    224: self.max_seq_len = config.max_seq_len
    243: config.max_seq_len,
    290: atten_mask = torch.full((self.max_batch_size, self.max_seq_len), -255.0)
    299: self.max_seq_len - 1,
    305: self.max_seq_len - 1,
    325: "get_max_seq_len": self.max_seq_len,

/aosp_15_r20/external/executorch/backends/vulkan/runtime/graph/ops/impl/
  SDPA.cpp
    154: const int64_t max_seq_len) { in add_cache_slice_view_node() argument
    158: slice_sizes.at(1) = max_seq_len; in add_cache_slice_view_node()
    230: const int32_t max_seq_len = graph.size_at<int32_t>(1, k_cache); in sdpa_with_kv_cache_impl() local
    244: max_seq_len); in sdpa_with_kv_cache_impl()
    251: max_seq_len); in sdpa_with_kv_cache_impl()
    266: cache_slice_repeated_sizes.at(1) = max_seq_len; in sdpa_with_kv_cache_impl()
    295: attn_weight_full_sizes.at(2) = max_seq_len; in sdpa_with_kv_cache_impl()
    296: attn_weight_full_sizes.at(3) = max_seq_len; in sdpa_with_kv_cache_impl()

/aosp_15_r20/external/executorch/backends/apple/coreml/runtime/test/
  export_stateful_model.py
    22: max_seq_len: int,
    26: "cache", torch.zeros((max_seq_len, embedding_dim), dtype=torch.float32)
    43: max_seq_len = 2
    44: model = StatefulModel(embedding_dim=embedding_dim, max_seq_len=max_seq_len)
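
This Core ML test registers a mutable cache buffer sized by max_seq_len so the exported program carries state. A minimal version of the same idea (class name and dimensions are made up; only the register_buffer pattern mirrors the hits above):

    import torch

    class TinyStatefulModel(torch.nn.Module):
        def __init__(self, embedding_dim: int, max_seq_len: int):
            super().__init__()
            # Mutable state: one row per position, pre-sized to max_seq_len.
            self.register_buffer("cache", torch.zeros((max_seq_len, embedding_dim), dtype=torch.float32))

        def forward(self, x: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
            self.cache[input_pos] = x     # write the new activation into its slot
            return self.cache.sum(dim=0)  # read back the accumulated state

    model = TinyStatefulModel(embedding_dim=4, max_seq_len=2)
    print(model(torch.ones(4), torch.tensor([0])))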

/aosp_15_r20/external/executorch/examples/models/llama/source_transformation/
  attention.py
    137: self.max_seq_len = attention_mha.max_seq_len
    141: self.max_seq_len,
    187: self.max_seq_len,
    188: self.max_seq_len,

/aosp_15_r20/external/pytorch/torch/testing/_internal/distributed/_tensor/
  common_dtensor.py
    95: max_seq_len: int = 16 variable in ModelArgs
    176: assert args.max_seq_len is not None
    178: self.max_seq_len = args.max_seq_len
    180: self.pos_embeddings = nn.Embedding(args.max_seq_len, args.dim)
    193: assert seq_len <= self.max_seq_len
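
In this DTensor test model, max_seq_len both sizes the learned position-embedding table (line 180) and bounds the sequences the model accepts (line 193). The same pattern in isolation (a hypothetical stub, not the test's Transformer):

    import torch
    from torch import nn

    class PosEmbeddingStub(nn.Module):
        def __init__(self, dim: int = 8, max_seq_len: int = 16):
            super().__init__()
            self.max_seq_len = max_seq_len
            self.pos_embeddings = nn.Embedding(max_seq_len, dim)  # one row per allowed position

        def forward(self, tokens_emb: torch.Tensor) -> torch.Tensor:
            seq_len = tokens_emb.shape[1]
            assert seq_len <= self.max_seq_len, "sequence longer than the embedding table"
            pos = torch.arange(seq_len, device=tokens_emb.device)
            return tokens_emb + self.pos_embeddings(pos)

    stub = PosEmbeddingStub(dim=8, max_seq_len=16)
    out = stub(torch.randn(2, 5, 8))  # fine: 5 <= 16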

/aosp_15_r20/external/tensorflow/tensorflow/core/util/ctc/
  ctc_loss_calculator.h
    152: auto max_seq_len = seq_len(0); in CalculateLoss() local
    160: max_seq_len = std::max(seq_len(b), max_seq_len); in CalculateLoss()
    270: max_seq_len * num_classes * in CalculateLoss()
    272: max_seq_len * 2 * (2 * num_classes + 1) * in CalculateLoss()
    274: max_seq_len * in CalculateLoss()

/aosp_15_r20/external/executorch/examples/qualcomm/oss_scripts/llama3_2/
  qnn_llama3_2_runner.cpp
    46: …output). Defaults to max_seq_len. If the number of input tokens + seq_len > max_seq_len, the outpu…