Searched refs:kNumThreadsPerLine (Results 1 – 1 of 1) sorted by relevance
2509 static constexpr int kNumThreadsPerLine = kNumThreads / kBlockSizeI; in computeDelta() local2512 int16_t laneFirstCol = kElementsPerAccess * (lane_id % kNumThreadsPerLine); in computeDelta()2513 int16_t laneRow = thread_id / kNumThreadsPerLine; in computeDelta()2529 kMaxK / (kElementsPerAccess * kNumThreadsPerLine); in computeDelta()2542 grad_output_ptr += kNumThreadsPerLine; in computeDelta()2543 output_ptr += kNumThreadsPerLine; in computeDelta()2550 (laneFirstCol + iter * kElementsPerAccess * kNumThreadsPerLine) < in computeDelta()2560 kNumThreadsPerLine) < p.head_dim_value; in computeDelta()2577 ceil_div(p.head_dim_value, kElementsPerAccess * kNumThreadsPerLine) * in computeDelta()2578 (kElementsPerAccess * kNumThreadsPerLine); in computeDelta()[all …]