/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "include/private/base/SkTo.h"
#include "src/base/SkHalf.h"
#include "src/base/SkUtils.h"
#include "src/core/SkOpts.h"
#include "src/core/SkRasterPipeline.h"
#include "src/core/SkRasterPipelineContextUtils.h"
#include "src/gpu/Swizzle.h"
#include "src/sksl/tracing/SkSLTraceHook.h"
#include "tests/Test.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <numeric>

using namespace skia_private;

DEF_TEST(SkRasterPipeline, r) {
    // Build and run a simple pipeline to exercise SkRasterPipeline,
    // drawing 50% transparent blue over opaque red in half-floats.
    uint64_t red  = 0x3c00000000003c00ull,
             blue = 0x3800380000000000ull,
             result;

    SkRasterPipeline_MemoryCtx load_s_ctx = { &blue, 0 },
                               load_d_ctx = { &red, 0 },
                               store_ctx  = { &result, 0 };

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipelineOp::load_f16,     &load_s_ctx);
    p.append(SkRasterPipelineOp::load_f16_dst, &load_d_ctx);
    p.append(SkRasterPipelineOp::srcover);
    p.append(SkRasterPipelineOp::store_f16,    &store_ctx);
    p.run(0,0,1,1);

    // We should see half-intensity magenta.
    REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
    REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
    REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
    REPORTER_ASSERT(r, ((result >> 48) & 0xffff) == 0x3c00);
}

DEF_TEST(SkRasterPipeline_PackSmallContext, r) {
    struct PackableObject {
        std::array<uint8_t, sizeof(void*)> data;
    };

    // Create an arena with storage.
    // (Reconstructed buffer size; any backing storage large enough for the arena works here.)
    using StorageArray = std::array<char, 128>;
    StorageArray storage = {};
    SkArenaAllocWithReset alloc(storage.data(), storage.size(), 500);

    // Construct and pack one PackableObject.
    PackableObject object;
    std::fill(object.data.begin(), object.data.end(), 123);
    const void* packed = SkRPCtxUtils::Pack(object, &alloc);

    // The alloc should still be empty.
    REPORTER_ASSERT(r, alloc.isEmpty());

    // `packed` should now contain a bitwise cast of the raw object data.
    uintptr_t objectBits = sk_bit_cast<uintptr_t>(packed);
    for (size_t index = 0; index < sizeof(void*); ++index) {
        REPORTER_ASSERT(r, (objectBits & 0xFF) == 123);
        objectBits >>= 8;
    }

    // Now unpack it.
    auto unpacked = SkRPCtxUtils::Unpack((const PackableObject*)packed);

    // The data should be identical to the original.
    REPORTER_ASSERT(r, unpacked.data == object.data);
}

DEF_TEST(SkRasterPipeline_PackBigContext, r) {
    struct BigObject {
        // (Reconstructed size; anything wider than a pointer forces the data into the arena.)
        std::array<uint8_t, sizeof(void*) + 1> data;
    };

    // Create an arena with storage.
    using StorageArray = std::array<char, 128>;
    StorageArray storage = {};
    SkArenaAllocWithReset alloc(storage.data(), storage.size(), 500);

    // Construct and pack one BigObject.
    BigObject object;
    std::fill(object.data.begin(), object.data.end(), 123);
    const void* packed = SkRPCtxUtils::Pack(object, &alloc);

    // The alloc should not be empty any longer.
    REPORTER_ASSERT(r, !alloc.isEmpty());

    // Now unpack it.
    auto unpacked = SkRPCtxUtils::Unpack((const BigObject*)packed);

    // The data should be identical to the original.
REPORTER_ASSERT(r, unpacked.data == object.data); } DEF_TEST(SkRasterPipeline_LoadStoreConditionMask, reporter) { alignas(64) int32_t mask[16] = {~0, 0, ~0, 0, ~0, ~0, ~0, 0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0}; alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp); SkRasterPipeline_<256> p; SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::load_condition_mask, mask); p.append(SkRasterPipelineOp::store_condition_mask, maskCopy); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); { // `maskCopy` should be populated with `mask` in the frontmost positions // (depending on the architecture that SkRasterPipeline is targeting). size_t index = 0; for (; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, maskCopy[index] == mask[index]); } // The remaining slots should have been left alone. for (; index < std::size(maskCopy); ++index) { REPORTER_ASSERT(reporter, maskCopy[index] == 0); } } { // `r` and `a` should be populated with `mask`. // `g` and `b` should remain initialized to true. const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, src[r + index] == mask[index]); REPORTER_ASSERT(reporter, src[g + index] == ~0); REPORTER_ASSERT(reporter, src[b + index] == ~0); REPORTER_ASSERT(reporter, src[a + index] == mask[index]); } } } DEF_TEST(SkRasterPipeline_LoadStoreLoopMask, reporter) { alignas(64) int32_t mask[16] = {~0, 0, ~0, 0, ~0, ~0, ~0, 0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0}; alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp); SkRasterPipeline_<256> p; SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::load_loop_mask, mask); p.append(SkRasterPipelineOp::store_loop_mask, maskCopy); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); { // `maskCopy` should be populated with `mask` in the frontmost positions // (depending on the architecture that SkRasterPipeline is targeting). size_t index = 0; for (; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, maskCopy[index] == mask[index]); } // The remaining slots should have been left alone. for (; index < std::size(maskCopy); ++index) { REPORTER_ASSERT(reporter, maskCopy[index] == 0); } } { // `g` and `a` should be populated with `mask`. // `r` and `b` should remain initialized to true. 
const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, src[r + index] == ~0); REPORTER_ASSERT(reporter, src[g + index] == mask[index]); REPORTER_ASSERT(reporter, src[b + index] == ~0); REPORTER_ASSERT(reporter, src[a + index] == mask[index]); } } } DEF_TEST(SkRasterPipeline_LoadStoreReturnMask, reporter) { alignas(64) int32_t mask[16] = {~0, 0, ~0, 0, ~0, ~0, ~0, 0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0}; alignas(64) int32_t maskCopy[SkRasterPipeline_kMaxStride_highp] = {}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(mask) == SkRasterPipeline_kMaxStride_highp); SkRasterPipeline_<256> p; SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::load_return_mask, mask); p.append(SkRasterPipelineOp::store_return_mask, maskCopy); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); { // `maskCopy` should be populated with `mask` in the frontmost positions // (depending on the architecture that SkRasterPipeline is targeting). size_t index = 0; for (; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, maskCopy[index] == mask[index]); } // The remaining slots should have been left alone. for (; index < std::size(maskCopy); ++index) { REPORTER_ASSERT(reporter, maskCopy[index] == 0); } } { // `b` and `a` should be populated with `mask`. // `r` and `g` should remain initialized to true. const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, src[r + index] == ~0); REPORTER_ASSERT(reporter, src[g + index] == ~0); REPORTER_ASSERT(reporter, src[b + index] == mask[index]); REPORTER_ASSERT(reporter, src[a + index] == mask[index]); } } } DEF_TEST(SkRasterPipeline_MergeConditionMask, reporter) { alignas(64) int32_t mask[32] = { 0, 0, ~0, ~0, 0, ~0, 0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, ~0, ~0, 0, ~0, 0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(mask) == (2 * SkRasterPipeline_kMaxStride_highp)); SkRasterPipeline_<256> p; SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::merge_condition_mask, mask); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); // `r` and `a` should be populated with `mask[x] & mask[y]` in the frontmost positions. // `g` and `b` should remain initialized to true. 
const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { int32_t expected = mask[index] & mask[index + SkOpts::raster_pipeline_highp_stride]; REPORTER_ASSERT(reporter, src[r + index] == expected); REPORTER_ASSERT(reporter, src[g + index] == ~0); REPORTER_ASSERT(reporter, src[b + index] == ~0); REPORTER_ASSERT(reporter, src[a + index] == expected); } } DEF_TEST(SkRasterPipeline_MergeLoopMask, reporter) { alignas(64) int32_t initial[64] = {~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // r (condition) ~0, 0, ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, // g (loop) ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // b (return) ~0, 0, ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, // a (combined) ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0}; alignas(64) int32_t mask[16] = {0, ~0, ~0, 0, ~0, ~0, ~0, ~0, 0, ~0, ~0, 0, ~0, ~0, ~0, ~0}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_src, initial); p.append(SkRasterPipelineOp::merge_loop_mask, mask); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { // `g` should contain `g & mask` in each lane. REPORTER_ASSERT(reporter, src[g + index] == (initial[g + index] & mask[index])); // `r` and `b` should be unchanged. REPORTER_ASSERT(reporter, src[r + index] == initial[r + index]); REPORTER_ASSERT(reporter, src[b + index] == initial[b + index]); // `a` should contain `r & g & b`. REPORTER_ASSERT(reporter, src[a + index] == (src[r+index] & src[g+index] & src[b+index])); } } DEF_TEST(SkRasterPipeline_ReenableLoopMask, reporter) { alignas(64) int32_t initial[64] = {~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // r (condition) ~0, 0, ~0, 0, ~0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0, 0, 0, ~0, // g (loop) 0, 0, ~0, 0, 0, 0, 0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // b (return) ~0, 0, ~0, 0, ~0, ~0, 0, ~0, 0, ~0, ~0, ~0, 0, 0, 0, ~0, // a (combined) 0, 0, ~0, 0, 0, 0, 0, ~0}; alignas(64) int32_t mask[16] = { 0, ~0, 0, 0, 0, 0, ~0, 0, 0, ~0, 0, 0, 0, 0, ~0, 0}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_src, initial); p.append(SkRasterPipelineOp::reenable_loop_mask, mask); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { // `g` should contain `g | mask` in each lane. REPORTER_ASSERT(reporter, src[g + index] == (initial[g + index] | mask[index])); // `r` and `b` should be unchanged. 
REPORTER_ASSERT(reporter, src[r + index] == initial[r + index]); REPORTER_ASSERT(reporter, src[b + index] == initial[b + index]); // `a` should contain `r & g & b`. REPORTER_ASSERT(reporter, src[a + index] == (src[r+index] & src[g+index] & src[b+index])); } } DEF_TEST(SkRasterPipeline_CaseOp, reporter) { alignas(64) int32_t initial[64] = {~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // r (condition) 0, ~0, ~0, 0, ~0, ~0, 0, ~0, ~0, 0, ~0, ~0, 0, 0, 0, ~0, // g (loop) 0, 0, ~0, 0, 0, 0, 0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // b (return) 0, ~0, ~0, 0, ~0, ~0, 0, ~0, ~0, 0, ~0, ~0, 0, 0, 0, ~0, // a (combined) 0, 0, ~0, 0, 0, 0, 0, ~0}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp)); constexpr int32_t actualValues[16] = { 2, 1, 2, 4, 5, 2, 2, 8}; static_assert(std::size(actualValues) == SkRasterPipeline_kMaxStride_highp); alignas(64) int32_t caseOpData[2 * SkRasterPipeline_kMaxStride_highp]; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { caseOpData[0 * SkOpts::raster_pipeline_highp_stride + index] = actualValues[index]; caseOpData[1 * SkOpts::raster_pipeline_highp_stride + index] = ~0; } SkRasterPipeline_CaseOpCtx ctx; ctx.offset = 0; ctx.expectedValue = 2; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, initial); p.append(SkRasterPipelineOp::set_base_pointer, &caseOpData[0]); p.append(SkRasterPipelineOp::case_op, SkRPCtxUtils::Pack(ctx, &alloc)); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; const int actualValueIdx = 0 * SkOpts::raster_pipeline_highp_stride; const int defaultMaskIdx = 1 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { // `g` should have been set to true for each lane containing 2. int32_t expected = (actualValues[index] == 2) ? ~0 : initial[g + index]; REPORTER_ASSERT(reporter, src[g + index] == expected); // `r` and `b` should be unchanged. REPORTER_ASSERT(reporter, src[r + index] == initial[r + index]); REPORTER_ASSERT(reporter, src[b + index] == initial[b + index]); // `a` should contain `r & g & b`. REPORTER_ASSERT(reporter, src[a + index] == (src[r+index] & src[g+index] & src[b+index])); // The actual-value part of `caseOpData` should be unchanged from the inputs. REPORTER_ASSERT(reporter, caseOpData[actualValueIdx + index] == actualValues[index]); // The default-mask part of `caseOpData` should have been zeroed where the values matched. expected = (actualValues[index] == 2) ? 
0 : ~0; REPORTER_ASSERT(reporter, caseOpData[defaultMaskIdx + index] == expected); } } DEF_TEST(SkRasterPipeline_MaskOffLoopMask, reporter) { alignas(64) int32_t initial[64] = {~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // r (condition) ~0, 0, ~0, ~0, 0, 0, 0, ~0, ~0, ~0, 0, ~0, 0, 0, ~0, ~0, // g (loop) ~0, 0, 0, ~0, 0, 0, 0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // b (return) ~0, 0, ~0, ~0, 0, 0, 0, ~0, ~0, ~0, 0, ~0, 0, 0, ~0, ~0, // a (combined) ~0, 0, 0, ~0, 0, 0, 0, ~0}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_src, initial); p.append(SkRasterPipelineOp::mask_off_loop_mask); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { // `g` should have masked off any lanes that are currently executing. int32_t expected = initial[g + index] & ~initial[a + index]; REPORTER_ASSERT(reporter, src[g + index] == expected); // `a` should contain `r & g & b`. expected = src[r + index] & src[g + index] & src[b + index]; REPORTER_ASSERT(reporter, src[a + index] == expected); } } DEF_TEST(SkRasterPipeline_MaskOffReturnMask, reporter) { alignas(64) int32_t initial[64] = {~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // r (condition) ~0, 0, ~0, ~0, 0, 0, 0, ~0, ~0, ~0, 0, ~0, 0, 0, ~0, ~0, // g (loop) ~0, 0, 0, ~0, 0, 0, 0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, // b (return) ~0, 0, ~0, ~0, 0, 0, 0, ~0, ~0, ~0, 0, ~0, 0, 0, ~0, ~0, // a (combined) ~0, 0, 0, ~0, 0, 0, 0, ~0}; alignas(64) int32_t src[4 * SkRasterPipeline_kMaxStride_highp] = {}; static_assert(std::size(initial) == (4 * SkRasterPipeline_kMaxStride_highp)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_src, initial); p.append(SkRasterPipelineOp::mask_off_return_mask); p.append(SkRasterPipelineOp::store_src, src); p.run(0,0,SkOpts::raster_pipeline_highp_stride,1); const int r = 0 * SkOpts::raster_pipeline_highp_stride; const int g = 1 * SkOpts::raster_pipeline_highp_stride; const int b = 2 * SkOpts::raster_pipeline_highp_stride; const int a = 3 * SkOpts::raster_pipeline_highp_stride; for (size_t index = 0; index < SkOpts::raster_pipeline_highp_stride; ++index) { // `b` should have masked off any lanes that are currently executing. int32_t expected = initial[b + index] & ~initial[a + index]; REPORTER_ASSERT(reporter, src[b + index] == expected); // `a` should contain `r & g & b`. expected = src[r + index] & src[g + index] & src[b + index]; REPORTER_ASSERT(reporter, src[a + index] == expected); } } DEF_TEST(SkRasterPipeline_InitLaneMasks, reporter) { for (size_t width = 1; width <= SkOpts::raster_pipeline_highp_stride; ++width) { SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); // Initialize RGBA to unrelated values. alignas(64) static constexpr float kArbitraryColor[4] = {0.0f, 0.25f, 0.50f, 0.75f}; p.appendConstantColor(&alloc, kArbitraryColor); // Overwrite RGBA with lane masks up to the tail width. SkRasterPipeline_InitLaneMasksCtx ctx; p.append(SkRasterPipelineOp::init_lane_masks, &ctx); // Use the store_src command to write out RGBA for inspection. 
alignas(64) int32_t RGBA[4 * SkRasterPipeline_kMaxStride_highp] = {}; p.append(SkRasterPipelineOp::store_src, RGBA); // Execute our program. p.run(0,0,width,1); // Initialized data should look like on/on/on/on (RGBA are all set) and is // striped by the raster pipeline stride because we wrote it using store_src. size_t index = 0; int32_t* channelR = RGBA; int32_t* channelG = channelR + SkOpts::raster_pipeline_highp_stride; int32_t* channelB = channelG + SkOpts::raster_pipeline_highp_stride; int32_t* channelA = channelB + SkOpts::raster_pipeline_highp_stride; for (; index < width; ++index) { REPORTER_ASSERT(reporter, *channelR++ == ~0); REPORTER_ASSERT(reporter, *channelG++ == ~0); REPORTER_ASSERT(reporter, *channelB++ == ~0); REPORTER_ASSERT(reporter, *channelA++ == ~0); } // The rest of the output array should be untouched (all zero). for (; index < SkOpts::raster_pipeline_highp_stride; ++index) { REPORTER_ASSERT(reporter, *channelR++ == 0); REPORTER_ASSERT(reporter, *channelG++ == 0); REPORTER_ASSERT(reporter, *channelB++ == 0); REPORTER_ASSERT(reporter, *channelA++ == 0); } } } // This is the bit pattern of the "largest" signaling NaN. The next integer is a quiet NaN. // We use this as the starting point for various memory-shuffling tests below, to ensure that our // code doesn't interpret values as float when they might be integral. Using floats can cause // signaling NaN values to change (becoming quiet), even with the most innocuous operations // (particularly on 32-bit x86, where floats are often passed around in the x87 FPU). static constexpr int kLastSignalingNaN = 0x7fbfffff; // Similarly, this is the "smallest" (in magnitude) negative signaling NaN. The next integer is // a quiet negative NaN. Only used when testing operations that need two distinct integer sequences // as input, and the logic is asymmetric enough that we want NaNs fed into both sides. static constexpr int kLastSignalingNegNaN = 0xffbfffff; DEF_TEST(SkRasterPipeline_CopyFromIndirectUnmasked, r) { // Allocate space for 5 source slots, and 5 dest slots. alignas(64) int src[5 * SkRasterPipeline_kMaxStride_highp]; alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp]; // Test with various mixes of indirect offsets. static_assert(SkRasterPipeline_kMaxStride_highp == 16); alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2}; alignas(64) const uint32_t kOffsets4[16] = {99, 99, 0, 0, 99, 99, 0, 0, 99, 99, 0, 0, 99, 99, 0, 0}; const int N = SkOpts::raster_pipeline_highp_stride; for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) { for (int copySize = 1; copySize <= 5; ++copySize) { // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs std::iota(&dst[0], &dst[5 * N], 0); std::iota(&src[0], &src[5 * N], kLastSignalingNaN); // Run `copy_from_indirect_unmasked` over our data. 
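            // A sketch of the expected per-lane behavior (as exercised by the checks below):
            // dst[slot*N + lane] = src[(offsets[lane] + slot)*N + lane] for slot < copySize.
            // `indirectLimit` is the largest offset the op should honor; anything larger is
            // expected to be clamped rather than read out of bounds.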
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
            ctx->dst = &dst[0];
            ctx->src = &src[0];
            ctx->indirectOffset = offsets;
            ctx->indirectLimit = 5 - copySize;
            ctx->slots = copySize;

            p.append(SkRasterPipelineOp::copy_from_indirect_unmasked, ctx);
            p.run(0,0,N,1);

            // If the offset plus copy-size would overflow the source data, the results don't
            // matter; indexing off the end of the buffer is UB, and we don't make any promises
            // about the values you get. If we didn't crash, that's success. (In practice, we
            // will have clamped the source pointer so that we don't read past the end.)
            int maxOffset = *std::max_element(offsets, offsets + N);
            if (copySize + maxOffset > 5) {
                continue;
            }

            // Verify that the destination has been overwritten in the copied slots, and left
            // unchanged in the remaining slots.
            int expectedUnchanged = 0;
            int expectedFromZero = src[0 * N], expectedFromTwo = src[2 * N];
            int* destPtr = dst;
            for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < copySize) {
                        if (offsets[checkLane] == 0) {
                            REPORTER_ASSERT(r, *destPtr == expectedFromZero);
                        } else if (offsets[checkLane] == 2) {
                            REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
                        } else {
                            ERRORF(r, "unexpected offset value");
                        }
                    } else {
                        REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                    }

                    ++destPtr;
                    expectedUnchanged += 1;
                    expectedFromZero += 1;
                    expectedFromTwo += 1;
                }
            }
        }
    }
}

DEF_TEST(SkRasterPipeline_CopyFromIndirectUniformUnmasked, r) {
    // Allocate space for 5 source uniform values, and 5 dest slots.
    // (Note that unlike slots, uniforms don't use multiple lanes per value.)
    alignas(64) int src[5];
    alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp];

    // Test with various mixes of indirect offsets.
    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
    alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2};
    alignas(64) const uint32_t kOffsets4[16] = {99, ~99u, 0, 0, ~99u, 99, 0, 0,
                                                99, ~99u, 0, 0, ~99u, 99, 0, 0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
        for (int copySize = 1; copySize <= 5; ++copySize) {
            // Initialize the destination slots to 0,1,2.. and the source uniforms to various NaNs
            std::iota(&dst[0], &dst[5 * N], 0);
            std::iota(&src[0], &src[5], kLastSignalingNaN);

            // Run `copy_from_indirect_uniform_unmasked` over our data.
            SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
            SkRasterPipeline p(&alloc);
            auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
            ctx->dst = &dst[0];
            ctx->src = &src[0];
            ctx->indirectOffset = offsets;
            ctx->indirectLimit = 5 - copySize;
            ctx->slots = copySize;

            p.append(SkRasterPipelineOp::copy_from_indirect_uniform_unmasked, ctx);
            p.run(0,0,N,1);

            // If the offset plus copy-size would overflow the source data, the results don't
            // matter; indexing off the end of the buffer is UB, and we don't make any promises
            // about the values you get. If we didn't crash, that's success. (In practice, we
            // will have clamped the source pointer so that we don't read past the end.)
            uint32_t maxOffset = *std::max_element(offsets, offsets + N);
            if (copySize + maxOffset > 5) {
                continue;
            }

            // Verify that the destination has been overwritten in each slot.
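            // Uniforms hold one value per slot rather than one per lane, so every lane of an
            // affected destination slot should now hold the same uniform value; that is why the
            // expected source values below advance once per slot instead of once per lane.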
            int expectedUnchanged = 0;
            int expectedFromZero = src[0], expectedFromTwo = src[2];
            int* destPtr = dst;
            for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                for (int checkLane = 0; checkLane < N; ++checkLane) {
                    if (checkSlot < copySize) {
                        if (offsets[checkLane] == 0) {
                            REPORTER_ASSERT(r, *destPtr == expectedFromZero);
                        } else if (offsets[checkLane] == 2) {
                            REPORTER_ASSERT(r, *destPtr == expectedFromTwo);
                        } else {
                            ERRORF(r, "unexpected offset value");
                        }
                    } else {
                        REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                    }

                    ++destPtr;
                    expectedUnchanged += 1;
                }
                expectedFromZero += 1;
                expectedFromTwo += 1;
            }
        }
    }
}

DEF_TEST(SkRasterPipeline_CopyToIndirectMasked, r) {
    // Allocate space for 5 source slots, and 5 dest slots.
    alignas(64) int src[5 * SkRasterPipeline_kMaxStride_highp];
    alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp];

    // Test with various mixes of indirect offsets.
    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
    alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2};
    alignas(64) const uint32_t kOffsets4[16] = {99, ~99u, 0, 0, ~99u, 99, 0, 0,
                                                99, ~99u, 0, 0, ~99u, 99, 0, 0};

    // Test with various masks.
    alignas(64) const int32_t kMask1[16] = {~0, ~0, ~0, ~0, ~0,  0, ~0, ~0,
                                            ~0, ~0, ~0, ~0, ~0,  0, ~0, ~0};
    alignas(64) const int32_t kMask2[16] = {~0,  0, ~0, ~0,  0,  0,  0, ~0,
                                            ~0,  0, ~0, ~0,  0,  0,  0, ~0};
    alignas(64) const int32_t kMask3[16] = {~0, ~0,  0, ~0,  0,  0, ~0, ~0,
                                            ~0, ~0,  0, ~0,  0,  0, ~0, ~0};
    alignas(64) const int32_t kMask4[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                             0,  0,  0,  0,  0,  0,  0,  0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
        for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) {
            for (int copySize = 1; copySize <= 5; ++copySize) {
                // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs
                std::iota(&dst[0], &dst[5 * N], 0);
                std::iota(&src[0], &src[5 * N], kLastSignalingNaN);

                // Run `copy_to_indirect_masked` over our data.
                SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
                SkRasterPipeline p(&alloc);
                auto* ctx = alloc.make<SkRasterPipeline_CopyIndirectCtx>();
                ctx->dst = &dst[0];
                ctx->src = &src[0];
                ctx->indirectOffset = offsets;
                ctx->indirectLimit = 5 - copySize;
                ctx->slots = copySize;

                SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
                p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
                p.append(SkRasterPipelineOp::load_condition_mask, mask);
                p.append(SkRasterPipelineOp::copy_to_indirect_masked, ctx);
                p.run(0,0,N,1);

                // If the offset plus copy-size would overflow the destination, the results don't
                // matter; indexing off the end of the buffer is UB, and we don't make any promises
                // about the values you get. If we didn't crash, that's success. (In practice, we
                // will have clamped the destination pointer so that we don't read past the end.)
                uint32_t maxOffset = *std::max_element(offsets, offsets + N);
                if (copySize + maxOffset > 5) {
                    continue;
                }

                // Verify that the destination has been overwritten in the mask-on fields, and has
                // not been overwritten in the mask-off fields, for each destination slot.
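                // Note the starting value of `expectedFromTwo`: a lane with offset 2 writes
                // src[slot*N + lane] into dst[(slot + 2)*N + lane], so the value found at a given
                // destination position sits 2*N entries earlier in the iota sequence.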
int expectedUnchanged = 0; int expectedFromZero = src[0], expectedFromTwo = src[0] - (2 * N); int* destPtr = dst; int pos = 0; for (int checkSlot = 0; checkSlot < 5; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { int rangeStart = offsets[checkLane] * N; int rangeEnd = (offsets[checkLane] + copySize) * N; if (mask[checkLane] && pos >= rangeStart && pos < rangeEnd) { if (offsets[checkLane] == 0) { REPORTER_ASSERT(r, *destPtr == expectedFromZero); } else if (offsets[checkLane] == 2) { REPORTER_ASSERT(r, *destPtr == expectedFromTwo); } else { ERRORF(r, "unexpected offset value"); } } else { REPORTER_ASSERT(r, *destPtr == expectedUnchanged); } ++pos; ++destPtr; expectedUnchanged += 1; expectedFromZero += 1; expectedFromTwo += 1; } } } } } } DEF_TEST(SkRasterPipeline_SwizzleCopyToIndirectMasked, r) { // Allocate space for 5 source slots, and 5 dest slots. alignas(64) int src[5 * SkRasterPipeline_kMaxStride_highp]; alignas(64) int dst[5 * SkRasterPipeline_kMaxStride_highp]; // Test with various mixes of indirect offsets. static_assert(SkRasterPipeline_kMaxStride_highp == 16); alignas(64) const uint32_t kOffsets1[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; alignas(64) const uint32_t kOffsets2[16] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; alignas(64) const uint32_t kOffsets3[16] = {0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2}; alignas(64) const uint32_t kOffsets4[16] = {99, ~99u, 0, 0, ~99u, 99, 0, 0, 99, ~99u, 0, 0, ~99u, 99, 0, 0}; // Test with various masks. alignas(64) const int32_t kMask1[16] = {~0, ~0, ~0, ~0, ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, ~0, ~0}; alignas(64) const int32_t kMask2[16] = {~0, 0, ~0, ~0, 0, 0, 0, ~0, ~0, 0, ~0, ~0, 0, 0, 0, ~0}; alignas(64) const int32_t kMask3[16] = {~0, ~0, 0, ~0, 0, 0, ~0, ~0, ~0, ~0, 0, ~0, 0, 0, ~0, ~0}; alignas(64) const int32_t kMask4[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // Test with various swizzle permutations. struct TestPattern { int swizzleSize; int swizzleUpperBound; uint16_t swizzle[4]; }; static const TestPattern kPatterns[] = { {1, 4, {3}}, // v.w = (1) {2, 2, {1, 0}}, // v.yx = (1,2) {3, 3, {2, 1, 0}}, // v.zyx = (1,2,3) {4, 4, {3, 0, 1, 2}}, // v.wxyz = (1,2,3,4) }; enum Result { kOutOfBounds = 0, kUnchanged = 1, S0 = 2, S1 = 3, S2 = 4, S3 = 5, S4 = 6, }; #define __ kUnchanged #define XX kOutOfBounds static const Result kExpectationsAtZero[4][5] = { // d[0].w = 1 d[0].yx = (1,2) d[0].zyx = (1,2,3) d[0].wxyz = (1,2,3,4) {__,__,__,S0,__}, {S1,S0,__,__,__}, {S2,S1,S0,__,__}, {S1,S2,S3,S0,__}, }; static const Result kExpectationsAtTwo[4][5] = { // d[2].w = 1 d[2].yx = (1,2) d[2].zyx = (1,2,3) d[2].wxyz = (1,2,3,4) {XX,XX,XX,XX,XX}, {__,__,S1,S0,__}, {__,__,S2,S1,S0}, {XX,XX,XX,XX,XX}, }; #undef __ #undef XX const int N = SkOpts::raster_pipeline_highp_stride; for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) { for (const uint32_t* offsets : {kOffsets1, kOffsets2, kOffsets3, kOffsets4}) { for (size_t patternIndex = 0; patternIndex < std::size(kPatterns); ++patternIndex) { const TestPattern& pattern = kPatterns[patternIndex]; // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs std::iota(&dst[0], &dst[5 * N], 0); std::iota(&src[0], &src[5 * N], kLastSignalingNaN); // Run `swizzle_copy_to_indirect_masked` over our data. 
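                // The ctx->offsets below are byte offsets (destination component * N lanes *
                // sizeof(float)) telling the op where each copied source slot lands; presumably
                // only the first `swizzleSize` entries are consulted for a given pattern.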
                SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
                SkRasterPipeline p(&alloc);
                auto* ctx = alloc.make<SkRasterPipeline_SwizzleCopyIndirectCtx>();
                ctx->dst = &dst[0];
                ctx->src = &src[0];
                ctx->indirectOffset = offsets;
                ctx->indirectLimit = 5 - pattern.swizzleUpperBound;
                ctx->slots = pattern.swizzleSize;
                ctx->offsets[0] = pattern.swizzle[0] * N * sizeof(float);
                ctx->offsets[1] = pattern.swizzle[1] * N * sizeof(float);
                ctx->offsets[2] = pattern.swizzle[2] * N * sizeof(float);
                ctx->offsets[3] = pattern.swizzle[3] * N * sizeof(float);

                SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
                p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
                p.append(SkRasterPipelineOp::load_condition_mask, mask);
                p.append(SkRasterPipelineOp::swizzle_copy_to_indirect_masked, ctx);
                p.run(0,0,N,1);

                // If the offset plus copy-size would overflow the destination, the results don't
                // matter; indexing off the end of the buffer is UB, and we don't make any promises
                // about the values you get. If we didn't crash, that's success. (In practice, we
                // will have clamped the destination pointer so that we don't read past the end.)
                uint32_t maxOffset = *std::max_element(offsets, offsets + N);
                if (pattern.swizzleUpperBound + maxOffset > 5) {
                    continue;
                }

                // Verify that the destination has been overwritten in the mask-on fields, and has
                // not been overwritten in the mask-off fields, for each destination slot.
                int expectedUnchanged = 0;
                int* destPtr = dst;
                for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
                    for (int checkLane = 0; checkLane < N; ++checkLane) {
                        Result expectedType = kUnchanged;
                        if (offsets[checkLane] == 0) {
                            expectedType = kExpectationsAtZero[patternIndex][checkSlot];
                        } else if (offsets[checkLane] == 2) {
                            expectedType = kExpectationsAtTwo[patternIndex][checkSlot];
                        }
                        if (!mask[checkLane]) {
                            expectedType = kUnchanged;
                        }
                        switch (expectedType) {
                            case kOutOfBounds:
                                // out of bounds; ignore result
                                break;
                            case kUnchanged:
                                REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                                break;
                            case S0:
                                // destination should match source 0
                                REPORTER_ASSERT(r, *destPtr == src[0*N + checkLane]);
                                break;
                            case S1:
                                // destination should match source 1
                                REPORTER_ASSERT(r, *destPtr == src[1*N + checkLane]);
                                break;
                            case S2:
                                // destination should match source 2
                                REPORTER_ASSERT(r, *destPtr == src[2*N + checkLane]);
                                break;
                            case S3:
                                // destination should match source 3
                                REPORTER_ASSERT(r, *destPtr == src[3*N + checkLane]);
                                break;
                            case S4:
                                // destination should match source 4
                                REPORTER_ASSERT(r, *destPtr == src[4*N + checkLane]);
                                break;
                        }

                        ++destPtr;
                        expectedUnchanged += 1;
                    }
                }
            }
        }
    }
}

DEF_TEST(SkRasterPipeline_TraceVar, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    class TestTraceHook : public SkSL::TraceHook {
    public:
        void line(int) override  { fBuffer.push_back(-9999999); }
        void enter(int) override { fBuffer.push_back(-9999999); }
        void exit(int) override  { fBuffer.push_back(-9999999); }
        void scope(int) override { fBuffer.push_back(-9999999); }
        void var(int slot, int32_t val) override {
            fBuffer.push_back(slot);
            fBuffer.push_back(val);
        }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t  kMaskOn   [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                            ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t  kMaskOff  [16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                             0,  0,  0,  0,  0,  0,  0,  0};
    alignas(64) static constexpr uint32_t kIndirect0[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                             0,  0,  0,  0,  0,  0,  0,  0};
    alignas(64) static constexpr uint32_t kIndirect1[16] = { 1,  1,  1,  1,  1,  1,  1,  1,
                                                             1,  1,  1,  1,  1,  1,  1,  1};
    alignas(64) int32_t kData333[16];
    alignas(64) int32_t kData555[16];
    alignas(64) int32_t kData666[16];
    alignas(64) int32_t kData777[32];
    alignas(64) int32_t kData999[32];
    std::fill(kData333,     kData333 + N,   333);
    std::fill(kData555,     kData555 + N,   555);
    std::fill(kData666,     kData666 + N,   666);
    std::fill(kData777,     kData777 + N,   777);
    std::fill(kData777 + N, kData777 + 2*N, 707);
    std::fill(kData999,     kData999 + N,   999);
    std::fill(kData999 + N, kData999 + 2*N, 909);

    TestTraceHook trace;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    const SkRasterPipeline_TraceVarCtx kTraceVar1 = {/*traceMask=*/kMaskOff, &trace, 2, 1, kData333,
                                                     /*indirectOffset=*/nullptr, /*indirectLimit=*/0};
    const SkRasterPipeline_TraceVarCtx kTraceVar2 = {/*traceMask=*/kMaskOn, &trace, 4, 1, kData555,
                                                     /*indirectOffset=*/nullptr, /*indirectLimit=*/0};
    const SkRasterPipeline_TraceVarCtx kTraceVar3 = {/*traceMask=*/kMaskOff, &trace, 5, 1, kData666,
                                                     /*indirectOffset=*/nullptr, /*indirectLimit=*/0};
    const SkRasterPipeline_TraceVarCtx kTraceVar4 = {/*traceMask=*/kMaskOn, &trace, 6, 2, kData777,
                                                     /*indirectOffset=*/nullptr, /*indirectLimit=*/0};
    const SkRasterPipeline_TraceVarCtx kTraceVar5 = {/*traceMask=*/kMaskOn, &trace, 8, 2, kData999,
                                                     /*indirectOffset=*/nullptr, /*indirectLimit=*/0};
    const SkRasterPipeline_TraceVarCtx kTraceVar6 = {/*traceMask=*/kMaskOn, &trace, 9, 1, kData999,
                                                     /*indirectOffset=*/kIndirect0, /*indirectLimit=*/1};
    const SkRasterPipeline_TraceVarCtx kTraceVar7 = {/*traceMask=*/kMaskOn, &trace, 9, 1, kData999,
                                                     /*indirectOffset=*/kIndirect1, /*indirectLimit=*/1};

    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar1);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar2);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar3);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar4);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar5);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar6);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_var, &kTraceVar7);
    p.run(0,0,N,1);

    REPORTER_ASSERT(r, (trace.fBuffer == TArray<int>{4, 555, 6, 777, 7, 707, 9, 999, 10, 909}));
}

DEF_TEST(SkRasterPipeline_TraceLine, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    class TestTraceHook : public SkSL::TraceHook {
    public:
        void var(int, int32_t) override { fBuffer.push_back(-9999999); }
        void enter(int) override        { fBuffer.push_back(-9999999); }
        void exit(int) override         { fBuffer.push_back(-9999999); }
        void scope(int) override        { fBuffer.push_back(-9999999); }
        void line(int lineNum) override { fBuffer.push_back(lineNum); }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t kMaskOn [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                         ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t kMaskOff[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                          0,  0,  0,  0,  0,  0,  0,  0};

    TestTraceHook trace;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    const SkRasterPipeline_TraceLineCtx kTraceLine1 = {/*traceMask=*/kMaskOn,  &trace, 123};
    const SkRasterPipeline_TraceLineCtx kTraceLine2 = {/*traceMask=*/kMaskOff, &trace, 456};
    const SkRasterPipeline_TraceLineCtx kTraceLine3 = {/*traceMask=*/kMaskOn,  &trace, 567};
    const SkRasterPipeline_TraceLineCtx kTraceLine4 = {/*traceMask=*/kMaskOff, &trace, 678};
    const SkRasterPipeline_TraceLineCtx kTraceLine5 = {/*traceMask=*/kMaskOn,  &trace, 789};

    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_line, &kTraceLine1);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_line, &kTraceLine2);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_line, &kTraceLine3);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_line, &kTraceLine4);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_line, &kTraceLine5);
    p.run(0,0,N,1);

    REPORTER_ASSERT(r, (trace.fBuffer == TArray<int>{123, 789}));
}

DEF_TEST(SkRasterPipeline_TraceEnterExit, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    class TestTraceHook : public SkSL::TraceHook {
    public:
        void line(int) override         { fBuffer.push_back(-9999999); }
        void var(int, int32_t) override { fBuffer.push_back(-9999999); }
        void scope(int) override        { fBuffer.push_back(-9999999); }
        void enter(int fnIdx) override {
            fBuffer.push_back(fnIdx);
            fBuffer.push_back(1);
        }
        void exit(int fnIdx) override {
            fBuffer.push_back(fnIdx);
            fBuffer.push_back(0);
        }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t kMaskOn [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                         ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t kMaskOff[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                          0,  0,  0,  0,  0,  0,  0,  0};

    TestTraceHook trace;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    const SkRasterPipeline_TraceFuncCtx kTraceFunc1 = {/*traceMask=*/kMaskOff, &trace, 99};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc2 = {/*traceMask=*/kMaskOn,  &trace, 12};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc3 = {/*traceMask=*/kMaskOff, &trace, 34};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc4 = {/*traceMask=*/kMaskOn,  &trace, 56};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc5 = {/*traceMask=*/kMaskOn,  &trace, 78};
    const SkRasterPipeline_TraceFuncCtx kTraceFunc6 = {/*traceMask=*/kMaskOff, &trace, 90};

    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_enter, &kTraceFunc1);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_enter, &kTraceFunc2);
    p.append(SkRasterPipelineOp::trace_enter, &kTraceFunc3);
    p.append(SkRasterPipelineOp::trace_exit, &kTraceFunc4);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_exit, &kTraceFunc5);
    p.append(SkRasterPipelineOp::trace_exit, &kTraceFunc6);
    p.run(0,0,N,1);

    REPORTER_ASSERT(r, (trace.fBuffer == TArray<int>{12, 1, 56, 0}));
}

DEF_TEST(SkRasterPipeline_TraceScope, r) {
    const int N = SkOpts::raster_pipeline_highp_stride;

    class TestTraceHook : public SkSL::TraceHook {
    public:
        void line(int) override         { fBuffer.push_back(-9999999); }
        void var(int, int32_t) override { fBuffer.push_back(-9999999); }
        void enter(int) override        { fBuffer.push_back(-9999999); }
        void exit(int) override         { fBuffer.push_back(-9999999); }
        void scope(int delta) override  { fBuffer.push_back(delta); }

        TArray<int> fBuffer;
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) static constexpr int32_t kMaskOn [16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                                         ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) static constexpr int32_t kMaskOff[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                                          0,  0,  0,  0,  0,  0,  0,  0};

    TestTraceHook trace;
    SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
    SkRasterPipeline p(&alloc);
    SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx;
    p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx);
    const SkRasterPipeline_TraceScopeCtx kTraceScope1 = {/*traceMask=*/kMaskOn,  &trace, +1};
    const SkRasterPipeline_TraceScopeCtx kTraceScope2 = {/*traceMask=*/kMaskOff, &trace, -2};
    const SkRasterPipeline_TraceScopeCtx kTraceScope3 = {/*traceMask=*/kMaskOff, &trace, +3};
    const SkRasterPipeline_TraceScopeCtx kTraceScope4 = {/*traceMask=*/kMaskOn,  &trace, +4};
    const SkRasterPipeline_TraceScopeCtx kTraceScope5 = {/*traceMask=*/kMaskOn,  &trace, -5};

    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_scope, &kTraceScope1);
    p.append(SkRasterPipelineOp::trace_scope, &kTraceScope2);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOff);
    p.append(SkRasterPipelineOp::trace_scope, &kTraceScope3);
    p.append(SkRasterPipelineOp::trace_scope, &kTraceScope4);
    p.append(SkRasterPipelineOp::load_condition_mask, kMaskOn);
    p.append(SkRasterPipelineOp::trace_scope, &kTraceScope5);
    p.run(0,0,N,1);

    REPORTER_ASSERT(r, (trace.fBuffer == TArray<int>{+1, +4, -5}));
}

DEF_TEST(SkRasterPipeline_CopySlotsMasked, r) {
    // Allocate space for 5 source slots and 5 dest slots.
    alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp];
    const int srcIndex = 0, dstIndex = 5;

    struct CopySlotsOp {
        SkRasterPipelineOp stage;
        int numSlotsAffected;
    };

    static const CopySlotsOp kCopyOps[] = {
        {SkRasterPipelineOp::copy_slot_masked,    1},
        {SkRasterPipelineOp::copy_2_slots_masked, 2},
        {SkRasterPipelineOp::copy_3_slots_masked, 3},
        {SkRasterPipelineOp::copy_4_slots_masked, 4},
    };

    static_assert(SkRasterPipeline_kMaxStride_highp == 16);
    alignas(64) const int32_t kMask1[16] = {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
                                            ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0};
    alignas(64) const int32_t kMask2[16] = { 0,  0,  0,  0,  0,  0,  0,  0,
                                             0,  0,  0,  0,  0,  0,  0,  0};
    alignas(64) const int32_t kMask3[16] = {~0,  0, ~0, ~0, ~0, ~0,  0, ~0,
                                            ~0,  0, ~0, ~0, ~0, ~0,  0, ~0};
    alignas(64) const int32_t kMask4[16] = { 0, ~0,  0,  0,  0, ~0, ~0,  0,
                                             0, ~0,  0,  0,  0, ~0, ~0,  0};

    const int N = SkOpts::raster_pipeline_highp_stride;

    for (const CopySlotsOp& op : kCopyOps) {
        for (const int32_t* mask : {kMask1, kMask2, kMask3, kMask4}) {
            // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs
            std::iota(&slots[N * dstIndex], &slots[N * (dstIndex + 5)], 0);
            std::iota(&slots[N * srcIndex], &slots[N * (srcIndex + 5)], kLastSignalingNaN);

            // Run `copy_slots_masked` over our data.
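            // In the packed SkRasterPipeline_BinaryOpCtx used below, `dst` and `src` are byte
            // offsets from the base pointer (installed via set_base_pointer), not raw pointers.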
SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_BinaryOpCtx ctx; ctx.dst = N * dstIndex * sizeof(float); ctx.src = N * srcIndex * sizeof(float); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(SkRasterPipelineOp::load_condition_mask, mask); p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,N,1); // Verify that the destination has been overwritten in the mask-on fields, and has not // been overwritten in the mask-off fields, for each destination slot. int expectedUnchanged = 0, expectedChanged = kLastSignalingNaN; int* destPtr = &slots[N * dstIndex]; for (int checkSlot = 0; checkSlot < 5; ++checkSlot) { for (int checkMask = 0; checkMask < N; ++checkMask) { if (checkSlot < op.numSlotsAffected && mask[checkMask]) { REPORTER_ASSERT(r, *destPtr == expectedChanged); } else { REPORTER_ASSERT(r, *destPtr == expectedUnchanged); } ++destPtr; expectedUnchanged += 1; expectedChanged += 1; } } } } } DEF_TEST(SkRasterPipeline_CopySlotsUnmasked, r) { // Allocate space for 5 source slots and 5 dest slots. alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp]; const int srcIndex = 0, dstIndex = 5; const int N = SkOpts::raster_pipeline_highp_stride; struct CopySlotsOp { SkRasterPipelineOp stage; int numSlotsAffected; }; static const CopySlotsOp kCopyOps[] = { {SkRasterPipelineOp::copy_slot_unmasked, 1}, {SkRasterPipelineOp::copy_2_slots_unmasked, 2}, {SkRasterPipelineOp::copy_3_slots_unmasked, 3}, {SkRasterPipelineOp::copy_4_slots_unmasked, 4}, }; for (const CopySlotsOp& op : kCopyOps) { // Initialize the destination slots to 0,1,2.. and the source slots to various NaNs std::iota(&slots[N * dstIndex], &slots[N * (dstIndex + 5)], 0); std::iota(&slots[N * srcIndex], &slots[N * (srcIndex + 5)], kLastSignalingNaN); // Run `copy_slots_unmasked` over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_BinaryOpCtx ctx; ctx.dst = N * dstIndex * sizeof(float); ctx.src = N * srcIndex * sizeof(float); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,1,1); // Verify that the destination has been overwritten in each slot. int expectedUnchanged = 0, expectedChanged = kLastSignalingNaN; int* destPtr = &slots[N * dstIndex]; for (int checkSlot = 0; checkSlot < 5; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { REPORTER_ASSERT(r, *destPtr == expectedChanged); } else { REPORTER_ASSERT(r, *destPtr == expectedUnchanged); } ++destPtr; expectedUnchanged += 1; expectedChanged += 1; } } } } DEF_TEST(SkRasterPipeline_CopyUniforms, r) { // Allocate space for 5 dest slots. alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp]; int uniforms[5]; const int N = SkOpts::raster_pipeline_highp_stride; struct CopyUniformsOp { SkRasterPipelineOp stage; int numSlotsAffected; }; static const CopyUniformsOp kCopyOps[] = { {SkRasterPipelineOp::copy_uniform, 1}, {SkRasterPipelineOp::copy_2_uniforms, 2}, {SkRasterPipelineOp::copy_3_uniforms, 3}, {SkRasterPipelineOp::copy_4_uniforms, 4}, }; for (const CopyUniformsOp& op : kCopyOps) { // Initialize the destination slots to 1,2,3... 
        std::iota(&slots[0], &slots[5 * N], 1);
        // Initialize the uniform buffer to various NaNs
        std::iota(&uniforms[0], &uniforms[5], kLastSignalingNaN);

        // Run `copy_n_uniforms` over our data.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        auto* ctx = alloc.make<SkRasterPipeline_UniformCtx>();
        ctx->dst = slots;
        ctx->src = uniforms;
        p.append(op.stage, ctx);
        p.run(0,0,1,1);

        // Verify that our uniforms have been broadcast into each slot.
        int expectedUnchanged = 1;
        int expectedChanged = kLastSignalingNaN;
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot < op.numSlotsAffected) {
                    REPORTER_ASSERT(r, *destPtr == expectedChanged);
                } else {
                    REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                }

                ++destPtr;
                expectedUnchanged += 1;
            }
            expectedChanged += 1;
        }
    }
}

DEF_TEST(SkRasterPipeline_CopyConstant, r) {
    // Allocate space for 5 dest slots.
    alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    for (int index = 0; index < 5; ++index) {
        // Initialize the destination slots to 1,2,3...
        std::iota(&slots[0], &slots[5 * N], 1);

        // Overwrite one destination slot with a constant (some NaN based on slot number).
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_ConstantCtx ctx;
        ctx.dst = N * index * sizeof(float);
        ctx.value = kLastSignalingNaN + index;
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        p.append(SkRasterPipelineOp::copy_constant, SkRPCtxUtils::Pack(ctx, &alloc));
        p.run(0,0,1,1);

        // Verify that our constant value has been broadcast into exactly one slot.
        int expectedUnchanged = 1;
        int* destPtr = &slots[0];
        for (int checkSlot = 0; checkSlot < 5; ++checkSlot) {
            for (int checkLane = 0; checkLane < N; ++checkLane) {
                if (checkSlot == index) {
                    REPORTER_ASSERT(r, *destPtr == ctx.value);
                } else {
                    REPORTER_ASSERT(r, *destPtr == expectedUnchanged);
                }

                ++destPtr;
                expectedUnchanged += 1;
            }
        }
    }
}

DEF_TEST(SkRasterPipeline_Swizzle, r) {
    // Allocate space for 4 dest slots.
    alignas(64) int slots[4 * SkRasterPipeline_kMaxStride_highp];
    const int N = SkOpts::raster_pipeline_highp_stride;

    struct TestPattern {
        SkRasterPipelineOp stage;
        uint8_t swizzle[4];
        uint8_t expectation[4];
    };
    static const TestPattern kPatterns[] = {
        {SkRasterPipelineOp::swizzle_1, {3},          {3, 1, 2, 3}}, // (1,2,3,4).w    = (4)
        {SkRasterPipelineOp::swizzle_2, {1, 0},       {1, 0, 2, 3}}, // (1,2,3,4).yx   = (2,1)
        {SkRasterPipelineOp::swizzle_3, {2, 2, 2},    {2, 2, 2, 3}}, // (1,2,3,4).zzz  = (3,3,3)
        {SkRasterPipelineOp::swizzle_4, {0, 0, 1, 2}, {0, 0, 1, 2}}, // (1,2,3,4).xxyz = (1,1,2,3)
    };
    static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCtx::offsets));

    for (const TestPattern& pattern : kPatterns) {
        // Initialize the destination slots to various NaNs
        std::iota(&slots[0], &slots[4 * N], kLastSignalingNaN);

        // Apply the test-pattern swizzle.
        SkArenaAlloc alloc(/*firstHeapAllocation=*/256);
        SkRasterPipeline p(&alloc);
        SkRasterPipeline_SwizzleCtx ctx;
        ctx.dst = 0;
        for (size_t index = 0; index < std::size(ctx.offsets); ++index) {
            ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float);
        }
        p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]);
        p.append(pattern.stage, SkRPCtxUtils::Pack(ctx, &alloc));
        p.run(0,0,1,1);

        // Verify that the swizzle has been applied in each slot.
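        // The slots were seeded with iota starting at kLastSignalingNaN, so slot k's first lane
        // holds kLastSignalingNaN + k*N; the expected value below reconstructs that for whichever
        // source slot the swizzle selected.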
int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 4; ++checkSlot) { int expected = pattern.expectation[checkSlot] * N + kLastSignalingNaN; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *destPtr == expected); ++destPtr; expected += 1; } } } } DEF_TEST(SkRasterPipeline_SwizzleCopy, r) { const int N = SkOpts::raster_pipeline_highp_stride; struct TestPattern { SkRasterPipelineOp op; uint16_t swizzle[4]; uint16_t expectation[4]; }; constexpr uint16_t _ = ~0; static const TestPattern kPatterns[] = { {SkRasterPipelineOp::swizzle_copy_slot_masked, {3,_,_,_}, {_,_,_,0}},//v.w = (1) {SkRasterPipelineOp::swizzle_copy_2_slots_masked, {1,0,_,_}, {1,0,_,_}},//v.yx = (1,2) {SkRasterPipelineOp::swizzle_copy_3_slots_masked, {2,3,0,_}, {2,_,0,1}},//v.zwy = (1,2,3) {SkRasterPipelineOp::swizzle_copy_4_slots_masked, {3,0,1,2}, {1,2,3,0}},//v.wxyz = (1,2,3,4) }; static_assert(sizeof(TestPattern::swizzle) == sizeof(SkRasterPipeline_SwizzleCopyCtx::offsets)); for (const TestPattern& pattern : kPatterns) { // Allocate space for 4 dest slots, and initialize them to zero. alignas(64) int dest[4 * SkRasterPipeline_kMaxStride_highp] = {}; // Allocate 4 source slots and initialize them to various NaNs alignas(64) int source[4 * SkRasterPipeline_kMaxStride_highp] = {}; std::iota(&source[0 * N], &source[4 * N], kLastSignalingNaN); // Apply the dest-swizzle pattern. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; SkRasterPipeline_SwizzleCopyCtx ctx = {}; ctx.src = source; ctx.dst = dest; for (size_t index = 0; index < std::size(ctx.offsets); ++index) { if (pattern.swizzle[index] != _) { ctx.offsets[index] = pattern.swizzle[index] * N * sizeof(float); } } p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(pattern.op, &ctx); p.run(0,0,N,1); // Verify that the swizzle has been applied in each slot. int* destPtr = &dest[0]; for (int checkSlot = 0; checkSlot < 4; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (pattern.expectation[checkSlot] == _) { REPORTER_ASSERT(r, *destPtr == 0); } else { int expectedIdx = pattern.expectation[checkSlot] * N + checkLane; REPORTER_ASSERT(r, *destPtr == source[expectedIdx]); } ++destPtr; } } } } DEF_TEST(SkRasterPipeline_Shuffle, r) { // Allocate space for 16 dest slots. alignas(64) int slots[16 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct TestPattern { int count; uint16_t shuffle[16]; uint16_t expectation[16]; }; static const TestPattern kPatterns[] = { {9, { 0, 3, 6, 1, 4, 7, 2, 5, 8, /* past end: */ 0, 0, 0, 0, 0, 0, 0}, { 0, 3, 6, 1, 4, 7, 2, 5, 8, /* unchanged: */ 9, 10, 11, 12, 13, 14, 15}}, {16, { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}}, }; static_assert(sizeof(TestPattern::shuffle) == sizeof(SkRasterPipeline_ShuffleCtx::offsets)); for (const TestPattern& pattern : kPatterns) { // Initialize the destination slots to various NaNs std::iota(&slots[0], &slots[16 * N], kLastSignalingNaN); // Apply the shuffle. 
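        // As with the swizzle ops, the ShuffleCtx offsets are byte offsets: entry i names the
        // source slot copied into destination slot i, and `count` is how many slots get written.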
SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_ShuffleCtx ctx; ctx.ptr = slots; ctx.count = pattern.count; for (size_t index = 0; index < std::size(ctx.offsets); ++index) { ctx.offsets[index] = pattern.shuffle[index] * N * sizeof(float); } p.append(SkRasterPipelineOp::shuffle, &ctx); p.run(0,0,1,1); // Verify that the shuffle has been applied in each slot. int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 16; ++checkSlot) { int expected = pattern.expectation[checkSlot] * N + kLastSignalingNaN; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *destPtr == expected); ++destPtr; expected += 1; } } } } DEF_TEST(SkRasterPipeline_MatrixMultiply2x2, reporter) { alignas(64) float slots[12 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; // Populate the left- and right-matrix data. Slots 0-3 hold the result and are left as-is. std::iota(&slots[4 * N], &slots[12 * N], 1.0f); // Perform a 2x2 matrix multiply. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_MatrixMultiplyCtx ctx; ctx.dst = 0; ctx.leftColumns = ctx.leftRows = ctx.rightColumns = ctx.rightRows = 2; p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(SkRasterPipelineOp::matrix_multiply_2, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,1,1); // Verify that the result slots hold a 2x2 matrix multiply. const float* const destPtr[2][2] = { {&slots[0 * N], &slots[1 * N]}, {&slots[2 * N], &slots[3 * N]}, }; const float* const leftMtx[2][2] = { {&slots[4 * N], &slots[5 * N]}, {&slots[6 * N], &slots[7 * N]}, }; const float* const rightMtx[2][2] = { {&slots[8 * N], &slots[9 * N]}, {&slots[10 * N], &slots[11 * N]}, }; for (int c = 0; c < 2; ++c) { for (int r = 0; r < 2; ++r) { for (int lane = 0; lane < N; ++lane) { // Dot a vector from leftMtx[*][r] with rightMtx[c][*]. float dot = 0; for (int n = 0; n < 2; ++n) { dot += leftMtx[n][r][lane] * rightMtx[c][n][lane]; } REPORTER_ASSERT(reporter, destPtr[c][r][lane] == dot); } } } } DEF_TEST(SkRasterPipeline_MatrixMultiply3x3, reporter) { alignas(64) float slots[27 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; // Populate the left- and right-matrix data. Slots 0-8 hold the result and are left as-is. // To keep results in full-precision float range, we only set values between 0 and 25. float value = 0.0f; for (int idx = 9 * N; idx < 27 * N; ++idx) { slots[idx] = value; value = fmodf(value + 1.0f, 25.0f); } // Perform a 3x3 matrix multiply. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_MatrixMultiplyCtx ctx; ctx.dst = 0; ctx.leftColumns = ctx.leftRows = ctx.rightColumns = ctx.rightRows = 3; p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(SkRasterPipelineOp::matrix_multiply_3, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,1,1); // Verify that the result slots hold a 3x3 matrix multiply. 
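    // The matrices are stored column-major, one slot per element and N lanes per slot: the 3x3
    // result occupies slots 0-8, the left matrix slots 9-17, and the right matrix slots 18-26.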
const float* const destPtr[3][3] = { {&slots[0 * N], &slots[1 * N], &slots[2 * N]}, {&slots[3 * N], &slots[4 * N], &slots[5 * N]}, {&slots[6 * N], &slots[7 * N], &slots[8 * N]}, }; const float* const leftMtx[3][3] = { {&slots[9 * N], &slots[10 * N], &slots[11 * N]}, {&slots[12 * N], &slots[13 * N], &slots[14 * N]}, {&slots[15 * N], &slots[16 * N], &slots[17 * N]}, }; const float* const rightMtx[3][3] = { {&slots[18 * N], &slots[19 * N], &slots[20 * N]}, {&slots[21 * N], &slots[22 * N], &slots[23 * N]}, {&slots[24 * N], &slots[25 * N], &slots[26 * N]}, }; for (int c = 0; c < 3; ++c) { for (int r = 0; r < 3; ++r) { for (int lane = 0; lane < N; ++lane) { // Dot a vector from leftMtx[*][r] with rightMtx[c][*]. float dot = 0; for (int n = 0; n < 3; ++n) { dot += leftMtx[n][r][lane] * rightMtx[c][n][lane]; } REPORTER_ASSERT(reporter, destPtr[c][r][lane] == dot); } } } } DEF_TEST(SkRasterPipeline_MatrixMultiply4x4, reporter) { alignas(64) float slots[48 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; // Populate the left- and right-matrix data. Slots 0-8 hold the result and are left as-is. // To keep results in full-precision float range, we only set values between 0 and 25. float value = 0.0f; for (int idx = 16 * N; idx < 48 * N; ++idx) { slots[idx] = value; value = fmodf(value + 1.0f, 25.0f); } // Perform a 4x4 matrix multiply. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_MatrixMultiplyCtx ctx; ctx.dst = 0; ctx.leftColumns = ctx.leftRows = ctx.rightColumns = ctx.rightRows = 4; p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(SkRasterPipelineOp::matrix_multiply_4, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,1,1); // Verify that the result slots hold a 4x4 matrix multiply. const float* const destPtr[4][4] = { {&slots[0 * N], &slots[1 * N], &slots[2 * N], &slots[3 * N]}, {&slots[4 * N], &slots[5 * N], &slots[6 * N], &slots[7 * N]}, {&slots[8 * N], &slots[9 * N], &slots[10 * N], &slots[11 * N]}, {&slots[12 * N], &slots[13 * N], &slots[14 * N], &slots[15 * N]}, }; const float* const leftMtx[4][4] = { {&slots[16 * N], &slots[17 * N], &slots[18 * N], &slots[19 * N]}, {&slots[20 * N], &slots[21 * N], &slots[22 * N], &slots[23 * N]}, {&slots[24 * N], &slots[25 * N], &slots[26 * N], &slots[27 * N]}, {&slots[28 * N], &slots[29 * N], &slots[30 * N], &slots[31 * N]}, }; const float* const rightMtx[4][4] = { {&slots[32 * N], &slots[33 * N], &slots[34 * N], &slots[35 * N]}, {&slots[36 * N], &slots[37 * N], &slots[38 * N], &slots[39 * N]}, {&slots[40 * N], &slots[41 * N], &slots[42 * N], &slots[43 * N]}, {&slots[44 * N], &slots[45 * N], &slots[46 * N], &slots[47 * N]}, }; for (int c = 0; c < 4; ++c) { for (int r = 0; r < 4; ++r) { for (int lane = 0; lane < N; ++lane) { // Dot a vector from leftMtx[*][r] with rightMtx[c][*]. float dot = 0; for (int n = 0; n < 4; ++n) { dot += leftMtx[n][r][lane] * rightMtx[c][n][lane]; } REPORTER_ASSERT(reporter, destPtr[c][r][lane] == dot); } } } } DEF_TEST(SkRasterPipeline_FloatArithmeticWithNSlots, r) { // Allocate space for 5 dest and 5 source slots. 
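// (Note: the `_n_` arithmetic ops read a SkRasterPipeline_BinaryOpCtx whose `dst` and `src`
// fields are byte offsets from the base pointer set below; `src` is chosen so that the source
// operand begins immediately after the affected destination slots.)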
alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct ArithmeticOp { SkRasterPipelineOp stage; std::function verify; }; static const ArithmeticOp kArithmeticOps[] = { {SkRasterPipelineOp::add_n_floats, [](float a, float b) { return a + b; }}, {SkRasterPipelineOp::sub_n_floats, [](float a, float b) { return a - b; }}, {SkRasterPipelineOp::mul_n_floats, [](float a, float b) { return a * b; }}, {SkRasterPipelineOp::div_n_floats, [](float a, float b) { return a / b; }}, }; for (const ArithmeticOp& op : kArithmeticOps) { for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) { // Initialize the slot values to 1,2,3... std::iota(&slots[0], &slots[10 * N], 1.0f); // Run the arithmetic op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_BinaryOpCtx ctx; ctx.dst = 0; ctx.src = numSlotsAffected * N * sizeof(float); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,1,1); // Verify that the affected slots now equal (1,2,3...) op (4,5,6...). float leftValue = 1.0f; float rightValue = float(numSlotsAffected * N) + 1.0f; float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < numSlotsAffected) { REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; leftValue += 1.0f; rightValue += 1.0f; } } } } } DEF_TEST(SkRasterPipeline_FloatArithmeticWithHardcodedSlots, r) { // Allocate space for 5 dest and 5 source slots. alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct ArithmeticOp { SkRasterPipelineOp stage; int numSlotsAffected; std::function verify; }; static const ArithmeticOp kArithmeticOps[] = { {SkRasterPipelineOp::add_float, 1, [](float a, float b) { return a + b; }}, {SkRasterPipelineOp::sub_float, 1, [](float a, float b) { return a - b; }}, {SkRasterPipelineOp::mul_float, 1, [](float a, float b) { return a * b; }}, {SkRasterPipelineOp::div_float, 1, [](float a, float b) { return a / b; }}, {SkRasterPipelineOp::add_2_floats, 2, [](float a, float b) { return a + b; }}, {SkRasterPipelineOp::sub_2_floats, 2, [](float a, float b) { return a - b; }}, {SkRasterPipelineOp::mul_2_floats, 2, [](float a, float b) { return a * b; }}, {SkRasterPipelineOp::div_2_floats, 2, [](float a, float b) { return a / b; }}, {SkRasterPipelineOp::add_3_floats, 3, [](float a, float b) { return a + b; }}, {SkRasterPipelineOp::sub_3_floats, 3, [](float a, float b) { return a - b; }}, {SkRasterPipelineOp::mul_3_floats, 3, [](float a, float b) { return a * b; }}, {SkRasterPipelineOp::div_3_floats, 3, [](float a, float b) { return a / b; }}, {SkRasterPipelineOp::add_4_floats, 4, [](float a, float b) { return a + b; }}, {SkRasterPipelineOp::sub_4_floats, 4, [](float a, float b) { return a - b; }}, {SkRasterPipelineOp::mul_4_floats, 4, [](float a, float b) { return a * b; }}, {SkRasterPipelineOp::div_4_floats, 4, [](float a, float b) { return a / b; }}, }; for (const ArithmeticOp& op : kArithmeticOps) { // Initialize the slot values to 1,2,3... std::iota(&slots[0], &slots[10 * N], 1.0f); // Run the arithmetic op over our data. 
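// (Note: the fixed-width ops take a bare slot pointer rather than a ctx; the destination
// operand starts at that pointer and the source operand is assumed to follow it directly,
// which is why `rightValue` below starts at slot `op.numSlotsAffected`.)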
SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(op.stage, &slots[0]); p.run(0,0,1,1); // Verify that the affected slots now equal (1,2,3...) op (4,5,6...). float leftValue = 1.0f; float rightValue = float(op.numSlotsAffected * N) + 1.0f; float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; leftValue += 1.0f; rightValue += 1.0f; } } } } static int divide_unsigned(int a, int b) { return int(uint32_t(a) / uint32_t(b)); } static int min_unsigned (int a, int b) { return uint32_t(a) < uint32_t(b) ? a : b; } static int max_unsigned (int a, int b) { return uint32_t(a) > uint32_t(b) ? a : b; } DEF_TEST(SkRasterPipeline_IntArithmeticWithNSlots, r) { // Allocate space for 5 dest and 5 source slots. alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct ArithmeticOp { SkRasterPipelineOp stage; std::function verify; }; static const ArithmeticOp kArithmeticOps[] = { {SkRasterPipelineOp::add_n_ints, [](int a, int b) { return a + b; }}, {SkRasterPipelineOp::sub_n_ints, [](int a, int b) { return a - b; }}, {SkRasterPipelineOp::mul_n_ints, [](int a, int b) { return a * b; }}, {SkRasterPipelineOp::div_n_ints, [](int a, int b) { return a / b; }}, {SkRasterPipelineOp::div_n_uints, divide_unsigned}, {SkRasterPipelineOp::bitwise_and_n_ints, [](int a, int b) { return a & b; }}, {SkRasterPipelineOp::bitwise_or_n_ints, [](int a, int b) { return a | b; }}, {SkRasterPipelineOp::bitwise_xor_n_ints, [](int a, int b) { return a ^ b; }}, {SkRasterPipelineOp::min_n_ints, [](int a, int b) { return a < b ? a : b; }}, {SkRasterPipelineOp::min_n_uints, min_unsigned}, {SkRasterPipelineOp::max_n_ints, [](int a, int b) { return a > b ? a : b; }}, {SkRasterPipelineOp::max_n_uints, max_unsigned}, }; for (const ArithmeticOp& op : kArithmeticOps) { for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) { // Initialize the slot values to 1,2,3... std::iota(&slots[0], &slots[10 * N], 1); int leftValue = slots[0]; int rightValue = slots[numSlotsAffected * N]; // Run the op (e.g. `add_n_ints`) over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_BinaryOpCtx ctx; ctx.dst = 0; ctx.src = numSlotsAffected * N * sizeof(float); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0,0,1,1); // Verify that the affected slots now equal (1,2,3...) op (4,5,6...). int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < numSlotsAffected) { REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; leftValue += 1; rightValue += 1; } } } } } DEF_TEST(SkRasterPipeline_IntArithmeticWithHardcodedSlots, r) { // Allocate space for 5 dest and 5 source slots. 
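// (Note: the *_uint/*_uints stages reinterpret the slot bits as uint32_t; the
// divide_unsigned/min_unsigned/max_unsigned helpers defined above mirror that
// reinterpretation so the expected values line up.)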
alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct ArithmeticOp { SkRasterPipelineOp stage; int numSlotsAffected; std::function verify; }; static const ArithmeticOp kArithmeticOps[] = { {SkRasterPipelineOp::add_int, 1, [](int a, int b) { return a + b; }}, {SkRasterPipelineOp::sub_int, 1, [](int a, int b) { return a - b; }}, {SkRasterPipelineOp::mul_int, 1, [](int a, int b) { return a * b; }}, {SkRasterPipelineOp::div_int, 1, [](int a, int b) { return a / b; }}, {SkRasterPipelineOp::div_uint, 1, divide_unsigned}, {SkRasterPipelineOp::bitwise_and_int, 1, [](int a, int b) { return a & b; }}, {SkRasterPipelineOp::bitwise_or_int, 1, [](int a, int b) { return a | b; }}, {SkRasterPipelineOp::bitwise_xor_int, 1, [](int a, int b) { return a ^ b; }}, {SkRasterPipelineOp::min_int, 1, [](int a, int b) { return a < b ? a: b; }}, {SkRasterPipelineOp::min_uint, 1, min_unsigned}, {SkRasterPipelineOp::max_int, 1, [](int a, int b) { return a > b ? a: b; }}, {SkRasterPipelineOp::max_uint, 1, max_unsigned}, {SkRasterPipelineOp::add_2_ints, 2, [](int a, int b) { return a + b; }}, {SkRasterPipelineOp::sub_2_ints, 2, [](int a, int b) { return a - b; }}, {SkRasterPipelineOp::mul_2_ints, 2, [](int a, int b) { return a * b; }}, {SkRasterPipelineOp::div_2_ints, 2, [](int a, int b) { return a / b; }}, {SkRasterPipelineOp::div_2_uints, 2, divide_unsigned}, {SkRasterPipelineOp::bitwise_and_2_ints, 2, [](int a, int b) { return a & b; }}, {SkRasterPipelineOp::bitwise_or_2_ints, 2, [](int a, int b) { return a | b; }}, {SkRasterPipelineOp::bitwise_xor_2_ints, 2, [](int a, int b) { return a ^ b; }}, {SkRasterPipelineOp::min_2_ints, 2, [](int a, int b) { return a < b ? a: b; }}, {SkRasterPipelineOp::min_2_uints, 2, min_unsigned}, {SkRasterPipelineOp::max_2_ints, 2, [](int a, int b) { return a > b ? a: b; }}, {SkRasterPipelineOp::max_2_uints, 2, max_unsigned}, {SkRasterPipelineOp::add_3_ints, 3, [](int a, int b) { return a + b; }}, {SkRasterPipelineOp::sub_3_ints, 3, [](int a, int b) { return a - b; }}, {SkRasterPipelineOp::mul_3_ints, 3, [](int a, int b) { return a * b; }}, {SkRasterPipelineOp::div_3_ints, 3, [](int a, int b) { return a / b; }}, {SkRasterPipelineOp::div_3_uints, 3, divide_unsigned}, {SkRasterPipelineOp::bitwise_and_3_ints, 3, [](int a, int b) { return a & b; }}, {SkRasterPipelineOp::bitwise_or_3_ints, 3, [](int a, int b) { return a | b; }}, {SkRasterPipelineOp::bitwise_xor_3_ints, 3, [](int a, int b) { return a ^ b; }}, {SkRasterPipelineOp::min_3_ints, 3, [](int a, int b) { return a < b ? a: b; }}, {SkRasterPipelineOp::min_3_uints, 3, min_unsigned}, {SkRasterPipelineOp::max_3_ints, 3, [](int a, int b) { return a > b ? a: b; }}, {SkRasterPipelineOp::max_3_uints, 3, max_unsigned}, {SkRasterPipelineOp::add_4_ints, 4, [](int a, int b) { return a + b; }}, {SkRasterPipelineOp::sub_4_ints, 4, [](int a, int b) { return a - b; }}, {SkRasterPipelineOp::mul_4_ints, 4, [](int a, int b) { return a * b; }}, {SkRasterPipelineOp::div_4_ints, 4, [](int a, int b) { return a / b; }}, {SkRasterPipelineOp::div_4_uints, 4, divide_unsigned}, {SkRasterPipelineOp::bitwise_and_4_ints, 4, [](int a, int b) { return a & b; }}, {SkRasterPipelineOp::bitwise_or_4_ints, 4, [](int a, int b) { return a | b; }}, {SkRasterPipelineOp::bitwise_xor_4_ints, 4, [](int a, int b) { return a ^ b; }}, {SkRasterPipelineOp::min_4_ints, 4, [](int a, int b) { return a < b ? 
a: b; }}, {SkRasterPipelineOp::min_4_uints, 4, min_unsigned}, {SkRasterPipelineOp::max_4_ints, 4, [](int a, int b) { return a > b ? a: b; }}, {SkRasterPipelineOp::max_4_uints, 4, max_unsigned}, }; for (const ArithmeticOp& op : kArithmeticOps) { // Initialize the slot values to 1,2,3... std::iota(&slots[0], &slots[10 * N], 1); int leftValue = slots[0]; int rightValue = slots[op.numSlotsAffected * N]; // Run the op (e.g. `add_2_ints`) over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(op.stage, &slots[0]); p.run(0,0,1,1); // Verify that the affected slots now equal (1,2,3...) op (4,5,6...). int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { REPORTER_ASSERT(r, *destPtr == op.verify(leftValue, rightValue)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; leftValue += 1; rightValue += 1; } } } } DEF_TEST(SkRasterPipeline_CompareFloatsWithNSlots, r) { // Allocate space for 5 dest and 5 source slots. alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct CompareOp { SkRasterPipelineOp stage; std::function verify; }; static const CompareOp kCompareOps[] = { {SkRasterPipelineOp::cmpeq_n_floats, [](float a, float b) { return a == b; }}, {SkRasterPipelineOp::cmpne_n_floats, [](float a, float b) { return a != b; }}, {SkRasterPipelineOp::cmplt_n_floats, [](float a, float b) { return a < b; }}, {SkRasterPipelineOp::cmple_n_floats, [](float a, float b) { return a <= b; }}, }; for (const CompareOp& op : kCompareOps) { for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) { // Initialize the slot values to 0,1,2,0,1,2,0,1,2... for (int index = 0; index < 10 * N; ++index) { slots[index] = std::fmod(index, 3.0f); } float leftValue = slots[0]; float rightValue = slots[numSlotsAffected * N]; // Run the comparison op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_BinaryOpCtx ctx; ctx.dst = 0; ctx.src = numSlotsAffected * N * sizeof(float); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0, 0, 1, 1); // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)". float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < numSlotsAffected) { bool compareIsTrue = op.verify(leftValue, rightValue); REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; leftValue = std::fmod(leftValue + 1.0f, 3.0f); rightValue = std::fmod(rightValue + 1.0f, 3.0f); } } } } } DEF_TEST(SkRasterPipeline_CompareFloatsWithHardcodedSlots, r) { // Allocate space for 5 dest and 5 source slots. 
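// (Note: comparison ops write a full lane mask rather than a bool: ~0 for true and 0 for
// false. That is why the verification below reinterprets each float slot as an int.)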
alignas(64) float slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct CompareOp { SkRasterPipelineOp stage; int numSlotsAffected; std::function verify; }; static const CompareOp kCompareOps[] = { {SkRasterPipelineOp::cmpeq_float, 1, [](float a, float b) { return a == b; }}, {SkRasterPipelineOp::cmpne_float, 1, [](float a, float b) { return a != b; }}, {SkRasterPipelineOp::cmplt_float, 1, [](float a, float b) { return a < b; }}, {SkRasterPipelineOp::cmple_float, 1, [](float a, float b) { return a <= b; }}, {SkRasterPipelineOp::cmpeq_2_floats, 2, [](float a, float b) { return a == b; }}, {SkRasterPipelineOp::cmpne_2_floats, 2, [](float a, float b) { return a != b; }}, {SkRasterPipelineOp::cmplt_2_floats, 2, [](float a, float b) { return a < b; }}, {SkRasterPipelineOp::cmple_2_floats, 2, [](float a, float b) { return a <= b; }}, {SkRasterPipelineOp::cmpeq_3_floats, 3, [](float a, float b) { return a == b; }}, {SkRasterPipelineOp::cmpne_3_floats, 3, [](float a, float b) { return a != b; }}, {SkRasterPipelineOp::cmplt_3_floats, 3, [](float a, float b) { return a < b; }}, {SkRasterPipelineOp::cmple_3_floats, 3, [](float a, float b) { return a <= b; }}, {SkRasterPipelineOp::cmpeq_4_floats, 4, [](float a, float b) { return a == b; }}, {SkRasterPipelineOp::cmpne_4_floats, 4, [](float a, float b) { return a != b; }}, {SkRasterPipelineOp::cmplt_4_floats, 4, [](float a, float b) { return a < b; }}, {SkRasterPipelineOp::cmple_4_floats, 4, [](float a, float b) { return a <= b; }}, }; for (const CompareOp& op : kCompareOps) { // Initialize the slot values to 0,1,2,0,1,2,0,1,2... for (int index = 0; index < 10 * N; ++index) { slots[index] = std::fmod(index, 3.0f); } float leftValue = slots[0]; float rightValue = slots[op.numSlotsAffected * N]; // Run the comparison op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(op.stage, &slots[0]); p.run(0, 0, 1, 1); // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)". float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { bool compareIsTrue = op.verify(leftValue, rightValue); REPORTER_ASSERT(r, *(int*)destPtr == (compareIsTrue ? ~0 : 0)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; leftValue = std::fmod(leftValue + 1.0f, 3.0f); rightValue = std::fmod(rightValue + 1.0f, 3.0f); } } } } static bool compare_lt_uint (int a, int b) { return uint32_t(a) < uint32_t(b); } static bool compare_lteq_uint(int a, int b) { return uint32_t(a) <= uint32_t(b); } DEF_TEST(SkRasterPipeline_CompareIntsWithNSlots, r) { // Allocate space for 5 dest and 5 source slots. 
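// (Worked example of why the helpers above exist: as a signed int, -1 < 0 is true, but as
// uint32_t, -1 becomes 0xFFFFFFFF, so compare_lt_uint(-1, 0) is false. The -1,0,1 slot
// pattern used below exercises exactly this difference.)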
alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct CompareOp { SkRasterPipelineOp stage; std::function verify; }; static const CompareOp kCompareOps[] = { {SkRasterPipelineOp::cmpeq_n_ints, [](int a, int b) { return a == b; }}, {SkRasterPipelineOp::cmpne_n_ints, [](int a, int b) { return a != b; }}, {SkRasterPipelineOp::cmplt_n_ints, [](int a, int b) { return a < b; }}, {SkRasterPipelineOp::cmple_n_ints, [](int a, int b) { return a <= b; }}, {SkRasterPipelineOp::cmplt_n_uints, compare_lt_uint}, {SkRasterPipelineOp::cmple_n_uints, compare_lteq_uint}, }; for (const CompareOp& op : kCompareOps) { for (int numSlotsAffected = 1; numSlotsAffected <= 5; ++numSlotsAffected) { // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1... for (int index = 0; index < 10 * N; ++index) { slots[index] = (index % 3) - 1; } int leftValue = slots[0]; int rightValue = slots[numSlotsAffected * N]; // Run the comparison op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_BinaryOpCtx ctx; ctx.dst = 0; ctx.src = sizeof(float) * numSlotsAffected * N; p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); p.append(op.stage, SkRPCtxUtils::Pack(ctx, &alloc)); p.run(0, 0, 1, 1); // Verify that the affected slots now contain "(-1,0,1,-1...) op (0,1,-1,0...)". int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < numSlotsAffected) { bool compareIsTrue = op.verify(leftValue, rightValue); REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; if (++leftValue == 2) { leftValue = -1; } if (++rightValue == 2) { rightValue = -1; } } } } } } DEF_TEST(SkRasterPipeline_CompareIntsWithHardcodedSlots, r) { // Allocate space for 5 dest and 5 source slots. 
alignas(64) int slots[10 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct CompareOp { SkRasterPipelineOp stage; int numSlotsAffected; std::function verify; }; static const CompareOp kCompareOps[] = { {SkRasterPipelineOp::cmpeq_int, 1, [](int a, int b) { return a == b; }}, {SkRasterPipelineOp::cmpne_int, 1, [](int a, int b) { return a != b; }}, {SkRasterPipelineOp::cmplt_int, 1, [](int a, int b) { return a < b; }}, {SkRasterPipelineOp::cmple_int, 1, [](int a, int b) { return a <= b; }}, {SkRasterPipelineOp::cmplt_uint, 1, compare_lt_uint}, {SkRasterPipelineOp::cmple_uint, 1, compare_lteq_uint}, {SkRasterPipelineOp::cmpeq_2_ints, 2, [](int a, int b) { return a == b; }}, {SkRasterPipelineOp::cmpne_2_ints, 2, [](int a, int b) { return a != b; }}, {SkRasterPipelineOp::cmplt_2_ints, 2, [](int a, int b) { return a < b; }}, {SkRasterPipelineOp::cmple_2_ints, 2, [](int a, int b) { return a <= b; }}, {SkRasterPipelineOp::cmplt_2_uints, 2, compare_lt_uint}, {SkRasterPipelineOp::cmple_2_uints, 2, compare_lteq_uint}, {SkRasterPipelineOp::cmpeq_3_ints, 3, [](int a, int b) { return a == b; }}, {SkRasterPipelineOp::cmpne_3_ints, 3, [](int a, int b) { return a != b; }}, {SkRasterPipelineOp::cmplt_3_ints, 3, [](int a, int b) { return a < b; }}, {SkRasterPipelineOp::cmple_3_ints, 3, [](int a, int b) { return a <= b; }}, {SkRasterPipelineOp::cmplt_3_uints, 3, compare_lt_uint}, {SkRasterPipelineOp::cmple_3_uints, 3, compare_lteq_uint}, {SkRasterPipelineOp::cmpeq_4_ints, 4, [](int a, int b) { return a == b; }}, {SkRasterPipelineOp::cmpne_4_ints, 4, [](int a, int b) { return a != b; }}, {SkRasterPipelineOp::cmplt_4_ints, 4, [](int a, int b) { return a < b; }}, {SkRasterPipelineOp::cmple_4_ints, 4, [](int a, int b) { return a <= b; }}, {SkRasterPipelineOp::cmplt_4_uints, 4, compare_lt_uint}, {SkRasterPipelineOp::cmple_4_uints, 4, compare_lteq_uint}, }; for (const CompareOp& op : kCompareOps) { // Initialize the slot values to -1,0,1,-1,0,1,-1,0,1,-1... for (int index = 0; index < 10 * N; ++index) { slots[index] = (index % 3) - 1; } int leftValue = slots[0]; int rightValue = slots[op.numSlotsAffected * N]; // Run the comparison op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(op.stage, &slots[0]); p.run(0, 0, 1, 1); // Verify that the affected slots now contain "(0,1,2,0...) op (1,2,0,1...)". int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 10; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { bool compareIsTrue = op.verify(leftValue, rightValue); REPORTER_ASSERT(r, *destPtr == (compareIsTrue ? ~0 : 0)); } else { REPORTER_ASSERT(r, *destPtr == leftValue); } ++destPtr; if (++leftValue == 2) { leftValue = -1; } if (++rightValue == 2) { rightValue = -1; } } } } } static int to_float(int a) { return sk_bit_cast((float)a); } DEF_TEST(SkRasterPipeline_UnaryIntOps, r) { // Allocate space for 5 slots. alignas(64) int slots[5 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct UnaryOp { SkRasterPipelineOp stage; int numSlotsAffected; std::function verify; }; static const UnaryOp kUnaryOps[] = { {SkRasterPipelineOp::cast_to_float_from_int, 1, to_float}, {SkRasterPipelineOp::cast_to_float_from_2_ints, 2, to_float}, {SkRasterPipelineOp::cast_to_float_from_3_ints, 3, to_float}, {SkRasterPipelineOp::cast_to_float_from_4_ints, 4, to_float}, {SkRasterPipelineOp::abs_int, 1, [](int a) { return a < 0 ? 
-a : a; }}, {SkRasterPipelineOp::abs_2_ints, 2, [](int a) { return a < 0 ? -a : a; }}, {SkRasterPipelineOp::abs_3_ints, 3, [](int a) { return a < 0 ? -a : a; }}, {SkRasterPipelineOp::abs_4_ints, 4, [](int a) { return a < 0 ? -a : a; }}, }; for (const UnaryOp& op : kUnaryOps) { // Initialize the slot values to -10,-9,-8... std::iota(&slots[0], &slots[5 * N], -10); int inputValue = slots[0]; // Run the unary op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(op.stage, &slots[0]); p.run(0, 0, 1, 1); // Verify that the destination slots have been updated. int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 5; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { int expected = op.verify(inputValue); REPORTER_ASSERT(r, *destPtr == expected); } else { REPORTER_ASSERT(r, *destPtr == inputValue); } ++destPtr; ++inputValue; } } } } static float to_int(float a) { return sk_bit_cast((int)a); } static float to_uint(float a) { return sk_bit_cast((unsigned int)a); } DEF_TEST(SkRasterPipeline_UnaryFloatOps, r) { // Allocate space for 5 slots. alignas(64) float slots[5 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct UnaryOp { SkRasterPipelineOp stage; int numSlotsAffected; std::function verify; }; static const UnaryOp kUnaryOps[] = { {SkRasterPipelineOp::cast_to_int_from_float, 1, to_int}, {SkRasterPipelineOp::cast_to_int_from_2_floats, 2, to_int}, {SkRasterPipelineOp::cast_to_int_from_3_floats, 3, to_int}, {SkRasterPipelineOp::cast_to_int_from_4_floats, 4, to_int}, {SkRasterPipelineOp::cast_to_uint_from_float, 1, to_uint}, {SkRasterPipelineOp::cast_to_uint_from_2_floats, 2, to_uint}, {SkRasterPipelineOp::cast_to_uint_from_3_floats, 3, to_uint}, {SkRasterPipelineOp::cast_to_uint_from_4_floats, 4, to_uint}, {SkRasterPipelineOp::floor_float, 1, [](float a) { return floorf(a); }}, {SkRasterPipelineOp::floor_2_floats, 2, [](float a) { return floorf(a); }}, {SkRasterPipelineOp::floor_3_floats, 3, [](float a) { return floorf(a); }}, {SkRasterPipelineOp::floor_4_floats, 4, [](float a) { return floorf(a); }}, {SkRasterPipelineOp::ceil_float, 1, [](float a) { return ceilf(a); }}, {SkRasterPipelineOp::ceil_2_floats, 2, [](float a) { return ceilf(a); }}, {SkRasterPipelineOp::ceil_3_floats, 3, [](float a) { return ceilf(a); }}, {SkRasterPipelineOp::ceil_4_floats, 4, [](float a) { return ceilf(a); }}, }; for (const UnaryOp& op : kUnaryOps) { // The result of some ops are undefined with negative inputs, so only test positive values. bool positiveOnly = (op.stage == SkRasterPipelineOp::cast_to_uint_from_float || op.stage == SkRasterPipelineOp::cast_to_uint_from_2_floats || op.stage == SkRasterPipelineOp::cast_to_uint_from_3_floats || op.stage == SkRasterPipelineOp::cast_to_uint_from_4_floats); float iotaStart = positiveOnly ? 1.0f : -9.75f; std::iota(&slots[0], &slots[5 * N], iotaStart); float inputValue = slots[0]; // Run the unary op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(op.stage, &slots[0]); p.run(0, 0, 1, 1); // Verify that the destination slots have been updated. float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 5; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { if (checkSlot < op.numSlotsAffected) { float expected = op.verify(inputValue); // The casting tests can generate NaN, depending on the input value, so a value // match (via ==) might not succeed. 
// The ceil tests can generate negative zeros _sometimes_, depending on the // exact implementation of ceil(), so a bitwise match might not succeed. // Because of this, we allow either a value match or a bitwise match. bool bitwiseMatch = (0 == memcmp(destPtr, &expected, sizeof(float))); bool valueMatch = (*destPtr == expected); REPORTER_ASSERT(r, valueMatch || bitwiseMatch); } else { REPORTER_ASSERT(r, *destPtr == inputValue); } ++destPtr; ++inputValue; } } } } static float to_mix_weight(float value) { // Convert a positive value to a mix-weight (a number between 0 and 1). value /= 16.0f; return value - std::floor(value); } DEF_TEST(SkRasterPipeline_MixTest, r) { // Allocate space for 5 dest and 10 source slots. alignas(64) float slots[15 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct MixOp { int numSlotsAffected; std::function append; }; static const MixOp kMixOps[] = { {1, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_float, slots); }}, {2, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_2_floats, slots); }}, {3, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_3_floats, slots); }}, {4, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_4_floats, slots); }}, {5, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { SkRasterPipeline_TernaryOpCtx ctx; ctx.dst = 0; ctx.delta = 5 * N * sizeof(float); p->append(SkRasterPipelineOp::mix_n_floats, SkRPCtxUtils::Pack(ctx, alloc)); }}, }; for (const MixOp& op : kMixOps) { // Initialize the values to 1,2,3... std::iota(&slots[0], &slots[15 * N], 1.0f); float weightValue = slots[0]; float fromValue = slots[1 * op.numSlotsAffected * N]; float toValue = slots[2 * op.numSlotsAffected * N]; // The first group of values (the weights) must be between zero and one. for (int idx = 0; idx < 1 * op.numSlotsAffected * N; ++idx) { slots[idx] = to_mix_weight(slots[idx]); } // Run the mix op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); op.append(&p, &alloc); p.run(0,0,1,1); // Verify that the affected slots now equal mix({0.25, 0.3125...}, {3,4...}, {5,6...}, ). float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < op.numSlotsAffected; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { float checkValue = (toValue - fromValue) * to_mix_weight(weightValue) + fromValue; REPORTER_ASSERT(r, *destPtr == checkValue); ++destPtr; fromValue += 1.0f; toValue += 1.0f; weightValue += 1.0f; } } } } DEF_TEST(SkRasterPipeline_MixIntTest, r) { // Allocate space for 5 dest and 10 source slots. 
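// (Note: in the integer variant the "weight" slots hold lane masks rather than fractions;
// the op behaves as a bitwise select, so ~0 picks the "to" value and 0 keeps the "from"
// value, matching `checkValue` in the verification loop below.)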
alignas(64) int slots[15 * SkRasterPipeline_kMaxStride_highp]; const int N = SkOpts::raster_pipeline_highp_stride; struct MixOp { int numSlotsAffected; std::function<void(SkRasterPipeline*, SkArenaAlloc*)> append; }; static const MixOp kMixOps[] = { {1, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_int, slots); }}, {2, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_2_ints, slots); }}, {3, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_3_ints, slots); }}, {4, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { p->append(SkRasterPipelineOp::mix_4_ints, slots); }}, {5, [&](SkRasterPipeline* p, SkArenaAlloc* alloc) { SkRasterPipeline_TernaryOpCtx ctx; ctx.dst = 0; ctx.delta = 5 * N * sizeof(int); p->append(SkRasterPipelineOp::mix_n_ints, SkRPCtxUtils::Pack(ctx, alloc)); }}, }; for (const MixOp& op : kMixOps) { // Initialize the selector ("weight") values to alternating masks for (int idx = 0; idx < 1 * op.numSlotsAffected * N; ++idx) { slots[idx] = (idx & 1) ? ~0 : 0; } // Initialize the other values to various NaNs std::iota(&slots[1 * op.numSlotsAffected * N], &slots[15 * N], kLastSignalingNaN); int weightValue = slots[0]; int fromValue = slots[1 * op.numSlotsAffected * N]; int toValue = slots[2 * op.numSlotsAffected * N]; // Run the mix op over our data. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::set_base_pointer, &slots[0]); op.append(&p, &alloc); p.run(0,0,1,1); // Verify that each affected slot now equals either fromValue or toValue, as selected by the mask. int* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < op.numSlotsAffected; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { int checkValue = weightValue ? toValue : fromValue; REPORTER_ASSERT(r, *destPtr == checkValue); ++destPtr; fromValue += 1; toValue += 1; weightValue = ~weightValue; } } } } DEF_TEST(SkRasterPipeline_Jump, r) { // Allocate space for 4 slots. alignas(64) float slots[4 * SkRasterPipeline_kMaxStride_highp] = {}; const int N = SkOpts::raster_pipeline_highp_stride; alignas(64) static constexpr float kColorDarkRed[4] = {0.5f, 0.0f, 0.0f, 0.75f}; alignas(64) static constexpr float kColorGreen[4] = {0.0f, 1.0f, 0.0f, 1.0f}; const int offset = 2; // Make a program which jumps over an appendConstantColor op. SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.appendConstantColor(&alloc, kColorGreen); // assign green p.append(SkRasterPipelineOp::jump, &offset); // jump over the dark-red color assignment p.appendConstantColor(&alloc, kColorDarkRed); // (not executed) p.append(SkRasterPipelineOp::store_src, slots); // store the result so we can check it p.run(0,0,1,1); // Verify that the slots contain green. float* destPtr = &slots[0]; for (int checkSlot = 0; checkSlot < 4; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *destPtr == kColorGreen[checkSlot]); ++destPtr; } } } DEF_TEST(SkRasterPipeline_ExchangeSrc, r) { const int N = SkOpts::raster_pipeline_highp_stride; alignas(64) int registerValue[4 * SkRasterPipeline_kMaxStride_highp] = {}; alignas(64) int exchangeValue[4 * SkRasterPipeline_kMaxStride_highp] = {}; std::iota(&registerValue[0], &registerValue[4 * N], kLastSignalingNaN); std::iota(&exchangeValue[0], &exchangeValue[4 * N], kLastSignalingNegNaN); // This program should swap the contents of `registerValue` and `exchangeValue`.
SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, registerValue); p.append(SkRasterPipelineOp::exchange_src, exchangeValue); p.append(SkRasterPipelineOp::store_src, registerValue); p.run(0,0,N,1); int* registerPtr = &registerValue[0]; int* exchangePtr = &exchangeValue[0]; int expectedRegister = kLastSignalingNegNaN, expectedExchange = kLastSignalingNaN; for (int checkSlot = 0; checkSlot < 4; ++checkSlot) { for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *registerPtr++ == expectedRegister); REPORTER_ASSERT(r, *exchangePtr++ == expectedExchange); expectedRegister += 1; expectedExchange += 1; } } } DEF_TEST(SkRasterPipeline_BranchIfAllLanesActive, r) { const int N = SkOpts::raster_pipeline_highp_stride; SkRasterPipeline_BranchIfAllLanesActiveCtx ctx; ctx.offset = 2; // The branch should be taken when lane masks are all-on. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::branch_if_all_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ == 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should not be taken when lane masks are all-off. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); alignas(64) constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {}; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, kNoLanesActive); p.append(SkRasterPipelineOp::branch_if_all_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ != 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should not be taken when lane masks are partially-on. if (N > 1) { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); // An array of ~0s, except for a single zero in the last A slot.
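// (Note: `src` is stored as four blocks of N lanes in R, G, B, A order, so index 4*N - 1 is
// the last lane of the A block; clearing just that entry leaves a single lane inactive.)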
alignas(64) int32_t oneLaneInactive[4 * SkRasterPipeline_kMaxStride_highp] = {}; std::fill(oneLaneInactive, &oneLaneInactive[4*N], ~0); oneLaneInactive[4*N - 1] = 0; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, oneLaneInactive); p.append(SkRasterPipelineOp::branch_if_all_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ != 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } } DEF_TEST(SkRasterPipeline_BranchIfAnyLanesActive, r) { const int N = SkOpts::raster_pipeline_highp_stride; SkRasterPipeline_BranchCtx ctx; ctx.offset = 2; // The branch should be taken when lane masks are all-on. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::branch_if_any_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ == 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should not be taken when lane masks are all-off. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); alignas(64) constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {}; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, kNoLanesActive); p.append(SkRasterPipelineOp::branch_if_any_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ != 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should be taken when lane masks are partially-on. if (N > 1) { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); // An array of all zeros, except for a single ~0 in the last A slot. 
alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {}; oneLaneActive[4*N - 1] = ~0; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, oneLaneActive); p.append(SkRasterPipelineOp::branch_if_any_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ == 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } } DEF_TEST(SkRasterPipeline_BranchIfNoLanesActive, r) { const int N = SkOpts::raster_pipeline_highp_stride; SkRasterPipeline_BranchCtx ctx; ctx.offset = 2; // The branch should not be taken when lane masks are all-on. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::branch_if_no_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ != 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should be taken when lane masks are all-off. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); alignas(64) constexpr int32_t kNoLanesActive[4 * SkRasterPipeline_kMaxStride_highp] = {}; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, kNoLanesActive); p.append(SkRasterPipelineOp::branch_if_no_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ == 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should not be taken when lane masks are partially-on. if (N > 1) { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); // An array of all zeros, except for a single ~0 in the last A slot. 
alignas(64) int32_t oneLaneActive[4 * SkRasterPipeline_kMaxStride_highp] = {}; oneLaneActive[4*N - 1] = ~0; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, oneLaneActive); p.append(SkRasterPipelineOp::branch_if_no_lanes_active, &ctx); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ != 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } } DEF_TEST(SkRasterPipeline_BranchIfActiveLanesEqual, r) { // Allocate space for 4 slots. const int N = SkOpts::raster_pipeline_highp_stride; // An array of all 6s. alignas(64) int allSixes[SkRasterPipeline_kMaxStride_highp] = {}; std::fill(std::begin(allSixes), std::end(allSixes), 6); // An array of all 6s, except for a single 5 in one lane. alignas(64) int mostlySixesWithOneFive[SkRasterPipeline_kMaxStride_highp] = {}; std::fill(std::begin(mostlySixesWithOneFive), std::end(mostlySixesWithOneFive), 6); mostlySixesWithOneFive[N - 1] = 5; SkRasterPipeline_BranchIfEqualCtx matching; // comparing all-six vs five will match matching.offset = 2; matching.value = 5; matching.ptr = allSixes; SkRasterPipeline_BranchIfEqualCtx nonmatching; // comparing mostly-six vs five won't match nonmatching.offset = 2; nonmatching.value = 5; nonmatching.ptr = mostlySixesWithOneFive; // The branch should be taken when lane masks are all-on and we're checking 6 ≠ 5. { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &matching); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ == 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should not be taken when lane masks are all-on and we're checking 5 ≠ 5 { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); SkRasterPipeline_InitLaneMasksCtx initLaneMasksCtx; p.append(SkRasterPipelineOp::init_lane_masks, &initLaneMasksCtx); p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ != 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } // The branch should be taken when the 5 = 5 lane is dead. 
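// (Note: the execution mask loaded below disables only the lane holding 5, so every active
// lane holds 6; since no active lane equals the value being tested (5), the branch is taken
// even though the data itself still contains a 5.)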
if (N > 1) { alignas(64) int32_t first [SkRasterPipeline_kMaxStride_highp]; alignas(64) int32_t second[SkRasterPipeline_kMaxStride_highp]; std::fill(&first [0], &first [N], 0x12345678); std::fill(&second[0], &second[N], 0x12345678); // An execution mask with all lanes on except for the five-lane. alignas(64) int mask[4 * SkRasterPipeline_kMaxStride_highp] = {}; std::fill(std::begin(mask), std::end(mask), ~0); mask[4*N - 1] = 0; SkArenaAlloc alloc(/*firstHeapAllocation=*/256); SkRasterPipeline p(&alloc); p.append(SkRasterPipelineOp::load_src, mask); p.append(SkRasterPipelineOp::branch_if_no_active_lanes_eq, &nonmatching); p.append(SkRasterPipelineOp::store_src_a, first); p.append(SkRasterPipelineOp::store_src_a, second); p.run(0,0,N,1); int32_t* firstPtr = first; int32_t* secondPtr = second; for (int checkLane = 0; checkLane < N; ++checkLane) { REPORTER_ASSERT(r, *firstPtr++ == 0x12345678); REPORTER_ASSERT(r, *secondPtr++ != 0x12345678); } } } DEF_TEST(SkRasterPipeline_empty, r) { // No asserts... just a test that this is safe to run. SkRasterPipeline_<256> p; p.run(0,0,20,1); } DEF_TEST(SkRasterPipeline_nonsense, r) { // No asserts... just a test that this is safe to run and terminates. // srcover() calls st->next(); this makes sure we've always got something there to call. SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::srcover); p.run(0,0,20,1); } DEF_TEST(SkRasterPipeline_JIT, r) { // This tests a couple odd corners that a JIT backend can stumble over. uint32_t buf[72] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; SkRasterPipeline_MemoryCtx src = { buf + 0, 0 }, dst = { buf + 36, 0 }; // Copy buf[x] to buf[x+36] for x in [15,35). SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_8888, &src); p.append(SkRasterPipelineOp::store_8888, &dst); p.run(15,0, 20,1); for (int i = 0; i < 36; i++) { if (i < 15 || i == 35) { REPORTER_ASSERT(r, buf[i+36] == 0); } else { REPORTER_ASSERT(r, buf[i+36] == (uint32_t)(i - 11)); } } } static uint16_t h(float f) { // Remember, a float is 1-8-23 (sign-exponent-mantissa) with 127 exponent bias. uint32_t sem; memcpy(&sem, &f, sizeof(sem)); uint32_t s = sem & 0x80000000, em = sem ^ s; // Convert to 1-5-10 half with 15 bias, flushing denorm halfs (including zero) to zero. auto denorm = (int32_t)em < 0x38800000; // I32 comparison is often quicker, and always safe // here. return denorm ? 
SkTo(0) : SkTo((s>>16) + (em>>13) - ((127-15)<<10)); } DEF_TEST(SkRasterPipeline_tail, r) { { float data[][4] = { {00, 01, 02, 03}, {10, 11, 12, 13}, {20, 21, 22, 23}, {30, 31, 32, 33}, }; float buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_f32, &src); p.append(SkRasterPipelineOp::store_f32, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { for (unsigned k = 0; k < 4; k++) { if (buffer[j][k] != data[j][k]) { ERRORF(r, "(%u, %u) - a: %g r: %g\n", j, k, data[j][k], buffer[j][k]); } } } for (int j = i; j < 4; j++) { for (auto f : buffer[j]) { REPORTER_ASSERT(r, SkIsNaN(f)); } } } } { alignas(8) uint16_t data[][4] = { {h(00), h(01), h(02), h(03)}, {h(10), h(11), h(12), h(13)}, {h(20), h(21), h(22), h(23)}, {h(30), h(31), h(32), h(33)}, }; alignas(8) uint16_t buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_f16, &src); p.append(SkRasterPipelineOp::store_f16, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { for (int k = 0; k < 4; k++) { REPORTER_ASSERT(r, buffer[j][k] == data[j][k]); } } for (int j = i; j < 4; j++) { for (auto f : buffer[j]) { REPORTER_ASSERT(r, f == 0xffff); } } } } { alignas(8) uint16_t data[]= { h(00), h(10), h(20), h(30), }; alignas(8) uint16_t buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_af16, &src); p.append(SkRasterPipelineOp::store_f16, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { uint16_t expected[] = {0, 0, 0, data[j]}; REPORTER_ASSERT(r, !memcmp(expected, &buffer[j][0], sizeof(buffer[j]))); } for (int j = i; j < 4; j++) { for (auto f : buffer[j]) { REPORTER_ASSERT(r, f == 0xffff); } } } } { alignas(8) uint16_t data[][4] = { {h(00), h(01), h(02), h(03)}, {h(10), h(11), h(12), h(13)}, {h(20), h(21), h(22), h(23)}, {h(30), h(31), h(32), h(33)}, }; alignas(8) uint16_t buffer[4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_f16, &src); p.append(SkRasterPipelineOp::store_af16, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { REPORTER_ASSERT(r, !memcmp(&data[j][3], &buffer[j], sizeof(buffer[j]))); } for (int j = i; j < 4; j++) { REPORTER_ASSERT(r, buffer[j] == 0xffff); } } } { alignas(8) uint16_t data[][4] = { {h(00), h(01), h(02), h(03)}, {h(10), h(11), h(12), h(13)}, {h(20), h(21), h(22), h(23)}, {h(30), h(31), h(32), h(33)}, }; alignas(8) uint16_t buffer[4][2]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_f16, &src); p.append(SkRasterPipelineOp::store_rgf16, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { REPORTER_ASSERT(r, !memcmp(&buffer[j], &data[j], 2 * sizeof(uint16_t))); } for (int j = i; j < 4; j++) { for (auto h : buffer[j]) { REPORTER_ASSERT(r, h == 0xffff); } } } } { alignas(8) uint16_t data[][2] = { {h(00), h(01)}, {h(10), h(11)}, {h(20), 
h(21)}, {h(30), h(31)}, }; alignas(8) uint16_t buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_rgf16, &src); p.append(SkRasterPipelineOp::store_f16, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { uint16_t expected[] = {data[j][0], data[j][1], h(0), h(1)}; REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected))); } for (int j = i; j < 4; j++) { for (auto h : buffer[j]) { REPORTER_ASSERT(r, h == 0xffff); } } } } } DEF_TEST(SkRasterPipeline_u16, r) { { alignas(8) uint16_t data[][2] = { {0x0000, 0x0111}, {0x1010, 0x1111}, {0x2020, 0x2121}, {0x3030, 0x3131}, }; uint8_t buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xab, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_rg1616, &src); p.append(SkRasterPipelineOp::store_8888, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { uint8_t expected[] = { SkToU8(data[j][0] >> 8), SkToU8(data[j][1] >> 8), 000, 0xff }; REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected))); } for (int j = i; j < 4; j++) { for (auto b : buffer[j]) { REPORTER_ASSERT(r, b == 0xab); } } } } { alignas(8) uint16_t data[] = { 0x0000, 0x1010, 0x2020, 0x3030, }; uint8_t buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0], 0 }, dst = { &buffer[0][0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_a16, &src); p.append(SkRasterPipelineOp::store_8888, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { uint8_t expected[] = {0x00, 0x00, 0x00, SkToU8(data[j] >> 8)}; REPORTER_ASSERT(r, !memcmp(&buffer[j], expected, sizeof(expected))); } for (int j = i; j < 4; j++) { for (auto b : buffer[j]) { REPORTER_ASSERT(r, b == 0xff); } } } } { uint8_t data[][4] = { {0x00, 0x01, 0x02, 0x03}, {0x10, 0x11, 0x12, 0x13}, {0x20, 0x21, 0x22, 0x23}, {0x30, 0x31, 0x32, 0x33}, }; alignas(8) uint16_t buffer[4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_8888, &src); p.append(SkRasterPipelineOp::store_a16, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { uint16_t expected = (data[j][3] << 8) | data[j][3]; REPORTER_ASSERT(r, buffer[j] == expected); } for (int j = i; j < 4; j++) { REPORTER_ASSERT(r, buffer[j] == 0xffff); } } } { alignas(8) uint16_t data[][4] = { {0x0000, 0x1000, 0x2000, 0x3000}, {0x0001, 0x1001, 0x2001, 0x3001}, {0x0002, 0x1002, 0x2002, 0x3002}, {0x0003, 0x1003, 0x2003, 0x3003}, }; alignas(8) uint16_t buffer[4][4]; SkRasterPipeline_MemoryCtx src = { &data[0][0], 0 }, dst = { &buffer[0], 0 }; for (unsigned i = 1; i <= 4; i++) { memset(buffer, 0xff, sizeof(buffer)); SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_16161616, &src); p.append(SkRasterPipelineOp::swap_rb); p.append(SkRasterPipelineOp::store_16161616, &dst); p.run(0,0, i,1); for (unsigned j = 0; j < i; j++) { uint16_t expected[4] = {data[j][2], data[j][1], data[j][0], data[j][3]}; REPORTER_ASSERT(r, !memcmp(&expected[0], &buffer[j], sizeof(expected))); } for (int j = i; j < 4; j++) { for (uint16_t u16 : buffer[j]) REPORTER_ASSERT(r, u16 == 0xffff); } } } } DEF_TEST(SkRasterPipeline_lowp, r) { uint32_t 
rgba[64]; for (int i = 0; i < 64; i++) { rgba[i] = (4*i+0) << 0 | (4*i+1) << 8 | (4*i+2) << 16 | (4*i+3) << 24; } SkRasterPipeline_MemoryCtx ptr = { rgba, 0 }; SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_8888, &ptr); p.append(SkRasterPipelineOp::swap_rb); p.append(SkRasterPipelineOp::store_8888, &ptr); p.run(0,0,64,1); for (int i = 0; i < 64; i++) { uint32_t want = (4*i+0) << 16 | (4*i+1) << 8 | (4*i+2) << 0 | (4*i+3) << 24; if (rgba[i] != want) { ERRORF(r, "got %08x, want %08x\n", rgba[i], want); } } } DEF_TEST(SkRasterPipeline_swizzle, r) { // This takes the lowp code path { uint16_t rg[64]; for (int i = 0; i < 64; i++) { rg[i] = (4*i+0) << 0 | (4*i+1) << 8; } skgpu::Swizzle swizzle("g1b1"); SkRasterPipeline_MemoryCtx ptr = { rg, 0 }; SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_rg88, &ptr); swizzle.apply(&p); p.append(SkRasterPipelineOp::store_rg88, &ptr); p.run(0,0,64,1); for (int i = 0; i < 64; i++) { uint32_t want = 0xff << 8 | (4*i+1) << 0; if (rg[i] != want) { ERRORF(r, "got %08x, want %08x\n", rg[i], want); } } } // This takes the highp code path { float rg[64][4]; for (int i = 0; i < 64; i++) { rg[i][0] = i + 1; rg[i][1] = 2 * i + 1; rg[i][2] = 0; rg[i][3] = 1; } skgpu::Swizzle swizzle("0gra"); uint16_t buffer[64][4]; SkRasterPipeline_MemoryCtx src = { rg, 0 }, dst = { buffer, 0}; SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_f32, &src); swizzle.apply(&p); p.append(SkRasterPipelineOp::store_f16, &dst); p.run(0,0,64,1); for (int i = 0; i < 64; i++) { uint16_t want[4] { h(0), h(2 * i + 1), h(i + 1), h(1), }; REPORTER_ASSERT(r, !memcmp(want, buffer[i], sizeof(buffer[i]))); } } } DEF_TEST(SkRasterPipeline_lowp_clamp01, r) { // This may seem like a funny pipeline to create, // but it certainly shouldn't crash when you run it. uint32_t rgba = 0xff00ff00; SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 }; SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::load_8888, &ptr); p.append(SkRasterPipelineOp::swap_rb); p.append(SkRasterPipelineOp::clamp_01); p.append(SkRasterPipelineOp::store_8888, &ptr); p.run(0,0,1,1); } // Helper struct that can be used to scrape stack addresses at different points in a pipeline class StackCheckerCtx : SkRasterPipeline_CallbackCtx { public: StackCheckerCtx() { this->fn = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) { auto ctx = (StackCheckerCtx*)self; ctx->fStackAddrs.push_back(&active_pixels); }; } enum class Behavior { kGrowth, kBaseline, kUnknown, }; static Behavior GrowthBehavior() { // Only some stages use the musttail attribute, so we have no way of knowing what's going to // happen. In release builds, it's likely that the compiler will apply tail-call // optimization. Even in some debug builds (on Windows), we don't see stack growth. return Behavior::kUnknown; } // Call one of these two each time the checker callback is added: StackCheckerCtx* expectGrowth() { fExpectedBehavior.push_back(GrowthBehavior()); return this; } StackCheckerCtx* expectBaseline() { fExpectedBehavior.push_back(Behavior::kBaseline); return this; } void validate(skiatest::Reporter* r) { REPORTER_ASSERT(r, fStackAddrs.size() == fExpectedBehavior.size()); // This test is storing and comparing stack pointers (to dead stack frames) as a way of // measuring stack usage. Unsurprisingly, ASAN doesn't like that. HWASAN actually inserts // tag bytes in the pointers, causing them not to match. 
Newer versions of vanilla ASAN // also appear to salt the stack slightly, causing repeated calls to scrape different // addresses, even though $rsp is identical on each invocation of the lambda. #if !defined(SK_SANITIZE_ADDRESS) void* baseline = fStackAddrs[0]; for (size_t i = 1; i < fStackAddrs.size(); i++) { if (fExpectedBehavior[i] == Behavior::kGrowth) { REPORTER_ASSERT(r, fStackAddrs[i] != baseline); } else if (fExpectedBehavior[i] == Behavior::kBaseline) { REPORTER_ASSERT(r, fStackAddrs[i] == baseline); } else { // Unknown behavior, nothing we can assert here } } #endif } private: std::vector fStackAddrs; std::vector fExpectedBehavior; }; DEF_TEST(SkRasterPipeline_stack_rewind, r) { // This test verifies that we can control stack usage with stack_rewind // Without stack_rewind, we should (maybe) see stack growth { StackCheckerCtx stack; uint32_t rgba = 0xff0000ff; SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 }; SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::callback, stack.expectBaseline()); p.append(SkRasterPipelineOp::load_8888, &ptr); p.append(SkRasterPipelineOp::callback, stack.expectGrowth()); p.append(SkRasterPipelineOp::swap_rb); p.append(SkRasterPipelineOp::callback, stack.expectGrowth()); p.append(SkRasterPipelineOp::store_8888, &ptr); p.run(0,0,1,1); REPORTER_ASSERT(r, rgba == 0xffff0000); // Ensure the pipeline worked stack.validate(r); } // With stack_rewind, we should (always) be able to get back to baseline { StackCheckerCtx stack; uint32_t rgba = 0xff0000ff; SkRasterPipeline_MemoryCtx ptr = { &rgba, 0 }; SkRasterPipeline_<256> p; p.append(SkRasterPipelineOp::callback, stack.expectBaseline()); p.append(SkRasterPipelineOp::load_8888, &ptr); p.append(SkRasterPipelineOp::callback, stack.expectGrowth()); p.appendStackRewind(); p.append(SkRasterPipelineOp::callback, stack.expectBaseline()); p.append(SkRasterPipelineOp::swap_rb); p.append(SkRasterPipelineOp::callback, stack.expectGrowth()); p.appendStackRewind(); p.append(SkRasterPipelineOp::callback, stack.expectBaseline()); p.append(SkRasterPipelineOp::store_8888, &ptr); p.run(0,0,1,1); REPORTER_ASSERT(r, rgba == 0xffff0000); // Ensure the pipeline worked stack.validate(r); } }
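// (Illustrative sketch, not part of the original suite: a spot-check of the float-to-half
// helper `h()` defined above, using well-known half-float bit patterns. The test name is
// hypothetical.)
DEF_TEST(SkRasterPipeline_HalfHelperSketch, r) {
    REPORTER_ASSERT(r, h(0.0f) == 0x0000);   // denorm halfs (including zero) flush to zero
    REPORTER_ASSERT(r, h(1.0f) == 0x3c00);   // 1.0 in 1-5-10 half
    REPORTER_ASSERT(r, h(0.5f) == 0x3800);   // 0.5 in 1-5-10 half
    REPORTER_ASSERT(r, h(-2.0f) == 0xc000);  // sign bit is preserved
}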