1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H
18 #define ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H
19 
#include <cstddef>
#include <cstdint>
#include <memory>
22 
23 namespace renderscript {
24 
25 class TaskProcessor;
26 
/**
 * Rectangular window that limits which part of the data a Toolkit operation processes.
 *
 * Each axis is described by a half-open interval: the start index is part of the window,
 * the end index is the first one that is not.
 */
struct Restriction {
    /** Index of the first value included on the X axis. */
    size_t startX;
    /** Index just past the last value included on the X axis. */
    size_t endX;
    /** Index of the first value included on the Y axis. */
    size_t startY;
    /** Index just past the last value included on the Y axis. */
    size_t endY;
};
44 
45 /**
46  * A collection of high-performance graphic utility functions like blur and blend.
47  *
48  * This toolkit provides ten image manipulation functions: blend, blur, color matrix, convolve,
49  * histogram, histogramDot, lut, lut3d, resize, and YUV to RGB. These functions execute
50  * multithreaded on the CPU.
51  *
52  * These functions work over raw byte arrays. You'll need to specify the width and height of
53  * the data to be processed, as well as the number of bytes per pixel. For most use cases,
54  * this will be 4.
55  *
56  * You should instantiate the Toolkit once and reuse it throughout your application.
57  * On instantiation, the Toolkit creates a thread pool that's used for processing all the functions.
58  * You can limit the number of pool threads used by the Toolkit via the constructor. The pool
59  * threads are destroyed once the Toolkit is destroyed, after any pending work is done.
60  *
61  * This library is thread safe. You can call methods from different pool threads. The functions will
62  * execute sequentially.
63  *
64  * A Java/Kotlin Toolkit is available. It calls this library through JNI.
65  *
66  * This toolkit can be used as a replacement for most RenderScript Intrinsic functions. Compared
67  * to RenderScript, it's simpler to use and more than twice as fast on the CPU. However RenderScript
68  * Intrinsics allow more flexibility for the type of allocation supported. In particular, this
69  * toolkit does not support allocations of floats.
70  */
71 class RenderScriptToolkit {
72     /** Each Toolkit method call is converted to a Task. The processor owns the thread pool. It
73      * tiles the tasks and schedule them over the pool threads.
74      */
75     std::unique_ptr<TaskProcessor> processor;
76 
77    public:
78     /**
79      * Creates the pool threads that are used for processing the method calls.
80      */
81     RenderScriptToolkit(int numberOfThreads = 0);
82     /**
83      * Destroys the thread pool. This stops any in-progress work; the Toolkit methods called from
84      * other pool threads will return without having completed the work. Because of the undefined
85      * state of the output buffers, an application should avoid destroying the Toolkit if other pool
86      * threads are executing Toolkit methods.
87      */
88     ~RenderScriptToolkit();
89 
90     /**
91      * Determines how a source buffer is blended into a destination buffer.
92      *
93      * See {@link RenderScriptToolkit::blend}.
94      *
95      * blend only works on 4 byte RGBA data. In the descriptions below, ".a" represents
96      * the alpha channel.
97      */
98     enum class BlendingMode {
99         /**
100          * dest = 0
101          *
102          * The destination is cleared, i.e. each pixel is set to (0, 0, 0, 0)
103          */
104         CLEAR = 0,
105         /**
106          * dest = src
107          *
108          * Sets each pixel of the destination to the corresponding one in the source.
109          */
110         SRC = 1,
111         /**
112          * dest = dest
113          *
114          * Leaves the destination untouched. This is a no-op.
115          */
116         DST = 2,
117         /**
118          * dest = src + dest * (1.0 - src.a)
119          */
120         SRC_OVER = 3,
121         /**
122          * dest = dest + src * (1.0 - dest.a)
123          */
124         DST_OVER = 4,
125         /**
126          * dest = src * dest.a
127          */
128         SRC_IN = 5,
129         /**
130          * dest = dest * src.a
131          */
132         DST_IN = 6,
133         /**
134          * dest = src * (1.0 - dest.a)
135          */
136         SRC_OUT = 7,
137         /**
138          * dest = dest * (1.0 - src.a)
139          */
140         DST_OUT = 8,
141         /**
142          * dest.rgb = src.rgb * dest.a + (1.0 - src.a) * dest.rgb, dest.a = dest.a
143          */
144         SRC_ATOP = 9,
145         /**
146          * dest = dest.rgb * src.a + (1.0 - dest.a) * src.rgb, dest.a = src.a
147          */
148         DST_ATOP = 10,
149         /**
150          * dest = {src.r ^ dest.r, src.g ^ dest.g, src.b ^ dest.b, src.a ^ dest.a}
151          *
152          * Note: this is NOT the Porter/Duff XOR mode; this is a bitwise xor.
153          */
154         XOR = 11,
155         /**
156          * dest = src * dest
157          */
158         MULTIPLY = 12,
159         /**
160          * dest = min(src + dest, 1.0)
161          */
162         ADD = 13,
163         /**
164          * dest = max(dest - src, 0.0)
165          */
166         SUBTRACT = 14
167     };
168 
169     /**
170      * Blend a source buffer with the destination buffer.
171      *
172      * Blends a source buffer and a destination buffer, placing the result in the destination
173      * buffer. The blending is done pairwise between two corresponding RGBA values found in
174      * each buffer. The mode parameter specifies one of fifteen blending operations.
175      * See {@link BlendingMode}.
176      *
177      * An optional range parameter can be set to restrict the operation to a rectangular subset
178      * of each buffer. If provided, the range must be wholly contained with the dimensions
179      * described by sizeX and sizeY.
180      *
181      * The source and destination buffers must have the same dimensions. Both buffers should be
182      * large enough for sizeX * sizeY * 4 bytes. The buffers have a row-major layout.
183      *
184      * @param mode The specific blending operation to do.
185      * @param source The RGBA input buffer.
186      * @param dest The destination buffer. Used for input and output.
187      * @param sizeX The width of both buffers, as a number of RGBA values.
188      * @param sizeY The height of both buffers, as a number of RGBA values.
189      * @param restriction When not null, restricts the operation to a 2D range of pixels.
190      */
191     void blend(BlendingMode mode, const uint8_t* _Nonnull source, uint8_t* _Nonnull dst,
192                size_t sizeX, size_t sizeY, const Restriction* _Nullable restriction = nullptr);
193 
194     /**
195      * Blur an image.
196      *
197      * Performs a Gaussian blur of the input image and stores the result in the out buffer.
198      *
199      * The radius determines which pixels are used to compute each blurred pixels. This Toolkit
200      * accepts values between 1 and 25. Larger values create a more blurred effect but also
201      * take longer to compute. When the radius extends past the edge, the edge pixel will
202      * be used as replacement for the pixel that's out off boundary.
203      *
204      * Each input pixel can either be represented by four bytes (RGBA format) or one byte
205      * for the less common blurring of alpha channel only image.
206      *
207      * An optional range parameter can be set to restrict the operation to a rectangular subset
208      * of each buffer. If provided, the range must be wholly contained with the dimensions
209      * described by sizeX and sizeY.
210      *
211      * The input and output buffers must have the same dimensions. Both buffers should be
212      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
213      *
214      * @param in The buffer of the image to be blurred.
215      * @param out The buffer that receives the blurred image.
216      * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells.
217      * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells.
218      * @param vectorSize Either 1 or 4, the number of bytes in each cell, i.e. A vs. RGBA.
219      * @param radius The radius of the pixels used to blur.
220      * @param restriction When not null, restricts the operation to a 2D range of pixels.
221      */
222     void blur(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
223               size_t vectorSize, int radius, const Restriction* _Nullable restriction = nullptr);
224 
225     /**
226      * Identity matrix that can be passed to the {@link RenderScriptToolkit::colorMatrix} method.
227      *
228      * Using this matrix will result in no change to the pixel through multiplication although
229      * the pixel value can still be modified by the add vector, or transformed to a different
230      * format.
231      */
232     static constexpr float kIdentityMatrix[] =  {
233             1.0f, 0.0f, 0.0f, 0.0f,
234             0.0f, 1.0f, 0.0f, 0.0f,
235             0.0f, 0.0f, 1.0f, 0.0f,
236             0.0f, 0.0f, 0.0f, 1.0f
237     };
238 
239     /**
240      * Matrix to turn color pixels to a grey scale.
241      *
242      * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert an
243      * image from color to greyscale.
244      */
245     static constexpr float kGreyScaleColorMatrix[] = {
246             0.299f, 0.299f, 0.299f, 0.0f,
247             0.587f, 0.587f, 0.587f, 0.0f,
248             0.114f, 0.114f, 0.114f, 0.0f,
249             0.0f,   0.0f,   0.0f,   1.0f
250     };
251 
252     /**
253      * Matrix to convert RGB to YUV.
254      *
255      * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the
256      * first three bytes of each pixel from RGB to YUV. This leaves the last byte (the alpha
257      * channel) untouched.
258      *
259      * This is a simplistic conversion. Most YUV buffers have more complicated format, not supported
260      * by this method.
261      */
262     static constexpr float kRgbToYuvMatrix[] = {
263             0.299f, -0.14713f,  0.615f,   0.0f,
264             0.587f, -0.28886f, -0.51499f, 0.0f,
265             0.114f,  0.436f,   -0.10001f, 0.0f,
266             0.0f,    0.0f,      0.0f,     1.0f
267     };
268 
269     /**
270      * Matrix to convert YUV to RGB.
271      *
272      * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the
273      * first three bytes of each pixel from YUV to RGB. This leaves the last byte (the alpha
274      * channel) untouched.
275      *
276      * This is a simplistic conversion. Most YUV buffers have more complicated format, not supported
277      * by this method. Use {@link RenderScriptToolkit::yuvToRgb} to convert these buffers.
278      */
279     static constexpr float kYuvToRgbMatrix[] = {
280             1.0f,      1.0f,     1.0f,     0.0f,
281             0.0f,     -0.39465f, 2.03211f, 0.0f,
282             1.13983f, -0.5806f,  0.0f,     0.0f,
283             0.0f,      0.0f,     0.0f,     1.0f
284     };
285 
286     /**
287      * Transform an image using a color matrix.
288      *
289      * Converts a 2D array of vectors of unsigned bytes, multiplying each vectors by a 4x4 matrix
290      * and adding an optional vector.
291      *
292      * Each input vector is composed of 1-4 unsigned bytes. If less than 4 bytes, it's extended to
293      * 4, padding with zeroes. The unsigned bytes are converted from 0-255 to 0.0-1.0 floats
294      * before the multiplication is done.
295      *
296      * The resulting value is normalized from 0.0-1.0 to a 0-255 value and stored in the output.
297      * If the output vector size is less than four, the unused channels are discarded.
298      *
299      * If addVector is null, a vector of zeroes is added, i.e. a noop.
300      *
301      * Check kIdentityMatrix, kGreyScaleColorMatrix, kRgbToYuvMatrix, and kYuvToRgbMatrix for sample
302      * matrices. The YUV conversion may not work for all color spaces.
303      *
304      * @param in The buffer of the image to be converted.
305      * @param out The buffer that receives the converted image.
306      * @param inputVectorSize The number of bytes in each input cell, a value from 1 to 4.
307      * @param outputVectorSize The number of bytes in each output cell, a value from 1 to 4.
308      * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells.
309      * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells.
310      * @param matrix The 4x4 matrix to multiply, in row major format.
311      * @param addVector A vector of four floats that's added to the result of the multiplication.
312      * @param restriction When not null, restricts the operation to a 2D range of pixels.
313      */
314     void colorMatrix(const void* _Nonnull in, void* _Nonnull out, size_t inputVectorSize,
315                      size_t outputVectorSize, size_t sizeX, size_t sizeY,
316                      const float* _Nonnull matrix, const float* _Nullable addVector = nullptr,
317                      const Restriction* _Nullable restriction = nullptr);
318 
319     /**
320      * Convolve a ByteArray.
321      *
322      * Applies a 3x3 or 5x5 convolution to the input array using the provided coefficients.
323      *
324      * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed.
325      * The coefficients should be provided in row-major format.
326      *
327      * When the square extends past the edge, the edge values will be used as replacement for the
328      * values that's are off boundary.
329      *
330      * Each input cell can either be represented by one to four bytes. Each byte is multiplied
331      * and accumulated independently of the other bytes of the cell.
332      *
333      * An optional range parameter can be set to restrict the operation to a rectangular subset
334      * of each buffer. If provided, the range must be wholly contained with the dimensions
335      * described by sizeX and sizeY.
336      *
337      * The input and output buffers must have the same dimensions. Both buffers should be
338      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
339      *
340      * @param in The buffer of the image to be blurred.
341      * @param out The buffer that receives the blurred image.
342      * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
343      * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells.
344      * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells.
345      * @param coefficients 9 or 25 multipliers.
346      * @param restriction When not null, restricts the operation to a 2D range of pixels.
347      */
348     void convolve3x3(const void* _Nonnull in, void* _Nonnull out, size_t vectorSize, size_t sizeX,
349                      size_t sizeY, const float* _Nonnull coefficients,
350                      const Restriction* _Nullable restriction = nullptr);
351 
352     void convolve5x5(const void* _Nonnull in, void* _Nonnull out, size_t vectorSize, size_t sizeX,
353                      size_t sizeY, const float* _Nonnull coefficients,
354                      const Restriction* _Nullable restriction = nullptr);
355 
356     /**
357      * Compute the histogram of an image.
358      *
359      * Tallies how many times each of the 256 possible values of a byte is found in the input.
360      *
361      * An input cell can be represented by one to four bytes. The tally is done independently
362      * for each of the bytes of the cell. Correspondingly, the out array will have
363      * 256 * vectorSize entries. The counts for value 0 are consecutive, followed by those for
364      * value 1, etc.
365      *
366      * An optional range parameter can be set to restrict the operation to a rectangular subset
367      * of each buffer. If provided, the range must be wholly contained with the dimensions
368      * described by sizeX and sizeY.
369      *
370      * The source buffers should be large enough for sizeX * sizeY * vectorSize bytes. The buffers
371      * have a row-major layout. The out buffer should be large enough for 256 * vectorSize ints.
372      *
373      * @param in The buffer of the image to be analyzed.
374      * @param out The resulting vector of counts.
375      * @param sizeX The width of the input buffers, as a number of 1 or 4 byte cells.
376      * @param sizeY The height of the input buffers, as a number of 1 or 4 byte cells.
377      * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
378      * @param restriction When not null, restricts the operation to a 2D range of pixels.
379      */
380     void histogram(const uint8_t* _Nonnull in, int32_t* _Nonnull out, size_t sizeX, size_t sizeY,
381                    size_t vectorSize, const Restriction* _Nullable restriction = nullptr);
382 
383     /**
384      * Compute the histogram of the dot product of an image.
385      *
386      * This method supports cells of 1 to 4 bytes in length. For each cell of the array,
387      * the dot product of its bytes with the provided coefficients is computed. The resulting
388      * floating point value is converted to an unsigned byte and tallied in the histogram.
389      *
390      * If coefficients is null, the coefficients used for RGBA luminosity calculation will be used,
391      * i.e. the values [0.299f, 0.587f, 0.114f, 0.f].
392      *
393      * Each coefficients must be >= 0 and their sum must be 1.0 or less. There must be the same
394      * number of coefficients as vectorSize.
395      *
396      * An optional range parameter can be set to restrict the operation to a rectangular subset
397      * of each buffer. If provided, the range must be wholly contained with the dimensions
398      * described by sizeX and sizeY.
399      *
400      * The source buffers should be large enough for sizeX * sizeY * vectorSize bytes. The buffers
401      * have a row-major layout. The out array should be large enough for 256 ints.
402      *
403      * @param in The buffer of the image to be analyzed.
404      * @param out The resulting vector of counts.
405      * @param sizeX The width of the input buffers, as a number of 1 or 4 byte cells.
406      * @param sizeY The height of the input buffers, as a number of 1 or 4 byte cells.
407      * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
408      * @param coefficients The values used for the dot product. Can be nullptr.
409      * @param restriction When not null, restricts the operation to a 2D range of pixels.
410      */
411     void histogramDot(const uint8_t* _Nonnull in, int32_t* _Nonnull out, size_t sizeX, size_t sizeY,
412                       size_t vectorSize, const float* _Nullable coefficients,
413                       const Restriction* _Nullable restriction = nullptr);
414 
415     /**
416      * Transform an image using a look up table
417      *
418      * Transforms an image by using a per-channel lookup table. Each channel of the input has an
419      * independent lookup table. The tables are 256 entries in size and can cover the full value
420      * range of a byte.
421      *
422      * The input array should be in RGBA format, where four consecutive bytes form an cell.
423      *
424      * An optional range parameter can be set to restrict the operation to a rectangular subset
425      * of each buffer. If provided, the range must be wholly contained with the dimensions
426      * described by sizeX and sizeY.
427      *
428      * The input and output buffers must have the same dimensions. Both buffers should be
429      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
430      *
431      * @param in The buffer of the image to be transformed.
432      * @param out The buffer that receives the transformed image.
433      * @param sizeX The width of both buffers, as a number of 4 byte cells.
434      * @param sizeY The height of both buffers, as a number of 4 byte cells.
435      * @param red An array of 256 values that's used to convert the R channel.
436      * @param green An array of 256 values that's used to convert the G channel.
437      * @param blue An array of 256 values that's used to convert the B channel.
438      * @param alpha An array of 256 values that's used to convert the A channel.
439      * @param restriction When not null, restricts the operation to a 2D range of pixels.
440      */
441     void lut(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
442              const uint8_t* _Nonnull red, const uint8_t* _Nonnull green,
443              const uint8_t* _Nonnull blue, const uint8_t* _Nonnull alpha,
444              const Restriction* _Nullable restriction = nullptr);
445 
446     /**
447      * Transform an image using a 3D look up table
448      *
449      * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G,
450      * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest
451      * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry
452      * is stored in the output.
453      *
454      * The input array should be in RGBA format, where four consecutive bytes form an cell.
455      * The fourth byte of each input cell is ignored.
456      *
457      * An optional range parameter can be set to restrict the operation to a rectangular subset
458      * of each buffer. If provided, the range must be wholly contained with the dimensions
459      * described by sizeX and sizeY.
460      *
461      * The input and output buffers must have the same dimensions. Both buffers should be
462      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
463      *
464      * @param in The buffer of the image to be transformed.
465      * @param out The buffer that receives the transformed image.
466      * @param sizeX The width of both buffers, as a number of 4 byte cells.
467      * @param sizeY The height of both buffers, as a number of 4 byte cells.
468      * @param cube The translation cube, in row major-format.
469      * @param cubeSizeX The number of RGBA entries in the cube in the X direction.
470      * @param cubeSizeY The number of RGBA entries in the cube in the Y direction.
471      * @param cubeSizeZ The number of RGBA entries in the cube in the Z direction.
472      * @param restriction When not null, restricts the operation to a 2D range of pixels.
473      */
474     void lut3d(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
475                const uint8_t* _Nonnull cube, size_t cubeSizeX, size_t cubeSizeY, size_t cubeSizeZ,
476                const Restriction* _Nullable restriction = nullptr);
477 
478     /**
479      * Resize an image.
480      *
481      * Resizes an image using bicubic interpolation.
482      *
483      * This method supports cells of 1 to 4 bytes in length. Each byte of the cell is
484      * interpolated independently from the others.
485      *
486      * An optional range parameter can be set to restrict the operation to a rectangular subset
487      * of the output buffer. The corresponding scaled range of the input will be used.  If provided,
488      * the range must be wholly contained with the dimensions described by outputSizeX and
489      * outputSizeY.
490      *
491      * The input and output buffers have a row-major layout. Both buffers should be
492      * large enough for sizeX * sizeY * vectorSize bytes.
493      *
494      * @param in The buffer of the image to be resized.
495      * @param out The buffer that receives the resized image.
496      * @param inputSizeX The width of the input buffer, as a number of 1-4 byte cells.
497      * @param inputSizeY The height of the input buffer, as a number of 1-4 byte cells.
498      * @param vectorSize The number of bytes in each cell of both buffers. A value from 1 to 4.
499      * @param outputSizeX The width of the output buffer, as a number of 1-4 byte cells.
500      * @param outputSizeY The height of the output buffer, as a number of 1-4 byte cells.
501      * @param restriction When not null, restricts the operation to a 2D range of pixels.
502      */
503     void resize(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t inputSizeX,
504                 size_t inputSizeY, size_t vectorSize, size_t outputSizeX, size_t outputSizeY,
505                 const Restriction* _Nullable restriction = nullptr);
506 
507     /**
508      * The YUV formats supported by yuvToRgb.
509      */
510     enum class YuvFormat {
511         NV21 = 0x11,
512         YV12 = 0x32315659,
513     };
514 
515     /**
516      * Convert an image from YUV to RGB.
517      *
518      * Converts an Android YUV buffer to RGB. The input allocation should be
519      * supplied in a supported YUV format as a YUV cell Allocation.
520      * The output is RGBA; the alpha channel will be set to 255.
521      *
522      * Note that for YV12 and a sizeX that's not a multiple of 32, the
523      * RenderScript Intrinsic may not have converted the image correctly.
524      * This Toolkit method should.
525      *
526      * @param in The buffer of the image to be converted.
527      * @param out The buffer that receives the converted image.
528      * @param sizeX The width in pixels of the image. Must be even.
529      * @param sizeY The height in pixels of the image.
530      * @param format Either YV12 or NV21.
531      */
532     void yuvToRgb(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
533                   YuvFormat format);
534 };
535 
536 }  // namespace renderscript
537 
538 #endif  // ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H
539