xref: /aosp_15_r20/external/libyuv/util/ssim.cc (revision 4e366538070a3a6c5c163c31b791eab742e1657a)
1*4e366538SXin Li /*
2*4e366538SXin Li  *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
3*4e366538SXin Li  *
4*4e366538SXin Li  *  Use of this source code is governed by a BSD-style license
5*4e366538SXin Li  *  that can be found in the LICENSE file in the root of the source
6*4e366538SXin Li  *  tree. An additional intellectual property rights grant can be found
7*4e366538SXin Li  *  in the file PATENTS. All contributing project authors may
8*4e366538SXin Li  *  be found in the AUTHORS file in the root of the source tree.
9*4e366538SXin Li  */
10*4e366538SXin Li 
11*4e366538SXin Li #include "../util/ssim.h"  // NOLINT
12*4e366538SXin Li 
13*4e366538SXin Li #include <string.h>
14*4e366538SXin Li 
15*4e366538SXin Li #ifdef __cplusplus
16*4e366538SXin Li extern "C" {
17*4e366538SXin Li #endif
18*4e366538SXin Li 
19*4e366538SXin Li typedef unsigned int uint32_t;    // NOLINT
20*4e366538SXin Li typedef unsigned short uint16_t;  // NOLINT
21*4e366538SXin Li 
22*4e366538SXin Li #if !defined(LIBYUV_DISABLE_X86) && !defined(__SSE2__) && \
23*4e366538SXin Li     (defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)))
24*4e366538SXin Li #define __SSE2__
25*4e366538SXin Li #endif
26*4e366538SXin Li #if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__)
27*4e366538SXin Li #include <emmintrin.h>
28*4e366538SXin Li #endif
29*4e366538SXin Li 
30*4e366538SXin Li #ifdef _OPENMP
31*4e366538SXin Li #include <omp.h>
32*4e366538SXin Li #endif
33*4e366538SXin Li 
34*4e366538SXin Li // SSIM
35*4e366538SXin Li enum { KERNEL = 3, KERNEL_SIZE = 2 * KERNEL + 1 };
36*4e366538SXin Li 
37*4e366538SXin Li // Symmetric Gaussian kernel:  K[i] = ~11 * exp(-0.3 * i * i)
38*4e366538SXin Li // The maximum value (11 x 11) must be less than 128 to avoid sign
39*4e366538SXin Li // problems during the calls to _mm_mullo_epi16().
40*4e366538SXin Li static const int K[KERNEL_SIZE] = {
41*4e366538SXin Li     1, 3, 7, 11, 7, 3, 1  // ~11 * exp(-0.3 * i * i)
42*4e366538SXin Li };
43*4e366538SXin Li static const double kiW[KERNEL + 1 + 1] = {
44*4e366538SXin Li     1. / 1089.,  // 1 / sum(i:0..6, j..6) K[i]*K[j]
45*4e366538SXin Li     1. / 1089.,  // 1 / sum(i:0..6, j..6) K[i]*K[j]
46*4e366538SXin Li     1. / 1056.,  // 1 / sum(i:0..5, j..6) K[i]*K[j]
47*4e366538SXin Li     1. / 957.,   // 1 / sum(i:0..4, j..6) K[i]*K[j]
48*4e366538SXin Li     1. / 726.,   // 1 / sum(i:0..3, j..6) K[i]*K[j]
49*4e366538SXin Li };
50*4e366538SXin Li 
51*4e366538SXin Li #if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__)
52*4e366538SXin Li 
53*4e366538SXin Li #define PWEIGHT(A, B) static_cast<uint16_t>(K[(A)] * K[(B)])  // weight product
54*4e366538SXin Li #define MAKE_WEIGHT(L)                                                \
55*4e366538SXin Li   {                                                                   \
56*4e366538SXin Li     {                                                                 \
57*4e366538SXin Li       {                                                               \
58*4e366538SXin Li         PWEIGHT(L, 0)                                                 \
59*4e366538SXin Li         , PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), PWEIGHT(L, 4), \
60*4e366538SXin Li             PWEIGHT(L, 5), PWEIGHT(L, 6), 0                           \
61*4e366538SXin Li       }                                                               \
62*4e366538SXin Li     }                                                                 \
63*4e366538SXin Li   }
64*4e366538SXin Li 
65*4e366538SXin Li // We need this union trick to be able to initialize constant static __m128i
66*4e366538SXin Li // values. We can't call _mm_set_epi16() for static compile-time initialization.
67*4e366538SXin Li static const struct {
68*4e366538SXin Li   union {
69*4e366538SXin Li     uint16_t i16_[8];
70*4e366538SXin Li     __m128i m_;
71*4e366538SXin Li   } values_;
72*4e366538SXin Li } W0 = MAKE_WEIGHT(0), W1 = MAKE_WEIGHT(1), W2 = MAKE_WEIGHT(2),
73*4e366538SXin Li   W3 = MAKE_WEIGHT(3);
74*4e366538SXin Li // ... the rest is symmetric.
75*4e366538SXin Li #undef MAKE_WEIGHT
76*4e366538SXin Li #undef PWEIGHT
77*4e366538SXin Li #endif
78*4e366538SXin Li 
79*4e366538SXin Li // Common final expression for SSIM, once the weighted sums are known.
FinalizeSSIM(double iw,double xm,double ym,double xxm,double xym,double yym)80*4e366538SXin Li static double FinalizeSSIM(double iw,
81*4e366538SXin Li                            double xm,
82*4e366538SXin Li                            double ym,
83*4e366538SXin Li                            double xxm,
84*4e366538SXin Li                            double xym,
85*4e366538SXin Li                            double yym) {
86*4e366538SXin Li   const double iwx = xm * iw;
87*4e366538SXin Li   const double iwy = ym * iw;
88*4e366538SXin Li   double sxx = xxm * iw - iwx * iwx;
89*4e366538SXin Li   double syy = yym * iw - iwy * iwy;
90*4e366538SXin Li   // small errors are possible, due to rounding. Clamp to zero.
91*4e366538SXin Li   if (sxx < 0.) {
92*4e366538SXin Li     sxx = 0.;
93*4e366538SXin Li   }
94*4e366538SXin Li   if (syy < 0.) {
95*4e366538SXin Li     syy = 0.;
96*4e366538SXin Li   }
97*4e366538SXin Li   const double sxsy = sqrt(sxx * syy);
98*4e366538SXin Li   const double sxy = xym * iw - iwx * iwy;
99*4e366538SXin Li   static const double C11 = (0.01 * 0.01) * (255 * 255);
100*4e366538SXin Li   static const double C22 = (0.03 * 0.03) * (255 * 255);
101*4e366538SXin Li   static const double C33 = (0.015 * 0.015) * (255 * 255);
102*4e366538SXin Li   const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
103*4e366538SXin Li   const double c = (2. * sxsy + C22) / (sxx + syy + C22);
104*4e366538SXin Li   const double s = (sxy + C33) / (sxsy + C33);
105*4e366538SXin Li   return l * c * s;
106*4e366538SXin Li }
107*4e366538SXin Li 
108*4e366538SXin Li // GetSSIM() does clipping.  GetSSIMFullKernel() does not
109*4e366538SXin Li 
110*4e366538SXin Li // TODO(skal): use summed tables?
111*4e366538SXin Li // Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1)
112*4e366538SXin Li // with a diff of 255, squared. The maximum error is thus 0x4388241,
113*4e366538SXin Li // which fits into 32 bits integers.
GetSSIM(const uint8_t * org,const uint8_t * rec,int xo,int yo,int W,int H,int stride)114*4e366538SXin Li double GetSSIM(const uint8_t* org,
115*4e366538SXin Li                const uint8_t* rec,
116*4e366538SXin Li                int xo,
117*4e366538SXin Li                int yo,
118*4e366538SXin Li                int W,
119*4e366538SXin Li                int H,
120*4e366538SXin Li                int stride) {
121*4e366538SXin Li   uint32_t ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
122*4e366538SXin Li   org += (yo - KERNEL) * stride;
123*4e366538SXin Li   org += (xo - KERNEL);
124*4e366538SXin Li   rec += (yo - KERNEL) * stride;
125*4e366538SXin Li   rec += (xo - KERNEL);
126*4e366538SXin Li   for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) {
127*4e366538SXin Li     if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) {
128*4e366538SXin Li       continue;
129*4e366538SXin Li     }
130*4e366538SXin Li     const int Wy = K[y_];
131*4e366538SXin Li     for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) {
132*4e366538SXin Li       const int Wxy = Wy * K[x_];
133*4e366538SXin Li       if (((xo - KERNEL + x_) >= 0) && ((xo - KERNEL + x_) < W)) {
134*4e366538SXin Li         const int org_x = org[x_];
135*4e366538SXin Li         const int rec_x = rec[x_];
136*4e366538SXin Li         ws += Wxy;
137*4e366538SXin Li         xm += Wxy * org_x;
138*4e366538SXin Li         ym += Wxy * rec_x;
139*4e366538SXin Li         xxm += Wxy * org_x * org_x;
140*4e366538SXin Li         xym += Wxy * org_x * rec_x;
141*4e366538SXin Li         yym += Wxy * rec_x * rec_x;
142*4e366538SXin Li       }
143*4e366538SXin Li     }
144*4e366538SXin Li   }
145*4e366538SXin Li   return FinalizeSSIM(1. / ws, xm, ym, xxm, xym, yym);
146*4e366538SXin Li }
147*4e366538SXin Li 
GetSSIMFullKernel(const uint8_t * org,const uint8_t * rec,int xo,int yo,int stride,double area_weight)148*4e366538SXin Li double GetSSIMFullKernel(const uint8_t* org,
149*4e366538SXin Li                          const uint8_t* rec,
150*4e366538SXin Li                          int xo,
151*4e366538SXin Li                          int yo,
152*4e366538SXin Li                          int stride,
153*4e366538SXin Li                          double area_weight) {
154*4e366538SXin Li   uint32_t xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
155*4e366538SXin Li 
156*4e366538SXin Li #if defined(LIBYUV_DISABLE_X86) || !defined(__SSE2__)
157*4e366538SXin Li 
158*4e366538SXin Li   org += yo * stride + xo;
159*4e366538SXin Li   rec += yo * stride + xo;
160*4e366538SXin Li   for (int y = 1; y <= KERNEL; y++) {
161*4e366538SXin Li     const int dy1 = y * stride;
162*4e366538SXin Li     const int dy2 = y * stride;
163*4e366538SXin Li     const int Wy = K[KERNEL + y];
164*4e366538SXin Li 
165*4e366538SXin Li     for (int x = 1; x <= KERNEL; x++) {
166*4e366538SXin Li       // Compute the contributions of upper-left (ul), upper-right (ur)
167*4e366538SXin Li       // lower-left (ll) and lower-right (lr) points (see the diagram below).
168*4e366538SXin Li       // Symmetric Kernel will have same weight on those points.
169*4e366538SXin Li       //       -  -  -  -  -  -  -
170*4e366538SXin Li       //       -  ul -  -  -  ur -
171*4e366538SXin Li       //       -  -  -  -  -  -  -
172*4e366538SXin Li       //       -  -  -  0  -  -  -
173*4e366538SXin Li       //       -  -  -  -  -  -  -
174*4e366538SXin Li       //       -  ll -  -  -  lr -
175*4e366538SXin Li       //       -  -  -  -  -  -  -
176*4e366538SXin Li       const int Wxy = Wy * K[KERNEL + x];
177*4e366538SXin Li       const int ul1 = org[-dy1 - x];
178*4e366538SXin Li       const int ur1 = org[-dy1 + x];
179*4e366538SXin Li       const int ll1 = org[dy1 - x];
180*4e366538SXin Li       const int lr1 = org[dy1 + x];
181*4e366538SXin Li 
182*4e366538SXin Li       const int ul2 = rec[-dy2 - x];
183*4e366538SXin Li       const int ur2 = rec[-dy2 + x];
184*4e366538SXin Li       const int ll2 = rec[dy2 - x];
185*4e366538SXin Li       const int lr2 = rec[dy2 + x];
186*4e366538SXin Li 
187*4e366538SXin Li       xm += Wxy * (ul1 + ur1 + ll1 + lr1);
188*4e366538SXin Li       ym += Wxy * (ul2 + ur2 + ll2 + lr2);
189*4e366538SXin Li       xxm += Wxy * (ul1 * ul1 + ur1 * ur1 + ll1 * ll1 + lr1 * lr1);
190*4e366538SXin Li       xym += Wxy * (ul1 * ul2 + ur1 * ur2 + ll1 * ll2 + lr1 * lr2);
191*4e366538SXin Li       yym += Wxy * (ul2 * ul2 + ur2 * ur2 + ll2 * ll2 + lr2 * lr2);
192*4e366538SXin Li     }
193*4e366538SXin Li 
194*4e366538SXin Li     // Compute the contributions of up (u), down (d), left (l) and right (r)
195*4e366538SXin Li     // points across the main axes (see the diagram below).
196*4e366538SXin Li     // Symmetric Kernel will have same weight on those points.
197*4e366538SXin Li     //       -  -  -  -  -  -  -
198*4e366538SXin Li     //       -  -  -  u  -  -  -
199*4e366538SXin Li     //       -  -  -  -  -  -  -
200*4e366538SXin Li     //       -  l  -  0  -  r  -
201*4e366538SXin Li     //       -  -  -  -  -  -  -
202*4e366538SXin Li     //       -  -  -  d  -  -  -
203*4e366538SXin Li     //       -  -  -  -  -  -  -
204*4e366538SXin Li     const int Wxy = Wy * K[KERNEL];
205*4e366538SXin Li     const int u1 = org[-dy1];
206*4e366538SXin Li     const int d1 = org[dy1];
207*4e366538SXin Li     const int l1 = org[-y];
208*4e366538SXin Li     const int r1 = org[y];
209*4e366538SXin Li 
210*4e366538SXin Li     const int u2 = rec[-dy2];
211*4e366538SXin Li     const int d2 = rec[dy2];
212*4e366538SXin Li     const int l2 = rec[-y];
213*4e366538SXin Li     const int r2 = rec[y];
214*4e366538SXin Li 
215*4e366538SXin Li     xm += Wxy * (u1 + d1 + l1 + r1);
216*4e366538SXin Li     ym += Wxy * (u2 + d2 + l2 + r2);
217*4e366538SXin Li     xxm += Wxy * (u1 * u1 + d1 * d1 + l1 * l1 + r1 * r1);
218*4e366538SXin Li     xym += Wxy * (u1 * u2 + d1 * d2 + l1 * l2 + r1 * r2);
219*4e366538SXin Li     yym += Wxy * (u2 * u2 + d2 * d2 + l2 * l2 + r2 * r2);
220*4e366538SXin Li   }
221*4e366538SXin Li 
222*4e366538SXin Li   // Lastly the contribution of (x0, y0) point.
223*4e366538SXin Li   const int Wxy = K[KERNEL] * K[KERNEL];
224*4e366538SXin Li   const int s1 = org[0];
225*4e366538SXin Li   const int s2 = rec[0];
226*4e366538SXin Li 
227*4e366538SXin Li   xm += Wxy * s1;
228*4e366538SXin Li   ym += Wxy * s2;
229*4e366538SXin Li   xxm += Wxy * s1 * s1;
230*4e366538SXin Li   xym += Wxy * s1 * s2;
231*4e366538SXin Li   yym += Wxy * s2 * s2;
232*4e366538SXin Li 
233*4e366538SXin Li #else  // __SSE2__
234*4e366538SXin Li 
235*4e366538SXin Li   org += (yo - KERNEL) * stride + (xo - KERNEL);
236*4e366538SXin Li   rec += (yo - KERNEL) * stride + (xo - KERNEL);
237*4e366538SXin Li 
238*4e366538SXin Li   const __m128i zero = _mm_setzero_si128();
239*4e366538SXin Li   __m128i x = zero;
240*4e366538SXin Li   __m128i y = zero;
241*4e366538SXin Li   __m128i xx = zero;
242*4e366538SXin Li   __m128i xy = zero;
243*4e366538SXin Li   __m128i yy = zero;
244*4e366538SXin Li 
245*4e366538SXin Li // Read 8 pixels at line #L, and convert to 16bit, perform weighting
246*4e366538SXin Li // and acccumulate.
247*4e366538SXin Li #define LOAD_LINE_PAIR(L, WEIGHT)                                            \
248*4e366538SXin Li   do {                                                                       \
249*4e366538SXin Li     const __m128i v0 =                                                       \
250*4e366538SXin Li         _mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L)*stride)); \
251*4e366538SXin Li     const __m128i v1 =                                                       \
252*4e366538SXin Li         _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L)*stride)); \
253*4e366538SXin Li     const __m128i w0 = _mm_unpacklo_epi8(v0, zero);                          \
254*4e366538SXin Li     const __m128i w1 = _mm_unpacklo_epi8(v1, zero);                          \
255*4e366538SXin Li     const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_);            \
256*4e366538SXin Li     const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_);            \
257*4e366538SXin Li     x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero));                     \
258*4e366538SXin Li     y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero));                     \
259*4e366538SXin Li     x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero));                     \
260*4e366538SXin Li     y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero));                     \
261*4e366538SXin Li     xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0));                         \
262*4e366538SXin Li     xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1));                         \
263*4e366538SXin Li     yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1));                         \
264*4e366538SXin Li   } while (0)
265*4e366538SXin Li 
266*4e366538SXin Li #define ADD_AND_STORE_FOUR_EPI32(M, OUT)                    \
267*4e366538SXin Li   do {                                                      \
268*4e366538SXin Li     uint32_t tmp[4];                                        \
269*4e366538SXin Li     _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \
270*4e366538SXin Li     (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0];              \
271*4e366538SXin Li   } while (0)
272*4e366538SXin Li 
273*4e366538SXin Li   LOAD_LINE_PAIR(0, W0);
274*4e366538SXin Li   LOAD_LINE_PAIR(1, W1);
275*4e366538SXin Li   LOAD_LINE_PAIR(2, W2);
276*4e366538SXin Li   LOAD_LINE_PAIR(3, W3);
277*4e366538SXin Li   LOAD_LINE_PAIR(4, W2);
278*4e366538SXin Li   LOAD_LINE_PAIR(5, W1);
279*4e366538SXin Li   LOAD_LINE_PAIR(6, W0);
280*4e366538SXin Li 
281*4e366538SXin Li   ADD_AND_STORE_FOUR_EPI32(x, xm);
282*4e366538SXin Li   ADD_AND_STORE_FOUR_EPI32(y, ym);
283*4e366538SXin Li   ADD_AND_STORE_FOUR_EPI32(xx, xxm);
284*4e366538SXin Li   ADD_AND_STORE_FOUR_EPI32(xy, xym);
285*4e366538SXin Li   ADD_AND_STORE_FOUR_EPI32(yy, yym);
286*4e366538SXin Li 
287*4e366538SXin Li #undef LOAD_LINE_PAIR
288*4e366538SXin Li #undef ADD_AND_STORE_FOUR_EPI32
289*4e366538SXin Li #endif
290*4e366538SXin Li 
291*4e366538SXin Li   return FinalizeSSIM(area_weight, xm, ym, xxm, xym, yym);
292*4e366538SXin Li }
293*4e366538SXin Li 
start_max(int x,int y)294*4e366538SXin Li static int start_max(int x, int y) {
295*4e366538SXin Li   return (x > y) ? x : y;
296*4e366538SXin Li }
297*4e366538SXin Li 
CalcSSIM(const uint8_t * org,const uint8_t * rec,const int image_width,const int image_height)298*4e366538SXin Li double CalcSSIM(const uint8_t* org,
299*4e366538SXin Li                 const uint8_t* rec,
300*4e366538SXin Li                 const int image_width,
301*4e366538SXin Li                 const int image_height) {
302*4e366538SXin Li   double SSIM = 0.;
303*4e366538SXin Li   const int KERNEL_Y = (image_height < KERNEL) ? image_height : KERNEL;
304*4e366538SXin Li   const int KERNEL_X = (image_width < KERNEL) ? image_width : KERNEL;
305*4e366538SXin Li   const int start_x = start_max(image_width - 8 + KERNEL_X, KERNEL_X);
306*4e366538SXin Li   const int start_y = start_max(image_height - KERNEL_Y, KERNEL_Y);
307*4e366538SXin Li   const int stride = image_width;
308*4e366538SXin Li 
309*4e366538SXin Li   for (int j = 0; j < KERNEL_Y; ++j) {
310*4e366538SXin Li     for (int i = 0; i < image_width; ++i) {
311*4e366538SXin Li       SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride);
312*4e366538SXin Li     }
313*4e366538SXin Li   }
314*4e366538SXin Li 
315*4e366538SXin Li #ifdef _OPENMP
316*4e366538SXin Li #pragma omp parallel for reduction(+ : SSIM)
317*4e366538SXin Li #endif
318*4e366538SXin Li   for (int j = KERNEL_Y; j < image_height - KERNEL_Y; ++j) {
319*4e366538SXin Li     for (int i = 0; i < KERNEL_X; ++i) {
320*4e366538SXin Li       SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride);
321*4e366538SXin Li     }
322*4e366538SXin Li     for (int i = KERNEL_X; i < start_x; ++i) {
323*4e366538SXin Li       SSIM += GetSSIMFullKernel(org, rec, i, j, stride, kiW[0]);
324*4e366538SXin Li     }
325*4e366538SXin Li     if (start_x < image_width) {
326*4e366538SXin Li       // GetSSIMFullKernel() needs to be able to read 8 pixels (in SSE2). So we
327*4e366538SXin Li       // copy the 8 rightmost pixels on a cache area, and pad this area with
328*4e366538SXin Li       // zeros which won't contribute to the overall SSIM value (but we need
329*4e366538SXin Li       // to pass the correct normalizing constant!). By using this cache, we can
330*4e366538SXin Li       // still call GetSSIMFullKernel() instead of the slower GetSSIM().
331*4e366538SXin Li       // NOTE: we could use similar method for the left-most pixels too.
332*4e366538SXin Li       const int kScratchWidth = 8;
333*4e366538SXin Li       const int kScratchStride = kScratchWidth + KERNEL + 1;
334*4e366538SXin Li       uint8_t scratch_org[KERNEL_SIZE * kScratchStride] = {0};
335*4e366538SXin Li       uint8_t scratch_rec[KERNEL_SIZE * kScratchStride] = {0};
336*4e366538SXin Li 
337*4e366538SXin Li       for (int k = 0; k < KERNEL_SIZE; ++k) {
338*4e366538SXin Li         const int offset =
339*4e366538SXin Li             (j - KERNEL + k) * stride + image_width - kScratchWidth;
340*4e366538SXin Li         memcpy(scratch_org + k * kScratchStride, org + offset, kScratchWidth);
341*4e366538SXin Li         memcpy(scratch_rec + k * kScratchStride, rec + offset, kScratchWidth);
342*4e366538SXin Li       }
343*4e366538SXin Li       for (int k = 0; k <= KERNEL_X + 1; ++k) {
344*4e366538SXin Li         SSIM += GetSSIMFullKernel(scratch_org, scratch_rec, KERNEL + k, KERNEL,
345*4e366538SXin Li                                   kScratchStride, kiW[k]);
346*4e366538SXin Li       }
347*4e366538SXin Li     }
348*4e366538SXin Li   }
349*4e366538SXin Li 
350*4e366538SXin Li   for (int j = start_y; j < image_height; ++j) {
351*4e366538SXin Li     for (int i = 0; i < image_width; ++i) {
352*4e366538SXin Li       SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride);
353*4e366538SXin Li     }
354*4e366538SXin Li   }
355*4e366538SXin Li   return SSIM;
356*4e366538SXin Li }
357*4e366538SXin Li 
CalcLSSIM(double ssim)358*4e366538SXin Li double CalcLSSIM(double ssim) {
359*4e366538SXin Li   return -10.0 * log10(1.0 - ssim);
360*4e366538SXin Li }
361*4e366538SXin Li 
362*4e366538SXin Li #ifdef __cplusplus
363*4e366538SXin Li }  // extern "C"
364*4e366538SXin Li #endif
365