xref: /aosp_15_r20/external/webp/src/dsp/ssim.c (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
1*b2055c35SXin Li // Copyright 2017 Google Inc. All Rights Reserved.
2*b2055c35SXin Li //
3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license
4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source
5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found
6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may
7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree.
8*b2055c35SXin Li // -----------------------------------------------------------------------------
9*b2055c35SXin Li //
10*b2055c35SXin Li // distortion calculation
11*b2055c35SXin Li //
12*b2055c35SXin Li // Author: Skal ([email protected])
13*b2055c35SXin Li 
14*b2055c35SXin Li #include <assert.h>
15*b2055c35SXin Li #include <stdlib.h>  // for abs()
16*b2055c35SXin Li 
17*b2055c35SXin Li #include "src/dsp/dsp.h"
18*b2055c35SXin Li 
19*b2055c35SXin Li #if !defined(WEBP_REDUCE_SIZE)
20*b2055c35SXin Li 
21*b2055c35SXin Li //------------------------------------------------------------------------------
22*b2055c35SXin Li // SSIM / PSNR
23*b2055c35SXin Li 
24*b2055c35SXin Li // hat-shaped filter. Sum of coefficients is equal to 16.
25*b2055c35SXin Li static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = {
26*b2055c35SXin Li   1, 2, 3, 4, 3, 2, 1
27*b2055c35SXin Li };
28*b2055c35SXin Li static const uint32_t kWeightSum = 16 * 16;   // sum{kWeight}^2
29*b2055c35SXin Li 
SSIMCalculation(const VP8DistoStats * const stats,uint32_t N)30*b2055c35SXin Li static WEBP_INLINE double SSIMCalculation(
31*b2055c35SXin Li     const VP8DistoStats* const stats, uint32_t N  /*num samples*/) {
32*b2055c35SXin Li   const uint32_t w2 =  N * N;
33*b2055c35SXin Li   const uint32_t C1 = 20 * w2;
34*b2055c35SXin Li   const uint32_t C2 = 60 * w2;
35*b2055c35SXin Li   const uint32_t C3 = 8 * 8 * w2;   // 'dark' limit ~= 6
36*b2055c35SXin Li   const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
37*b2055c35SXin Li   const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
38*b2055c35SXin Li   if (xmxm + ymym >= C3) {
39*b2055c35SXin Li     const int64_t xmym = (int64_t)stats->xm * stats->ym;
40*b2055c35SXin Li     const int64_t sxy = (int64_t)stats->xym * N - xmym;    // can be negative
41*b2055c35SXin Li     const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
42*b2055c35SXin Li     const uint64_t syy = (uint64_t)stats->yym * N - ymym;
43*b2055c35SXin Li     // we descale by 8 to prevent overflow during the fnum/fden multiply.
44*b2055c35SXin Li     const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8;
45*b2055c35SXin Li     const uint64_t den_S = (sxx + syy + C2) >> 8;
46*b2055c35SXin Li     const uint64_t fnum = (2 * xmym + C1) * num_S;
47*b2055c35SXin Li     const uint64_t fden = (xmxm + ymym + C1) * den_S;
48*b2055c35SXin Li     const double r = (double)fnum / fden;
49*b2055c35SXin Li     assert(r >= 0. && r <= 1.0);
50*b2055c35SXin Li     return r;
51*b2055c35SXin Li   }
52*b2055c35SXin Li   return 1.;   // area is too dark to contribute meaningfully
53*b2055c35SXin Li }
54*b2055c35SXin Li 
VP8SSIMFromStats(const VP8DistoStats * const stats)55*b2055c35SXin Li double VP8SSIMFromStats(const VP8DistoStats* const stats) {
56*b2055c35SXin Li   return SSIMCalculation(stats, kWeightSum);
57*b2055c35SXin Li }
58*b2055c35SXin Li 
VP8SSIMFromStatsClipped(const VP8DistoStats * const stats)59*b2055c35SXin Li double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) {
60*b2055c35SXin Li   return SSIMCalculation(stats, stats->w);
61*b2055c35SXin Li }
62*b2055c35SXin Li 
SSIMGetClipped_C(const uint8_t * src1,int stride1,const uint8_t * src2,int stride2,int xo,int yo,int W,int H)63*b2055c35SXin Li static double SSIMGetClipped_C(const uint8_t* src1, int stride1,
64*b2055c35SXin Li                                const uint8_t* src2, int stride2,
65*b2055c35SXin Li                                int xo, int yo, int W, int H) {
66*b2055c35SXin Li   VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
67*b2055c35SXin Li   const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL;
68*b2055c35SXin Li   const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1
69*b2055c35SXin Li                                                   : yo + VP8_SSIM_KERNEL;
70*b2055c35SXin Li   const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL;
71*b2055c35SXin Li   const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1
72*b2055c35SXin Li                                                   : xo + VP8_SSIM_KERNEL;
73*b2055c35SXin Li   int x, y;
74*b2055c35SXin Li   src1 += ymin * stride1;
75*b2055c35SXin Li   src2 += ymin * stride2;
76*b2055c35SXin Li   for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
77*b2055c35SXin Li     for (x = xmin; x <= xmax; ++x) {
78*b2055c35SXin Li       const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo]
79*b2055c35SXin Li                        * kWeight[VP8_SSIM_KERNEL + y - yo];
80*b2055c35SXin Li       const uint32_t s1 = src1[x];
81*b2055c35SXin Li       const uint32_t s2 = src2[x];
82*b2055c35SXin Li       stats.w   += w;
83*b2055c35SXin Li       stats.xm  += w * s1;
84*b2055c35SXin Li       stats.ym  += w * s2;
85*b2055c35SXin Li       stats.xxm += w * s1 * s1;
86*b2055c35SXin Li       stats.xym += w * s1 * s2;
87*b2055c35SXin Li       stats.yym += w * s2 * s2;
88*b2055c35SXin Li     }
89*b2055c35SXin Li   }
90*b2055c35SXin Li   return VP8SSIMFromStatsClipped(&stats);
91*b2055c35SXin Li }
92*b2055c35SXin Li 
SSIMGet_C(const uint8_t * src1,int stride1,const uint8_t * src2,int stride2)93*b2055c35SXin Li static double SSIMGet_C(const uint8_t* src1, int stride1,
94*b2055c35SXin Li                         const uint8_t* src2, int stride2) {
95*b2055c35SXin Li   VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
96*b2055c35SXin Li   int x, y;
97*b2055c35SXin Li   for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
98*b2055c35SXin Li     for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) {
99*b2055c35SXin Li       const uint32_t w = kWeight[x] * kWeight[y];
100*b2055c35SXin Li       const uint32_t s1 = src1[x];
101*b2055c35SXin Li       const uint32_t s2 = src2[x];
102*b2055c35SXin Li       stats.xm  += w * s1;
103*b2055c35SXin Li       stats.ym  += w * s2;
104*b2055c35SXin Li       stats.xxm += w * s1 * s1;
105*b2055c35SXin Li       stats.xym += w * s1 * s2;
106*b2055c35SXin Li       stats.yym += w * s2 * s2;
107*b2055c35SXin Li     }
108*b2055c35SXin Li   }
109*b2055c35SXin Li   return VP8SSIMFromStats(&stats);
110*b2055c35SXin Li }
111*b2055c35SXin Li 
112*b2055c35SXin Li #endif  // !defined(WEBP_REDUCE_SIZE)
113*b2055c35SXin Li 
114*b2055c35SXin Li //------------------------------------------------------------------------------
115*b2055c35SXin Li 
116*b2055c35SXin Li #if !defined(WEBP_DISABLE_STATS)
AccumulateSSE_C(const uint8_t * src1,const uint8_t * src2,int len)117*b2055c35SXin Li static uint32_t AccumulateSSE_C(const uint8_t* src1,
118*b2055c35SXin Li                                 const uint8_t* src2, int len) {
119*b2055c35SXin Li   int i;
120*b2055c35SXin Li   uint32_t sse2 = 0;
121*b2055c35SXin Li   assert(len <= 65535);  // to ensure that accumulation fits within uint32_t
122*b2055c35SXin Li   for (i = 0; i < len; ++i) {
123*b2055c35SXin Li     const int32_t diff = src1[i] - src2[i];
124*b2055c35SXin Li     sse2 += diff * diff;
125*b2055c35SXin Li   }
126*b2055c35SXin Li   return sse2;
127*b2055c35SXin Li }
128*b2055c35SXin Li #endif
129*b2055c35SXin Li 
130*b2055c35SXin Li //------------------------------------------------------------------------------
131*b2055c35SXin Li 
132*b2055c35SXin Li #if !defined(WEBP_REDUCE_SIZE)
133*b2055c35SXin Li VP8SSIMGetFunc VP8SSIMGet;
134*b2055c35SXin Li VP8SSIMGetClippedFunc VP8SSIMGetClipped;
135*b2055c35SXin Li #endif
136*b2055c35SXin Li #if !defined(WEBP_DISABLE_STATS)
137*b2055c35SXin Li VP8AccumulateSSEFunc VP8AccumulateSSE;
138*b2055c35SXin Li #endif
139*b2055c35SXin Li 
140*b2055c35SXin Li extern VP8CPUInfo VP8GetCPUInfo;
141*b2055c35SXin Li extern void VP8SSIMDspInitSSE2(void);
142*b2055c35SXin Li 
WEBP_DSP_INIT_FUNC(VP8SSIMDspInit)143*b2055c35SXin Li WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) {
144*b2055c35SXin Li #if !defined(WEBP_REDUCE_SIZE)
145*b2055c35SXin Li   VP8SSIMGetClipped = SSIMGetClipped_C;
146*b2055c35SXin Li   VP8SSIMGet = SSIMGet_C;
147*b2055c35SXin Li #endif
148*b2055c35SXin Li 
149*b2055c35SXin Li #if !defined(WEBP_DISABLE_STATS)
150*b2055c35SXin Li   VP8AccumulateSSE = AccumulateSSE_C;
151*b2055c35SXin Li #endif
152*b2055c35SXin Li 
153*b2055c35SXin Li   if (VP8GetCPUInfo != NULL) {
154*b2055c35SXin Li #if defined(WEBP_HAVE_SSE2)
155*b2055c35SXin Li     if (VP8GetCPUInfo(kSSE2)) {
156*b2055c35SXin Li       VP8SSIMDspInitSSE2();
157*b2055c35SXin Li     }
158*b2055c35SXin Li #endif
159*b2055c35SXin Li   }
160*b2055c35SXin Li }
161