1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
11*01826a49SYabin Cui /*-*************************************
12*01826a49SYabin Cui * Dependencies
13*01826a49SYabin Cui ***************************************/
14*01826a49SYabin Cui
15*01826a49SYabin Cui /* Currently relies on qsort when combining contiguous matches. This can probably
16*01826a49SYabin Cui * be avoided but would require changes to the algorithm. The qsort is far from
17*01826a49SYabin Cui * the bottleneck in this algorithm even for medium sized files so it's probably
18*01826a49SYabin Cui * not worth trying to address */
19*01826a49SYabin Cui #include <stdlib.h>
20*01826a49SYabin Cui #include <assert.h>
21*01826a49SYabin Cui
22*01826a49SYabin Cui #include "zstd_edist.h"
23*01826a49SYabin Cui #include "mem.h"
24*01826a49SYabin Cui
25*01826a49SYabin Cui /*-*************************************
26*01826a49SYabin Cui * Constants
27*01826a49SYabin Cui ***************************************/
28*01826a49SYabin Cui
29*01826a49SYabin Cui /* Just a sential for the entries of the diagonal matrix */
30*01826a49SYabin Cui #define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)
31*01826a49SYabin Cui
32*01826a49SYabin Cui /* How large should a snake be to be considered a 'big' snake.
33*01826a49SYabin Cui * For an explanation of what a 'snake' is with respect to the
34*01826a49SYabin Cui * edit distance matrix, see the linked paper in zstd_edist.h */
35*01826a49SYabin Cui #define ZSTD_EDIST_SNAKE_THRESH 20
36*01826a49SYabin Cui
37*01826a49SYabin Cui /* After how many iterations should we start to use the heuristic
38*01826a49SYabin Cui * based on 'big' snakes */
39*01826a49SYabin Cui #define ZSTD_EDIST_SNAKE_ITER_THRESH 200
40*01826a49SYabin Cui
41*01826a49SYabin Cui /* After how many iterations should be just give up and take
42*01826a49SYabin Cui * the best available edit script for this round */
43*01826a49SYabin Cui #define ZSTD_EDIST_EXPENSIVE_THRESH 1024
44*01826a49SYabin Cui
45*01826a49SYabin Cui /*-*************************************
46*01826a49SYabin Cui * Structures
47*01826a49SYabin Cui ***************************************/
48*01826a49SYabin Cui
49*01826a49SYabin Cui typedef struct {
50*01826a49SYabin Cui U32 dictIdx;
51*01826a49SYabin Cui U32 srcIdx;
52*01826a49SYabin Cui U32 matchLength;
53*01826a49SYabin Cui } ZSTD_eDist_match;
54*01826a49SYabin Cui
55*01826a49SYabin Cui typedef struct {
56*01826a49SYabin Cui const BYTE* dict;
57*01826a49SYabin Cui const BYTE* src;
58*01826a49SYabin Cui size_t dictSize;
59*01826a49SYabin Cui size_t srcSize;
60*01826a49SYabin Cui S32* forwardDiag; /* Entries of the forward diagonal stored here */
61*01826a49SYabin Cui S32* backwardDiag; /* Entries of the backward diagonal stored here.
62*01826a49SYabin Cui * Note: this buffer and the 'forwardDiag' buffer
63*01826a49SYabin Cui * are contiguous. See the ZSTD_eDist_genSequences */
64*01826a49SYabin Cui ZSTD_eDist_match* matches; /* Accumulate matches of length 1 in this buffer.
65*01826a49SYabin Cui * In a subsequence post-processing step, we combine
66*01826a49SYabin Cui * contiguous matches. */
67*01826a49SYabin Cui U32 nbMatches;
68*01826a49SYabin Cui } ZSTD_eDist_state;
69*01826a49SYabin Cui
70*01826a49SYabin Cui typedef struct {
71*01826a49SYabin Cui S32 dictMid; /* The mid diagonal for the dictionary */
72*01826a49SYabin Cui S32 srcMid; /* The mid diagonal for the source */
73*01826a49SYabin Cui int lowUseHeuristics; /* Should we use heuristics for the low part */
74*01826a49SYabin Cui int highUseHeuristics; /* Should we use heuristics for the high part */
75*01826a49SYabin Cui } ZSTD_eDist_partition;
76*01826a49SYabin Cui
77*01826a49SYabin Cui /*-*************************************
78*01826a49SYabin Cui * Internal
79*01826a49SYabin Cui ***************************************/
80*01826a49SYabin Cui
ZSTD_eDist_diag(ZSTD_eDist_state * state,ZSTD_eDist_partition * partition,S32 dictLow,S32 dictHigh,S32 srcLow,S32 srcHigh,int useHeuristics)81*01826a49SYabin Cui static void ZSTD_eDist_diag(ZSTD_eDist_state* state,
82*01826a49SYabin Cui ZSTD_eDist_partition* partition,
83*01826a49SYabin Cui S32 dictLow, S32 dictHigh, S32 srcLow,
84*01826a49SYabin Cui S32 srcHigh, int useHeuristics)
85*01826a49SYabin Cui {
86*01826a49SYabin Cui S32* const forwardDiag = state->forwardDiag;
87*01826a49SYabin Cui S32* const backwardDiag = state->backwardDiag;
88*01826a49SYabin Cui const BYTE* const dict = state->dict;
89*01826a49SYabin Cui const BYTE* const src = state->src;
90*01826a49SYabin Cui
91*01826a49SYabin Cui S32 const diagMin = dictLow - srcHigh;
92*01826a49SYabin Cui S32 const diagMax = dictHigh - srcLow;
93*01826a49SYabin Cui S32 const forwardMid = dictLow - srcLow;
94*01826a49SYabin Cui S32 const backwardMid = dictHigh - srcHigh;
95*01826a49SYabin Cui
96*01826a49SYabin Cui S32 forwardMin = forwardMid;
97*01826a49SYabin Cui S32 forwardMax = forwardMid;
98*01826a49SYabin Cui S32 backwardMin = backwardMid;
99*01826a49SYabin Cui S32 backwardMax = backwardMid;
100*01826a49SYabin Cui int odd = (forwardMid - backwardMid) & 1;
101*01826a49SYabin Cui U32 iterations;
102*01826a49SYabin Cui
103*01826a49SYabin Cui forwardDiag[forwardMid] = dictLow;
104*01826a49SYabin Cui backwardDiag[backwardMid] = dictHigh;
105*01826a49SYabin Cui
106*01826a49SYabin Cui /* Main loop for updating diag entries. Unless useHeuristics is
107*01826a49SYabin Cui * set to false, this loop will run until it finds the minimal
108*01826a49SYabin Cui * edit script */
109*01826a49SYabin Cui for (iterations = 1;;iterations++) {
110*01826a49SYabin Cui S32 diag;
111*01826a49SYabin Cui int bigSnake = 0;
112*01826a49SYabin Cui
113*01826a49SYabin Cui if (forwardMin > diagMin) {
114*01826a49SYabin Cui forwardMin--;
115*01826a49SYabin Cui forwardDiag[forwardMin - 1] = -1;
116*01826a49SYabin Cui } else {
117*01826a49SYabin Cui forwardMin++;
118*01826a49SYabin Cui }
119*01826a49SYabin Cui
120*01826a49SYabin Cui if (forwardMax < diagMax) {
121*01826a49SYabin Cui forwardMax++;
122*01826a49SYabin Cui forwardDiag[forwardMax + 1] = -1;
123*01826a49SYabin Cui } else {
124*01826a49SYabin Cui forwardMax--;
125*01826a49SYabin Cui }
126*01826a49SYabin Cui
127*01826a49SYabin Cui for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
128*01826a49SYabin Cui S32 dictIdx;
129*01826a49SYabin Cui S32 srcIdx;
130*01826a49SYabin Cui S32 low = forwardDiag[diag - 1];
131*01826a49SYabin Cui S32 high = forwardDiag[diag + 1];
132*01826a49SYabin Cui S32 dictIdx0 = low < high ? high : low + 1;
133*01826a49SYabin Cui
134*01826a49SYabin Cui for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
135*01826a49SYabin Cui dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx];
136*01826a49SYabin Cui dictIdx++, srcIdx++) continue;
137*01826a49SYabin Cui
138*01826a49SYabin Cui if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH)
139*01826a49SYabin Cui bigSnake = 1;
140*01826a49SYabin Cui
141*01826a49SYabin Cui forwardDiag[diag] = dictIdx;
142*01826a49SYabin Cui
143*01826a49SYabin Cui if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) {
144*01826a49SYabin Cui partition->dictMid = dictIdx;
145*01826a49SYabin Cui partition->srcMid = srcIdx;
146*01826a49SYabin Cui partition->lowUseHeuristics = 0;
147*01826a49SYabin Cui partition->highUseHeuristics = 0;
148*01826a49SYabin Cui return;
149*01826a49SYabin Cui }
150*01826a49SYabin Cui }
151*01826a49SYabin Cui
152*01826a49SYabin Cui if (backwardMin > diagMin) {
153*01826a49SYabin Cui backwardMin--;
154*01826a49SYabin Cui backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX;
155*01826a49SYabin Cui } else {
156*01826a49SYabin Cui backwardMin++;
157*01826a49SYabin Cui }
158*01826a49SYabin Cui
159*01826a49SYabin Cui if (backwardMax < diagMax) {
160*01826a49SYabin Cui backwardMax++;
161*01826a49SYabin Cui backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX;
162*01826a49SYabin Cui } else {
163*01826a49SYabin Cui backwardMax--;
164*01826a49SYabin Cui }
165*01826a49SYabin Cui
166*01826a49SYabin Cui
167*01826a49SYabin Cui for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
168*01826a49SYabin Cui S32 dictIdx;
169*01826a49SYabin Cui S32 srcIdx;
170*01826a49SYabin Cui S32 low = backwardDiag[diag - 1];
171*01826a49SYabin Cui S32 high = backwardDiag[diag + 1];
172*01826a49SYabin Cui S32 dictIdx0 = low < high ? low : high - 1;
173*01826a49SYabin Cui
174*01826a49SYabin Cui for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
175*01826a49SYabin Cui dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1];
176*01826a49SYabin Cui dictIdx--, srcIdx--) continue;
177*01826a49SYabin Cui
178*01826a49SYabin Cui if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH)
179*01826a49SYabin Cui bigSnake = 1;
180*01826a49SYabin Cui
181*01826a49SYabin Cui backwardDiag[diag] = dictIdx;
182*01826a49SYabin Cui
183*01826a49SYabin Cui if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) {
184*01826a49SYabin Cui partition->dictMid = dictIdx;
185*01826a49SYabin Cui partition->srcMid = srcIdx;
186*01826a49SYabin Cui partition->lowUseHeuristics = 0;
187*01826a49SYabin Cui partition->highUseHeuristics = 0;
188*01826a49SYabin Cui return;
189*01826a49SYabin Cui }
190*01826a49SYabin Cui }
191*01826a49SYabin Cui
192*01826a49SYabin Cui if (!useHeuristics)
193*01826a49SYabin Cui continue;
194*01826a49SYabin Cui
195*01826a49SYabin Cui /* Everything under this point is a heuristic. Using these will
196*01826a49SYabin Cui * substantially speed up the match finding. In some cases, taking
197*01826a49SYabin Cui * the total match finding time from several minutes to seconds.
198*01826a49SYabin Cui * Of course, the caveat is that the edit script found may no longer
199*01826a49SYabin Cui * be optimal */
200*01826a49SYabin Cui
201*01826a49SYabin Cui /* Big snake heuristic */
202*01826a49SYabin Cui if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
203*01826a49SYabin Cui {
204*01826a49SYabin Cui S32 best = 0;
205*01826a49SYabin Cui
206*01826a49SYabin Cui for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
207*01826a49SYabin Cui S32 diagDiag = diag - forwardMid;
208*01826a49SYabin Cui S32 dictIdx = forwardDiag[diag];
209*01826a49SYabin Cui S32 srcIdx = dictIdx - diag;
210*01826a49SYabin Cui S32 v = (dictIdx - dictLow) * 2 - diagDiag;
211*01826a49SYabin Cui
212*01826a49SYabin Cui if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
213*01826a49SYabin Cui if (v > best
214*01826a49SYabin Cui && dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
215*01826a49SYabin Cui && srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
216*01826a49SYabin Cui S32 k;
217*01826a49SYabin Cui for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
218*01826a49SYabin Cui if (k == ZSTD_EDIST_SNAKE_THRESH) {
219*01826a49SYabin Cui best = v;
220*01826a49SYabin Cui partition->dictMid = dictIdx;
221*01826a49SYabin Cui partition->srcMid = srcIdx;
222*01826a49SYabin Cui break;
223*01826a49SYabin Cui }
224*01826a49SYabin Cui }
225*01826a49SYabin Cui }
226*01826a49SYabin Cui }
227*01826a49SYabin Cui }
228*01826a49SYabin Cui
229*01826a49SYabin Cui if (best > 0) {
230*01826a49SYabin Cui partition->lowUseHeuristics = 0;
231*01826a49SYabin Cui partition->highUseHeuristics = 1;
232*01826a49SYabin Cui return;
233*01826a49SYabin Cui }
234*01826a49SYabin Cui }
235*01826a49SYabin Cui
236*01826a49SYabin Cui {
237*01826a49SYabin Cui S32 best = 0;
238*01826a49SYabin Cui
239*01826a49SYabin Cui for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
240*01826a49SYabin Cui S32 diagDiag = diag - backwardMid;
241*01826a49SYabin Cui S32 dictIdx = backwardDiag[diag];
242*01826a49SYabin Cui S32 srcIdx = dictIdx - diag;
243*01826a49SYabin Cui S32 v = (dictHigh - dictIdx) * 2 + diagDiag;
244*01826a49SYabin Cui
245*01826a49SYabin Cui if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
246*01826a49SYabin Cui if (v > best
247*01826a49SYabin Cui && dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
248*01826a49SYabin Cui && srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
249*01826a49SYabin Cui int k;
250*01826a49SYabin Cui for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
251*01826a49SYabin Cui if (k == ZSTD_EDIST_SNAKE_THRESH - 1) {
252*01826a49SYabin Cui best = v;
253*01826a49SYabin Cui partition->dictMid = dictIdx;
254*01826a49SYabin Cui partition->srcMid = srcIdx;
255*01826a49SYabin Cui break;
256*01826a49SYabin Cui }
257*01826a49SYabin Cui }
258*01826a49SYabin Cui }
259*01826a49SYabin Cui }
260*01826a49SYabin Cui }
261*01826a49SYabin Cui
262*01826a49SYabin Cui if (best > 0) {
263*01826a49SYabin Cui partition->lowUseHeuristics = 1;
264*01826a49SYabin Cui partition->highUseHeuristics = 0;
265*01826a49SYabin Cui return;
266*01826a49SYabin Cui }
267*01826a49SYabin Cui }
268*01826a49SYabin Cui }
269*01826a49SYabin Cui
270*01826a49SYabin Cui /* More general 'too expensive' heuristic */
271*01826a49SYabin Cui if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
272*01826a49SYabin Cui S32 forwardDictSrcBest;
273*01826a49SYabin Cui S32 forwardDictBest = 0;
274*01826a49SYabin Cui S32 backwardDictSrcBest;
275*01826a49SYabin Cui S32 backwardDictBest = 0;
276*01826a49SYabin Cui
277*01826a49SYabin Cui forwardDictSrcBest = -1;
278*01826a49SYabin Cui for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
279*01826a49SYabin Cui S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
280*01826a49SYabin Cui S32 srcIdx = dictIdx - diag;
281*01826a49SYabin Cui
282*01826a49SYabin Cui if (srcHigh < srcIdx) {
283*01826a49SYabin Cui dictIdx = srcHigh + diag;
284*01826a49SYabin Cui srcIdx = srcHigh;
285*01826a49SYabin Cui }
286*01826a49SYabin Cui
287*01826a49SYabin Cui if (forwardDictSrcBest < dictIdx + srcIdx) {
288*01826a49SYabin Cui forwardDictSrcBest = dictIdx + srcIdx;
289*01826a49SYabin Cui forwardDictBest = dictIdx;
290*01826a49SYabin Cui }
291*01826a49SYabin Cui }
292*01826a49SYabin Cui
293*01826a49SYabin Cui backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX;
294*01826a49SYabin Cui for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
295*01826a49SYabin Cui S32 dictIdx = MAX(dictLow, backwardDiag[diag]);
296*01826a49SYabin Cui S32 srcIdx = dictIdx - diag;
297*01826a49SYabin Cui
298*01826a49SYabin Cui if (srcIdx < srcLow) {
299*01826a49SYabin Cui dictIdx = srcLow + diag;
300*01826a49SYabin Cui srcIdx = srcLow;
301*01826a49SYabin Cui }
302*01826a49SYabin Cui
303*01826a49SYabin Cui if (dictIdx + srcIdx < backwardDictSrcBest) {
304*01826a49SYabin Cui backwardDictSrcBest = dictIdx + srcIdx;
305*01826a49SYabin Cui backwardDictBest = dictIdx;
306*01826a49SYabin Cui }
307*01826a49SYabin Cui }
308*01826a49SYabin Cui
309*01826a49SYabin Cui if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) {
310*01826a49SYabin Cui partition->dictMid = forwardDictBest;
311*01826a49SYabin Cui partition->srcMid = forwardDictSrcBest - forwardDictBest;
312*01826a49SYabin Cui partition->lowUseHeuristics = 0;
313*01826a49SYabin Cui partition->highUseHeuristics = 1;
314*01826a49SYabin Cui } else {
315*01826a49SYabin Cui partition->dictMid = backwardDictBest;
316*01826a49SYabin Cui partition->srcMid = backwardDictSrcBest - backwardDictBest;
317*01826a49SYabin Cui partition->lowUseHeuristics = 1;
318*01826a49SYabin Cui partition->highUseHeuristics = 0;
319*01826a49SYabin Cui }
320*01826a49SYabin Cui return;
321*01826a49SYabin Cui }
322*01826a49SYabin Cui }
323*01826a49SYabin Cui }
324*01826a49SYabin Cui
ZSTD_eDist_insertMatch(ZSTD_eDist_state * state,S32 const dictIdx,S32 const srcIdx)325*01826a49SYabin Cui static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state,
326*01826a49SYabin Cui S32 const dictIdx, S32 const srcIdx)
327*01826a49SYabin Cui {
328*01826a49SYabin Cui state->matches[state->nbMatches].dictIdx = dictIdx;
329*01826a49SYabin Cui state->matches[state->nbMatches].srcIdx = srcIdx;
330*01826a49SYabin Cui state->matches[state->nbMatches].matchLength = 1;
331*01826a49SYabin Cui state->nbMatches++;
332*01826a49SYabin Cui }
333*01826a49SYabin Cui
ZSTD_eDist_compare(ZSTD_eDist_state * state,S32 dictLow,S32 dictHigh,S32 srcLow,S32 srcHigh,int useHeuristics)334*01826a49SYabin Cui static int ZSTD_eDist_compare(ZSTD_eDist_state* state,
335*01826a49SYabin Cui S32 dictLow, S32 dictHigh, S32 srcLow,
336*01826a49SYabin Cui S32 srcHigh, int useHeuristics)
337*01826a49SYabin Cui {
338*01826a49SYabin Cui const BYTE* const dict = state->dict;
339*01826a49SYabin Cui const BYTE* const src = state->src;
340*01826a49SYabin Cui
341*01826a49SYabin Cui /* Found matches while traversing from the low end */
342*01826a49SYabin Cui while (dictLow < dictHigh && srcLow < srcHigh && dict[dictLow] == src[srcLow]) {
343*01826a49SYabin Cui ZSTD_eDist_insertMatch(state, dictLow, srcLow);
344*01826a49SYabin Cui dictLow++;
345*01826a49SYabin Cui srcLow++;
346*01826a49SYabin Cui }
347*01826a49SYabin Cui
348*01826a49SYabin Cui /* Found matches while traversing from the high end */
349*01826a49SYabin Cui while (dictLow < dictHigh && srcLow < srcHigh && dict[dictHigh - 1] == src[srcHigh - 1]) {
350*01826a49SYabin Cui ZSTD_eDist_insertMatch(state, dictHigh - 1, srcHigh - 1);
351*01826a49SYabin Cui dictHigh--;
352*01826a49SYabin Cui srcHigh--;
353*01826a49SYabin Cui }
354*01826a49SYabin Cui
355*01826a49SYabin Cui /* If the low and high end end up touching. If we wanted to make
356*01826a49SYabin Cui * note of the differences like most diffing algorithms do, we would
357*01826a49SYabin Cui * do so here. In our case, we're only concerned with matches
358*01826a49SYabin Cui * Note: if you wanted to find the edit distance of the algorithm,
359*01826a49SYabin Cui * you could just accumulate the cost for an insertion/deletion
360*01826a49SYabin Cui * below. */
361*01826a49SYabin Cui if (dictLow == dictHigh) {
362*01826a49SYabin Cui while (srcLow < srcHigh) {
363*01826a49SYabin Cui /* Reaching this point means inserting src[srcLow] into
364*01826a49SYabin Cui * the current position of dict */
365*01826a49SYabin Cui srcLow++;
366*01826a49SYabin Cui }
367*01826a49SYabin Cui } else if (srcLow == srcHigh) {
368*01826a49SYabin Cui while (dictLow < dictHigh) {
369*01826a49SYabin Cui /* Reaching this point means deleting dict[dictLow] from
370*01826a49SYabin Cui * the current position of dict */
371*01826a49SYabin Cui dictLow++;
372*01826a49SYabin Cui }
373*01826a49SYabin Cui } else {
374*01826a49SYabin Cui ZSTD_eDist_partition partition;
375*01826a49SYabin Cui partition.dictMid = 0;
376*01826a49SYabin Cui partition.srcMid = 0;
377*01826a49SYabin Cui ZSTD_eDist_diag(state, &partition, dictLow, dictHigh,
378*01826a49SYabin Cui srcLow, srcHigh, useHeuristics);
379*01826a49SYabin Cui if (ZSTD_eDist_compare(state, dictLow, partition.dictMid,
380*01826a49SYabin Cui srcLow, partition.srcMid, partition.lowUseHeuristics))
381*01826a49SYabin Cui return 1;
382*01826a49SYabin Cui if (ZSTD_eDist_compare(state, partition.dictMid, dictHigh,
383*01826a49SYabin Cui partition.srcMid, srcHigh, partition.highUseHeuristics))
384*01826a49SYabin Cui return 1;
385*01826a49SYabin Cui }
386*01826a49SYabin Cui
387*01826a49SYabin Cui return 0;
388*01826a49SYabin Cui }
389*01826a49SYabin Cui
ZSTD_eDist_matchComp(const void * p,const void * q)390*01826a49SYabin Cui static int ZSTD_eDist_matchComp(const void* p, const void* q)
391*01826a49SYabin Cui {
392*01826a49SYabin Cui S32 const l = ((ZSTD_eDist_match*)p)->srcIdx;
393*01826a49SYabin Cui S32 const r = ((ZSTD_eDist_match*)q)->srcIdx;
394*01826a49SYabin Cui return (l - r);
395*01826a49SYabin Cui }
396*01826a49SYabin Cui
397*01826a49SYabin Cui /* The matches from the approach above will all be of the form
398*01826a49SYabin Cui * (dictIdx, srcIdx, 1). This method combines contiguous matches
399*01826a49SYabin Cui * of length MINMATCH or greater. Matches less than MINMATCH
400*01826a49SYabin Cui * are discarded */
ZSTD_eDist_combineMatches(ZSTD_eDist_state * state)401*01826a49SYabin Cui static void ZSTD_eDist_combineMatches(ZSTD_eDist_state* state)
402*01826a49SYabin Cui {
403*01826a49SYabin Cui /* Create a new buffer to put the combined matches into
404*01826a49SYabin Cui * and memcpy to state->matches after */
405*01826a49SYabin Cui ZSTD_eDist_match* combinedMatches =
406*01826a49SYabin Cui ZSTD_malloc(state->nbMatches * sizeof(ZSTD_eDist_match),
407*01826a49SYabin Cui ZSTD_defaultCMem);
408*01826a49SYabin Cui
409*01826a49SYabin Cui U32 nbCombinedMatches = 1;
410*01826a49SYabin Cui size_t i;
411*01826a49SYabin Cui
412*01826a49SYabin Cui /* Make sure that the srcIdx and dictIdx are in sorted order.
413*01826a49SYabin Cui * The combination step won't work otherwise */
414*01826a49SYabin Cui qsort(state->matches, state->nbMatches, sizeof(ZSTD_eDist_match), ZSTD_eDist_matchComp);
415*01826a49SYabin Cui
416*01826a49SYabin Cui memcpy(combinedMatches, state->matches, sizeof(ZSTD_eDist_match));
417*01826a49SYabin Cui for (i = 1; i < state->nbMatches; i++) {
418*01826a49SYabin Cui ZSTD_eDist_match const match = state->matches[i];
419*01826a49SYabin Cui ZSTD_eDist_match const combinedMatch =
420*01826a49SYabin Cui combinedMatches[nbCombinedMatches - 1];
421*01826a49SYabin Cui if (combinedMatch.srcIdx + combinedMatch.matchLength == match.srcIdx &&
422*01826a49SYabin Cui combinedMatch.dictIdx + combinedMatch.matchLength == match.dictIdx) {
423*01826a49SYabin Cui combinedMatches[nbCombinedMatches - 1].matchLength++;
424*01826a49SYabin Cui } else {
425*01826a49SYabin Cui /* Discard matches that are less than MINMATCH */
426*01826a49SYabin Cui if (combinedMatches[nbCombinedMatches - 1].matchLength < MINMATCH) {
427*01826a49SYabin Cui nbCombinedMatches--;
428*01826a49SYabin Cui }
429*01826a49SYabin Cui
430*01826a49SYabin Cui memcpy(combinedMatches + nbCombinedMatches,
431*01826a49SYabin Cui state->matches + i, sizeof(ZSTD_eDist_match));
432*01826a49SYabin Cui nbCombinedMatches++;
433*01826a49SYabin Cui }
434*01826a49SYabin Cui }
435*01826a49SYabin Cui memcpy(state->matches, combinedMatches, nbCombinedMatches * sizeof(ZSTD_eDist_match));
436*01826a49SYabin Cui state->nbMatches = nbCombinedMatches;
437*01826a49SYabin Cui ZSTD_free(combinedMatches, ZSTD_defaultCMem);
438*01826a49SYabin Cui }
439*01826a49SYabin Cui
ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence * sequences,ZSTD_eDist_state * state)440*01826a49SYabin Cui static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences,
441*01826a49SYabin Cui ZSTD_eDist_state* state)
442*01826a49SYabin Cui {
443*01826a49SYabin Cui const ZSTD_eDist_match* matches = state->matches;
444*01826a49SYabin Cui size_t const nbMatches = state->nbMatches;
445*01826a49SYabin Cui size_t const dictSize = state->dictSize;
446*01826a49SYabin Cui size_t nbSequences = 0;
447*01826a49SYabin Cui size_t i;
448*01826a49SYabin Cui for (i = 0; i < nbMatches; i++) {
449*01826a49SYabin Cui ZSTD_eDist_match const match = matches[i];
450*01826a49SYabin Cui U32 const litLength = !i ? match.srcIdx :
451*01826a49SYabin Cui match.srcIdx - (matches[i - 1].srcIdx + matches[i - 1].matchLength);
452*01826a49SYabin Cui U32 const offset = (match.srcIdx + dictSize) - match.dictIdx;
453*01826a49SYabin Cui U32 const matchLength = match.matchLength;
454*01826a49SYabin Cui sequences[nbSequences].offset = offset;
455*01826a49SYabin Cui sequences[nbSequences].litLength = litLength;
456*01826a49SYabin Cui sequences[nbSequences].matchLength = matchLength;
457*01826a49SYabin Cui nbSequences++;
458*01826a49SYabin Cui }
459*01826a49SYabin Cui return nbSequences;
460*01826a49SYabin Cui }
461*01826a49SYabin Cui
462*01826a49SYabin Cui /*-*************************************
463*01826a49SYabin Cui * Internal utils
464*01826a49SYabin Cui ***************************************/
465*01826a49SYabin Cui
ZSTD_eDist_hamingDist(const BYTE * const a,const BYTE * const b,size_t n)466*01826a49SYabin Cui static size_t ZSTD_eDist_hamingDist(const BYTE* const a,
467*01826a49SYabin Cui const BYTE* const b, size_t n)
468*01826a49SYabin Cui {
469*01826a49SYabin Cui size_t i;
470*01826a49SYabin Cui size_t dist = 0;
471*01826a49SYabin Cui for (i = 0; i < n; i++)
472*01826a49SYabin Cui dist += a[i] != b[i];
473*01826a49SYabin Cui return dist;
474*01826a49SYabin Cui }
475*01826a49SYabin Cui
476*01826a49SYabin Cui /* This is a pretty naive recursive implementation that should only
477*01826a49SYabin Cui * be used for quick tests obviously. Don't try and run this on a
478*01826a49SYabin Cui * GB file or something. There are faster implementations. Use those
479*01826a49SYabin Cui * if you need to run it for large files. */
ZSTD_eDist_levenshteinDist(const BYTE * const s,size_t const sn,const BYTE * const t,size_t const tn)480*01826a49SYabin Cui static size_t ZSTD_eDist_levenshteinDist(const BYTE* const s,
481*01826a49SYabin Cui size_t const sn, const BYTE* const t,
482*01826a49SYabin Cui size_t const tn)
483*01826a49SYabin Cui {
484*01826a49SYabin Cui size_t a, b, c;
485*01826a49SYabin Cui
486*01826a49SYabin Cui if (!sn)
487*01826a49SYabin Cui return tn;
488*01826a49SYabin Cui if (!tn)
489*01826a49SYabin Cui return sn;
490*01826a49SYabin Cui
491*01826a49SYabin Cui if (s[sn - 1] == t[tn - 1])
492*01826a49SYabin Cui return ZSTD_eDist_levenshteinDist(
493*01826a49SYabin Cui s, sn - 1, t, tn - 1);
494*01826a49SYabin Cui
495*01826a49SYabin Cui a = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1);
496*01826a49SYabin Cui b = ZSTD_eDist_levenshteinDist(s, sn, t, tn - 1);
497*01826a49SYabin Cui c = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn);
498*01826a49SYabin Cui
499*01826a49SYabin Cui if (a > b)
500*01826a49SYabin Cui a = b;
501*01826a49SYabin Cui if (a > c)
502*01826a49SYabin Cui a = c;
503*01826a49SYabin Cui
504*01826a49SYabin Cui return a + 1;
505*01826a49SYabin Cui }
506*01826a49SYabin Cui
ZSTD_eDist_validateMatches(ZSTD_eDist_match * matches,size_t const nbMatches,const BYTE * const dict,size_t const dictSize,const BYTE * const src,size_t const srcSize)507*01826a49SYabin Cui static void ZSTD_eDist_validateMatches(ZSTD_eDist_match* matches,
508*01826a49SYabin Cui size_t const nbMatches, const BYTE* const dict,
509*01826a49SYabin Cui size_t const dictSize, const BYTE* const src,
510*01826a49SYabin Cui size_t const srcSize)
511*01826a49SYabin Cui {
512*01826a49SYabin Cui size_t i;
513*01826a49SYabin Cui for (i = 0; i < nbMatches; i++) {
514*01826a49SYabin Cui ZSTD_eDist_match match = matches[i];
515*01826a49SYabin Cui U32 const dictIdx = match.dictIdx;
516*01826a49SYabin Cui U32 const srcIdx = match.srcIdx;
517*01826a49SYabin Cui U32 const matchLength = match.matchLength;
518*01826a49SYabin Cui
519*01826a49SYabin Cui assert(dictIdx + matchLength < dictSize);
520*01826a49SYabin Cui assert(srcIdx + matchLength < srcSize);
521*01826a49SYabin Cui assert(!memcmp(dict + dictIdx, src + srcIdx, matchLength));
522*01826a49SYabin Cui }
523*01826a49SYabin Cui }
524*01826a49SYabin Cui
525*01826a49SYabin Cui /*-*************************************
526*01826a49SYabin Cui * API
527*01826a49SYabin Cui ***************************************/
528*01826a49SYabin Cui
ZSTD_eDist_genSequences(ZSTD_Sequence * sequences,const void * dict,size_t dictSize,const void * src,size_t srcSize,int useHeuristics)529*01826a49SYabin Cui size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences,
530*01826a49SYabin Cui const void* dict, size_t dictSize,
531*01826a49SYabin Cui const void* src, size_t srcSize,
532*01826a49SYabin Cui int useHeuristics)
533*01826a49SYabin Cui {
534*01826a49SYabin Cui size_t const nbDiags = dictSize + srcSize + 3;
535*01826a49SYabin Cui S32* buffer = ZSTD_malloc(nbDiags * 2 * sizeof(S32), ZSTD_defaultCMem);
536*01826a49SYabin Cui ZSTD_eDist_state state;
537*01826a49SYabin Cui size_t nbSequences = 0;
538*01826a49SYabin Cui
539*01826a49SYabin Cui state.dict = (const BYTE*)dict;
540*01826a49SYabin Cui state.src = (const BYTE*)src;
541*01826a49SYabin Cui state.dictSize = dictSize;
542*01826a49SYabin Cui state.srcSize = srcSize;
543*01826a49SYabin Cui state.forwardDiag = buffer;
544*01826a49SYabin Cui state.backwardDiag = buffer + nbDiags;
545*01826a49SYabin Cui state.forwardDiag += srcSize + 1;
546*01826a49SYabin Cui state.backwardDiag += srcSize + 1;
547*01826a49SYabin Cui state.matches = ZSTD_malloc(srcSize * sizeof(ZSTD_eDist_match), ZSTD_defaultCMem);
548*01826a49SYabin Cui state.nbMatches = 0;
549*01826a49SYabin Cui
550*01826a49SYabin Cui ZSTD_eDist_compare(&state, 0, dictSize, 0, srcSize, 1);
551*01826a49SYabin Cui ZSTD_eDist_combineMatches(&state);
552*01826a49SYabin Cui nbSequences = ZSTD_eDist_convertMatchesToSequences(sequences, &state);
553*01826a49SYabin Cui
554*01826a49SYabin Cui ZSTD_free(buffer, ZSTD_defaultCMem);
555*01826a49SYabin Cui ZSTD_free(state.matches, ZSTD_defaultCMem);
556*01826a49SYabin Cui
557*01826a49SYabin Cui return nbSequences;
558*01826a49SYabin Cui }
559