xref: /aosp_15_r20/external/zstd/tests/external_matchfinder.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Yann Collet, Meta Platforms, Inc.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui #include "external_matchfinder.h"
12*01826a49SYabin Cui #include <string.h>
13*01826a49SYabin Cui #include "zstd_compress_internal.h"
14*01826a49SYabin Cui 
15*01826a49SYabin Cui #define HSIZE 1024
16*01826a49SYabin Cui static U32 const HLOG = 10;
17*01826a49SYabin Cui static U32 const MLS = 4;
18*01826a49SYabin Cui static U32 const BADIDX = 0xffffffff;
19*01826a49SYabin Cui 
simpleSequenceProducer(void * sequenceProducerState,ZSTD_Sequence * outSeqs,size_t outSeqsCapacity,const void * src,size_t srcSize,const void * dict,size_t dictSize,int compressionLevel,size_t windowSize)20*01826a49SYabin Cui static size_t simpleSequenceProducer(
21*01826a49SYabin Cui   void* sequenceProducerState,
22*01826a49SYabin Cui   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
23*01826a49SYabin Cui   const void* src, size_t srcSize,
24*01826a49SYabin Cui   const void* dict, size_t dictSize,
25*01826a49SYabin Cui   int compressionLevel,
26*01826a49SYabin Cui   size_t windowSize
27*01826a49SYabin Cui ) {
28*01826a49SYabin Cui     const BYTE* const istart = (const BYTE*)src;
29*01826a49SYabin Cui     const BYTE* const iend = istart + srcSize;
30*01826a49SYabin Cui     const BYTE* ip = istart;
31*01826a49SYabin Cui     const BYTE* anchor = istart;
32*01826a49SYabin Cui     size_t seqCount = 0;
33*01826a49SYabin Cui     U32 hashTable[HSIZE];
34*01826a49SYabin Cui 
35*01826a49SYabin Cui     (void)sequenceProducerState;
36*01826a49SYabin Cui     (void)dict;
37*01826a49SYabin Cui     (void)dictSize;
38*01826a49SYabin Cui     (void)outSeqsCapacity;
39*01826a49SYabin Cui     (void)compressionLevel;
40*01826a49SYabin Cui 
41*01826a49SYabin Cui     {   int i;
42*01826a49SYabin Cui         for (i=0; i < HSIZE; i++) {
43*01826a49SYabin Cui             hashTable[i] = BADIDX;
44*01826a49SYabin Cui     }   }
45*01826a49SYabin Cui 
46*01826a49SYabin Cui     while (ip + MLS < iend) {
47*01826a49SYabin Cui         size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
48*01826a49SYabin Cui         U32 const matchIndex = hashTable[hash];
49*01826a49SYabin Cui         hashTable[hash] = (U32)(ip - istart);
50*01826a49SYabin Cui 
51*01826a49SYabin Cui         if (matchIndex != BADIDX) {
52*01826a49SYabin Cui             const BYTE* const match = istart + matchIndex;
53*01826a49SYabin Cui             U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
54*01826a49SYabin Cui             if (matchLen >= ZSTD_MINMATCH_MIN) {
55*01826a49SYabin Cui                 U32 const litLen = (U32)(ip - anchor);
56*01826a49SYabin Cui                 U32 const offset = (U32)(ip - match);
57*01826a49SYabin Cui                 ZSTD_Sequence const seq = {
58*01826a49SYabin Cui                     offset, litLen, matchLen, 0
59*01826a49SYabin Cui                 };
60*01826a49SYabin Cui 
61*01826a49SYabin Cui                 /* Note: it's crucial to stay within the window size! */
62*01826a49SYabin Cui                 if (offset <= windowSize) {
63*01826a49SYabin Cui                     outSeqs[seqCount++] = seq;
64*01826a49SYabin Cui                     ip += matchLen;
65*01826a49SYabin Cui                     anchor = ip;
66*01826a49SYabin Cui                     continue;
67*01826a49SYabin Cui                 }
68*01826a49SYabin Cui             }
69*01826a49SYabin Cui         }
70*01826a49SYabin Cui 
71*01826a49SYabin Cui         ip++;
72*01826a49SYabin Cui     }
73*01826a49SYabin Cui 
74*01826a49SYabin Cui     {   ZSTD_Sequence const finalSeq = {
75*01826a49SYabin Cui             0, (U32)(iend - anchor), 0, 0
76*01826a49SYabin Cui         };
77*01826a49SYabin Cui         outSeqs[seqCount++] = finalSeq;
78*01826a49SYabin Cui     }
79*01826a49SYabin Cui 
80*01826a49SYabin Cui     return seqCount;
81*01826a49SYabin Cui }
82*01826a49SYabin Cui 
zstreamSequenceProducer(void * sequenceProducerState,ZSTD_Sequence * outSeqs,size_t outSeqsCapacity,const void * src,size_t srcSize,const void * dict,size_t dictSize,int compressionLevel,size_t windowSize)83*01826a49SYabin Cui size_t zstreamSequenceProducer(
84*01826a49SYabin Cui   void* sequenceProducerState,
85*01826a49SYabin Cui   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
86*01826a49SYabin Cui   const void* src, size_t srcSize,
87*01826a49SYabin Cui   const void* dict, size_t dictSize,
88*01826a49SYabin Cui   int compressionLevel,
89*01826a49SYabin Cui   size_t windowSize
90*01826a49SYabin Cui ) {
91*01826a49SYabin Cui     EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState);
92*01826a49SYabin Cui     memset(outSeqs, 0, outSeqsCapacity);
93*01826a49SYabin Cui 
94*01826a49SYabin Cui     switch (testCase) {
95*01826a49SYabin Cui         case EMF_ZERO_SEQS:
96*01826a49SYabin Cui             return 0;
97*01826a49SYabin Cui         case EMF_ONE_BIG_SEQ:
98*01826a49SYabin Cui             outSeqs[0].offset = 0;
99*01826a49SYabin Cui             outSeqs[0].matchLength = 0;
100*01826a49SYabin Cui             outSeqs[0].litLength = (U32)(srcSize);
101*01826a49SYabin Cui             return 1;
102*01826a49SYabin Cui          case EMF_LOTS_OF_SEQS:
103*01826a49SYabin Cui             return simpleSequenceProducer(
104*01826a49SYabin Cui                 sequenceProducerState,
105*01826a49SYabin Cui                 outSeqs, outSeqsCapacity,
106*01826a49SYabin Cui                 src, srcSize,
107*01826a49SYabin Cui                 dict, dictSize,
108*01826a49SYabin Cui                 compressionLevel,
109*01826a49SYabin Cui                 windowSize
110*01826a49SYabin Cui             );
111*01826a49SYabin Cui         case EMF_INVALID_OFFSET:
112*01826a49SYabin Cui             outSeqs[0].offset = 1 << 20;
113*01826a49SYabin Cui             outSeqs[0].matchLength = 4;
114*01826a49SYabin Cui             outSeqs[0].litLength = (U32)(srcSize - 4);
115*01826a49SYabin Cui             return 1;
116*01826a49SYabin Cui         case EMF_INVALID_MATCHLEN:
117*01826a49SYabin Cui             outSeqs[0].offset = 1;
118*01826a49SYabin Cui             outSeqs[0].matchLength = (U32)(srcSize);
119*01826a49SYabin Cui             outSeqs[0].litLength = 1;
120*01826a49SYabin Cui             return 1;
121*01826a49SYabin Cui         case EMF_INVALID_LITLEN:
122*01826a49SYabin Cui             outSeqs[0].offset = 0;
123*01826a49SYabin Cui             outSeqs[0].matchLength = 0;
124*01826a49SYabin Cui             outSeqs[0].litLength = (U32)(srcSize + 1);
125*01826a49SYabin Cui             return 1;
126*01826a49SYabin Cui         case EMF_INVALID_LAST_LITS:
127*01826a49SYabin Cui             outSeqs[0].offset = 1;
128*01826a49SYabin Cui             outSeqs[0].matchLength = 1;
129*01826a49SYabin Cui             outSeqs[0].litLength = 1;
130*01826a49SYabin Cui             outSeqs[1].offset = 0;
131*01826a49SYabin Cui             outSeqs[1].matchLength = 0;
132*01826a49SYabin Cui             outSeqs[1].litLength = (U32)(srcSize - 1);
133*01826a49SYabin Cui             return 2;
134*01826a49SYabin Cui         case EMF_SMALL_ERROR:
135*01826a49SYabin Cui             return outSeqsCapacity + 1;
136*01826a49SYabin Cui         case EMF_BIG_ERROR:
137*01826a49SYabin Cui         default:
138*01826a49SYabin Cui             return ZSTD_SEQUENCE_PRODUCER_ERROR;
139*01826a49SYabin Cui     }
140*01826a49SYabin Cui }
141