1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Yann Collet, Meta Platforms, Inc.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
11*01826a49SYabin Cui #include "external_matchfinder.h"
12*01826a49SYabin Cui #include <string.h>
13*01826a49SYabin Cui #include "zstd_compress_internal.h"
14*01826a49SYabin Cui
15*01826a49SYabin Cui #define HSIZE 1024
16*01826a49SYabin Cui static U32 const HLOG = 10;
17*01826a49SYabin Cui static U32 const MLS = 4;
18*01826a49SYabin Cui static U32 const BADIDX = 0xffffffff;
19*01826a49SYabin Cui
simpleSequenceProducer(void * sequenceProducerState,ZSTD_Sequence * outSeqs,size_t outSeqsCapacity,const void * src,size_t srcSize,const void * dict,size_t dictSize,int compressionLevel,size_t windowSize)20*01826a49SYabin Cui static size_t simpleSequenceProducer(
21*01826a49SYabin Cui void* sequenceProducerState,
22*01826a49SYabin Cui ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
23*01826a49SYabin Cui const void* src, size_t srcSize,
24*01826a49SYabin Cui const void* dict, size_t dictSize,
25*01826a49SYabin Cui int compressionLevel,
26*01826a49SYabin Cui size_t windowSize
27*01826a49SYabin Cui ) {
28*01826a49SYabin Cui const BYTE* const istart = (const BYTE*)src;
29*01826a49SYabin Cui const BYTE* const iend = istart + srcSize;
30*01826a49SYabin Cui const BYTE* ip = istart;
31*01826a49SYabin Cui const BYTE* anchor = istart;
32*01826a49SYabin Cui size_t seqCount = 0;
33*01826a49SYabin Cui U32 hashTable[HSIZE];
34*01826a49SYabin Cui
35*01826a49SYabin Cui (void)sequenceProducerState;
36*01826a49SYabin Cui (void)dict;
37*01826a49SYabin Cui (void)dictSize;
38*01826a49SYabin Cui (void)outSeqsCapacity;
39*01826a49SYabin Cui (void)compressionLevel;
40*01826a49SYabin Cui
41*01826a49SYabin Cui { int i;
42*01826a49SYabin Cui for (i=0; i < HSIZE; i++) {
43*01826a49SYabin Cui hashTable[i] = BADIDX;
44*01826a49SYabin Cui } }
45*01826a49SYabin Cui
46*01826a49SYabin Cui while (ip + MLS < iend) {
47*01826a49SYabin Cui size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
48*01826a49SYabin Cui U32 const matchIndex = hashTable[hash];
49*01826a49SYabin Cui hashTable[hash] = (U32)(ip - istart);
50*01826a49SYabin Cui
51*01826a49SYabin Cui if (matchIndex != BADIDX) {
52*01826a49SYabin Cui const BYTE* const match = istart + matchIndex;
53*01826a49SYabin Cui U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
54*01826a49SYabin Cui if (matchLen >= ZSTD_MINMATCH_MIN) {
55*01826a49SYabin Cui U32 const litLen = (U32)(ip - anchor);
56*01826a49SYabin Cui U32 const offset = (U32)(ip - match);
57*01826a49SYabin Cui ZSTD_Sequence const seq = {
58*01826a49SYabin Cui offset, litLen, matchLen, 0
59*01826a49SYabin Cui };
60*01826a49SYabin Cui
61*01826a49SYabin Cui /* Note: it's crucial to stay within the window size! */
62*01826a49SYabin Cui if (offset <= windowSize) {
63*01826a49SYabin Cui outSeqs[seqCount++] = seq;
64*01826a49SYabin Cui ip += matchLen;
65*01826a49SYabin Cui anchor = ip;
66*01826a49SYabin Cui continue;
67*01826a49SYabin Cui }
68*01826a49SYabin Cui }
69*01826a49SYabin Cui }
70*01826a49SYabin Cui
71*01826a49SYabin Cui ip++;
72*01826a49SYabin Cui }
73*01826a49SYabin Cui
74*01826a49SYabin Cui { ZSTD_Sequence const finalSeq = {
75*01826a49SYabin Cui 0, (U32)(iend - anchor), 0, 0
76*01826a49SYabin Cui };
77*01826a49SYabin Cui outSeqs[seqCount++] = finalSeq;
78*01826a49SYabin Cui }
79*01826a49SYabin Cui
80*01826a49SYabin Cui return seqCount;
81*01826a49SYabin Cui }
82*01826a49SYabin Cui
zstreamSequenceProducer(void * sequenceProducerState,ZSTD_Sequence * outSeqs,size_t outSeqsCapacity,const void * src,size_t srcSize,const void * dict,size_t dictSize,int compressionLevel,size_t windowSize)83*01826a49SYabin Cui size_t zstreamSequenceProducer(
84*01826a49SYabin Cui void* sequenceProducerState,
85*01826a49SYabin Cui ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
86*01826a49SYabin Cui const void* src, size_t srcSize,
87*01826a49SYabin Cui const void* dict, size_t dictSize,
88*01826a49SYabin Cui int compressionLevel,
89*01826a49SYabin Cui size_t windowSize
90*01826a49SYabin Cui ) {
91*01826a49SYabin Cui EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState);
92*01826a49SYabin Cui memset(outSeqs, 0, outSeqsCapacity);
93*01826a49SYabin Cui
94*01826a49SYabin Cui switch (testCase) {
95*01826a49SYabin Cui case EMF_ZERO_SEQS:
96*01826a49SYabin Cui return 0;
97*01826a49SYabin Cui case EMF_ONE_BIG_SEQ:
98*01826a49SYabin Cui outSeqs[0].offset = 0;
99*01826a49SYabin Cui outSeqs[0].matchLength = 0;
100*01826a49SYabin Cui outSeqs[0].litLength = (U32)(srcSize);
101*01826a49SYabin Cui return 1;
102*01826a49SYabin Cui case EMF_LOTS_OF_SEQS:
103*01826a49SYabin Cui return simpleSequenceProducer(
104*01826a49SYabin Cui sequenceProducerState,
105*01826a49SYabin Cui outSeqs, outSeqsCapacity,
106*01826a49SYabin Cui src, srcSize,
107*01826a49SYabin Cui dict, dictSize,
108*01826a49SYabin Cui compressionLevel,
109*01826a49SYabin Cui windowSize
110*01826a49SYabin Cui );
111*01826a49SYabin Cui case EMF_INVALID_OFFSET:
112*01826a49SYabin Cui outSeqs[0].offset = 1 << 20;
113*01826a49SYabin Cui outSeqs[0].matchLength = 4;
114*01826a49SYabin Cui outSeqs[0].litLength = (U32)(srcSize - 4);
115*01826a49SYabin Cui return 1;
116*01826a49SYabin Cui case EMF_INVALID_MATCHLEN:
117*01826a49SYabin Cui outSeqs[0].offset = 1;
118*01826a49SYabin Cui outSeqs[0].matchLength = (U32)(srcSize);
119*01826a49SYabin Cui outSeqs[0].litLength = 1;
120*01826a49SYabin Cui return 1;
121*01826a49SYabin Cui case EMF_INVALID_LITLEN:
122*01826a49SYabin Cui outSeqs[0].offset = 0;
123*01826a49SYabin Cui outSeqs[0].matchLength = 0;
124*01826a49SYabin Cui outSeqs[0].litLength = (U32)(srcSize + 1);
125*01826a49SYabin Cui return 1;
126*01826a49SYabin Cui case EMF_INVALID_LAST_LITS:
127*01826a49SYabin Cui outSeqs[0].offset = 1;
128*01826a49SYabin Cui outSeqs[0].matchLength = 1;
129*01826a49SYabin Cui outSeqs[0].litLength = 1;
130*01826a49SYabin Cui outSeqs[1].offset = 0;
131*01826a49SYabin Cui outSeqs[1].matchLength = 0;
132*01826a49SYabin Cui outSeqs[1].litLength = (U32)(srcSize - 1);
133*01826a49SYabin Cui return 2;
134*01826a49SYabin Cui case EMF_SMALL_ERROR:
135*01826a49SYabin Cui return outSeqsCapacity + 1;
136*01826a49SYabin Cui case EMF_BIG_ERROR:
137*01826a49SYabin Cui default:
138*01826a49SYabin Cui return ZSTD_SEQUENCE_PRODUCER_ERROR;
139*01826a49SYabin Cui }
140*01826a49SYabin Cui }
141