1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_mem/vpx_mem.h"
12 #include "vpx_ports/mem.h"
13
14 #include "vp9/common/vp9_blockd.h"
15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_entropy.h"
17 #if CONFIG_COEFFICIENT_RANGE_CHECKING
18 #include "vp9/common/vp9_idct.h"
19 #endif
20
21 #include "vp9/decoder/vp9_detokenize.h"
22
// Indices into one coef_probs[band][ctx] probability row.
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2

// Bumps the counter for `token` at the current band/context, but only when
// statistics are being collected. The expansion site must have `counts`,
// `coef_counts`, `band` and `ctx` in scope.
#define INCREMENT_COUNT(token)         \
  do {                                 \
    if (counts) {                      \
      ++coef_counts[band][ctx][token]; \
    }                                  \
  } while (0)
31
read_bool(vpx_reader * r,int prob,BD_VALUE * value,int * count,unsigned int * range)32 static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value,
33 int *count, unsigned int *range) {
34 const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT;
35 const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
36 #if CONFIG_BITSTREAM_DEBUG
37 const int queue_r = bitstream_queue_get_read();
38 const int frame_idx = bitstream_queue_get_frame_read();
39 int ref_result, ref_prob;
40 bitstream_queue_pop(&ref_result, &ref_prob);
41 if (prob != ref_prob) {
42 fprintf(stderr,
43 "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d "
44 "queue_r %d\n",
45 frame_idx, prob, ref_prob, queue_r);
46
47 assert(0);
48 }
49 #endif
50
51 if (*count < 0) {
52 r->value = *value;
53 r->count = *count;
54 vpx_reader_fill(r);
55 *value = r->value;
56 *count = r->count;
57 }
58
59 if (*value >= bigsplit) {
60 *range = *range - split;
61 *value = *value - bigsplit;
62 {
63 const int shift = vpx_norm[*range];
64 *range <<= shift;
65 *value <<= shift;
66 *count -= shift;
67 }
68 #if CONFIG_BITSTREAM_DEBUG
69 {
70 const int bit = 1;
71 if (bit != ref_result) {
72 fprintf(
73 stderr,
74 "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d "
75 "queue_r %d\n",
76 frame_idx, bit, ref_result, queue_r);
77
78 assert(0);
79 }
80 }
81 #endif
82 return 1;
83 }
84 *range = split;
85 {
86 const int shift = vpx_norm[*range];
87 *range <<= shift;
88 *value <<= shift;
89 *count -= shift;
90 }
91 #if CONFIG_BITSTREAM_DEBUG
92 {
93 const int bit = 0;
94 if (bit != ref_result) {
95 fprintf(stderr,
96 "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d "
97 "queue_r %d\n",
98 frame_idx, bit, ref_result, queue_r);
99
100 assert(0);
101 }
102 }
103 #endif
104 return 0;
105 }
106
read_coeff(vpx_reader * r,const vpx_prob * probs,int n,BD_VALUE * value,int * count,unsigned int * range)107 static INLINE int read_coeff(vpx_reader *r, const vpx_prob *probs, int n,
108 BD_VALUE *value, int *count, unsigned int *range) {
109 int i, val = 0;
110 for (i = 0; i < n; ++i)
111 val = (val << 1) | read_bool(r, probs[i], value, count, range);
112 return val;
113 }
114
// Decodes the coefficient tokens of one transform block and writes the
// dequantized values into `dqcoeff` at the positions given by `scan`.
//
//   xd      - supplies the frame context (probabilities), the optional
//             counters (xd->counts may be NULL) and the current mode info.
//   type    - plane type used to select the probability / counter tables.
//   tx_size - transform size; up to 16 << (2 * tx_size) positions are read.
//   dq      - dequantizer pair: dq[0] applies to the first coefficient that
//             is decoded, dq[1] to every position after the first token.
//   ctx     - initial coefficient context; later contexts come from
//             get_coef_context() over `nb` and the local token cache.
//   r       - bit reader; its value/range/count are copied to locals on
//             entry and stored back before every return.
//
// Returns the scan index at which decoding stopped (the block's eob).
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
                        int ctx, const int16_t *scan, const int16_t *nb,
                        vpx_reader *r) {
  FRAME_COUNTS *counts = xd->counts;
  const FRAME_CONTEXT *const fc = xd->fc;
  const int ref = is_inter_block(xd->mi[0]);
  const int max_eob = 16 << (tx_size << 1);
  const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
      fc->coef_probs[tx_size][type][ref];
  unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
  unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
  uint8_t token_cache[32 * 32];
  const uint8_t *band_translate = get_band_translate(tx_size);
  // 32x32 blocks shift the dequantized product right by one bit.
  const int dq_shift = (tx_size == TX_32X32);
  const vpx_prob *node_probs;
  int band;
  int pos = 0;
  int16_t dqv = dq[0];
#if CONFIG_VP9_HIGHBITDEPTH
  // Higher bit depths read more CAT6 extra bits. The 10-bit case uses the
  // 12-bit table with its first two entries skipped, matching the two fewer
  // bits it reads.
  const uint8_t *const cat6_prob =
      (xd->bd == VPX_BITS_12)
          ? vp9_cat6_prob_high12
          : (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : vp9_cat6_prob;
  const int cat6_bits =
      (xd->bd == VPX_BITS_12) ? 18 : (xd->bd == VPX_BITS_10) ? 16 : 14;
#else
  const uint8_t *const cat6_prob = vp9_cat6_prob;
  const int cat6_bits = 14;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // The reader state lives in locals for the duration of the call; the
  // compiler generates better code this way than when going through r.
  BD_VALUE value = r->value;
  unsigned int range = r->range;
  int count = r->count;

  if (counts) {
    coef_counts = counts->coef[tx_size][type][ref];
    eob_branch_count = counts->eob_branch[tx_size][type][ref];
  }

  while (pos < max_eob) {
    int magnitude = -1;
    int v;
    int negative;

    band = *band_translate++;
    node_probs = coef_probs[band][ctx];
    if (counts) ++eob_branch_count[band][ctx];

    // End-of-block check.
    if (!read_bool(r, node_probs[EOB_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(EOB_MODEL_TOKEN);
      break;
    }

    // Run of zero coefficients. No end-of-block check is made inside a run.
    while (
        !read_bool(r, node_probs[ZERO_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(ZERO_TOKEN);
      dqv = dq[1];
      token_cache[scan[pos]] = 0;
      ++pos;
      if (pos >= max_eob) {
        // The block ended on zero tokens, without an explicit eob token.
        r->value = value;
        r->range = range;
        r->count = count;
        return pos;
      }
      ctx = get_coef_context(nb, token_cache, pos);
      band = *band_translate++;
      node_probs = coef_probs[band][ctx];
    }

    if (!read_bool(r, node_probs[ONE_CONTEXT_NODE], &value, &count, &range)) {
      // Magnitude is exactly one.
      INCREMENT_COUNT(ONE_TOKEN);
      token_cache[scan[pos]] = 1;
      v = dqv >> dq_shift;
    } else {
      // Every magnitude above one is counted under TWO_TOKEN; the rest of
      // the tree is walked with the row selected by the pivot probability.
      const vpx_prob *const pareto =
          vp9_pareto8_full[node_probs[PIVOT_NODE] - 1];
      INCREMENT_COUNT(TWO_TOKEN);

      if (!read_bool(r, pareto[0], &value, &count, &range)) {
        // Magnitude 2, 3 or 4.
        if (!read_bool(r, pareto[1], &value, &count, &range)) {
          token_cache[scan[pos]] = 2;
          v = (2 * dqv) >> dq_shift;
        } else {
          token_cache[scan[pos]] = 3;
          v = ((3 + read_bool(r, pareto[2], &value, &count, &range)) * dqv) >>
              dq_shift;
        }
      } else {
        // Category tokens: a base value plus a number of extra bits.
        if (!read_bool(r, pareto[3], &value, &count, &range)) {
          token_cache[scan[pos]] = 4;
          if (read_bool(r, pareto[4], &value, &count, &range)) {
            magnitude = CAT2_MIN_VAL + read_coeff(r, vp9_cat2_prob, 2, &value,
                                                  &count, &range);
          } else {
            magnitude = CAT1_MIN_VAL + read_coeff(r, vp9_cat1_prob, 1, &value,
                                                  &count, &range);
          }
        } else {
          token_cache[scan[pos]] = 5;
          if (!read_bool(r, pareto[5], &value, &count, &range)) {
            if (read_bool(r, pareto[6], &value, &count, &range)) {
              magnitude = CAT4_MIN_VAL + read_coeff(r, vp9_cat4_prob, 4,
                                                    &value, &count, &range);
            } else {
              magnitude = CAT3_MIN_VAL + read_coeff(r, vp9_cat3_prob, 3,
                                                    &value, &count, &range);
            }
          } else {
            if (read_bool(r, pareto[7], &value, &count, &range)) {
              magnitude = CAT6_MIN_VAL + read_coeff(r, cat6_prob, cat6_bits,
                                                    &value, &count, &range);
            } else {
              magnitude = CAT5_MIN_VAL + read_coeff(r, vp9_cat5_prob, 5,
                                                    &value, &count, &range);
            }
          }
        }
#if CONFIG_VP9_HIGHBITDEPTH
        // The magnitude may use 18 bits, so multiply in 64-bit.
        v = (int)(((int64_t)magnitude * dqv) >> dq_shift);
#else
        v = (magnitude * dqv) >> dq_shift;
#endif
      }
    }

    // Sign bit, read with an even (128) probability.
    negative = read_bool(r, 128, &value, &count, &range);
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VP9_HIGHBITDEPTH
    dqcoeff[scan[pos]] = highbd_check_range(negative ? -v : v, xd->bd);
#else
    dqcoeff[scan[pos]] = check_range(negative ? -v : v);
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    dqcoeff[scan[pos]] = (tran_low_t)(negative ? -v : v);
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING

    ++pos;
    ctx = get_coef_context(nb, token_cache, pos);
    dqv = dq[1];
  }

  r->value = value;
  r->range = range;
  r->count = count;
  return pos;
}
262
// Computes how far a transform placed at block position (x, y) and spanning
// `tx_size_in_blocks` blocks per side overhangs the limits stored in
// xd->max_blocks_wide / xd->max_blocks_high.
//
// For each direction with a non-zero limit that the transform exceeds, the
// matching output receives 8 times the number of overhanging blocks. An
// output is left untouched when its limit is zero or not exceeded, so
// callers should pre-initialise both.
static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l,
                          int x, int y, unsigned int tx_size_in_blocks) {
  if (xd->max_blocks_wide && tx_size_in_blocks + x > xd->max_blocks_wide) {
    *ctx_shift_a = 8 * (tx_size_in_blocks - (xd->max_blocks_wide - x));
  }

  if (xd->max_blocks_high && tx_size_in_blocks + y > xd->max_blocks_high) {
    *ctx_shift_l = 8 * (tx_size_in_blocks - (xd->max_blocks_high - y));
  }
}
274
// Decodes the coefficient tokens for one transform block of `plane` at block
// offset (x, y) and refreshes the above/left entropy contexts it covers.
//
// Works in three steps:
//   1. Derive the initial context from whether any above / left context
//      entry under the transform is non-zero (and, for sizes above 4x4, get
//      the overhang shifts from get_ctx_shift()).
//   2. Decode the coefficients with decode_coefs().
//   3. Write one flag byte per covered block into the above and left
//      contexts: set when eob > 0, with the packed value shifted right by
//      the overhang amount.
//
// Returns the eob reported by decode_coefs(), or 0 for an invalid tx_size
// (which also trips an assert in debug builds).
int vp9_decode_block_tokens(TileWorkerData *twd, int plane, const ScanOrder *sc,
                            int x, int y, TX_SIZE tx_size, int seg_id) {
  MACROBLOCKD *xd = &twd->xd;
  vpx_reader *r = &twd->bit_reader;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int16_t *const dequant = pd->seg_dequant[seg_id];
  ENTROPY_CONTEXT *above = pd->above_context + x;
  ENTROPY_CONTEXT *left = pd->left_context + y;
  int shift_above = 0;
  int shift_left = 0;
  int ctx;
  int eob;
  int has_coeffs;

  // Step 1: initial context (0, 1 or 2) from the neighbouring flags. The
  // wider loads examine all context bytes under the transform at once.
  switch (tx_size) {
    case TX_4X4: ctx = (above[0] != 0) + (left[0] != 0); break;
    case TX_8X8:
      get_ctx_shift(xd, &shift_above, &shift_left, x, y, 1 << TX_8X8);
      ctx = !!*(const uint16_t *)above + !!*(const uint16_t *)left;
      break;
    case TX_16X16:
      get_ctx_shift(xd, &shift_above, &shift_left, x, y, 1 << TX_16X16);
      ctx = !!*(const uint32_t *)above + !!*(const uint32_t *)left;
      break;
    case TX_32X32:
      get_ctx_shift(xd, &shift_above, &shift_left, x, y, 1 << TX_32X32);
      // NOTE: casting to uint64_t here is safe because the default memory
      // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte
      // boundaries.
      ctx = !!*(const uint64_t *)above + !!*(const uint64_t *)left;
      break;
    default:
      assert(0 && "Invalid transform size.");
      return 0;
  }

  // Step 2: token decoding.
  eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, dequant,
                     ctx, sc->scan, sc->neighbors, r);
  has_coeffs = (eob > 0);

  // Step 3: publish the has-coefficients flag to the covered context bytes.
  switch (tx_size) {
    case TX_4X4: above[0] = left[0] = has_coeffs; break;
    case TX_8X8:
      *(uint16_t *)above = (has_coeffs * 0x0101) >> shift_above;
      *(uint16_t *)left = (has_coeffs * 0x0101) >> shift_left;
      break;
    case TX_16X16:
      *(uint32_t *)above = (has_coeffs * 0x01010101) >> shift_above;
      *(uint32_t *)left = (has_coeffs * 0x01010101) >> shift_left;
      break;
    case TX_32X32:
      *(uint64_t *)above = (has_coeffs * 0x0101010101010101ULL) >> shift_above;
      *(uint64_t *)left = (has_coeffs * 0x0101010101010101ULL) >> shift_left;
      break;
    default: break;  // Unreachable: invalid sizes returned in step 1.
  }

  return eob;
}
334