1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * 6*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2001-2008, International Business Machines 7*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 8*0e209d39SAndroid Build Coastguard Worker * 9*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 10*0e209d39SAndroid Build Coastguard Worker * file name: utrie2_impl.h 11*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8 12*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used) 13*0e209d39SAndroid Build Coastguard Worker * indentation:4 14*0e209d39SAndroid Build Coastguard Worker * 15*0e209d39SAndroid Build Coastguard Worker * created on: 2008sep26 (split off from utrie2.c) 16*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 17*0e209d39SAndroid Build Coastguard Worker * 18*0e209d39SAndroid Build Coastguard Worker * Definitions needed for both runtime and builder code for UTrie2, 19*0e209d39SAndroid Build Coastguard Worker * used by utrie2.c and utrie2_builder.c. 20*0e209d39SAndroid Build Coastguard Worker */ 21*0e209d39SAndroid Build Coastguard Worker 22*0e209d39SAndroid Build Coastguard Worker #ifndef __UTRIE2_IMPL_H__ 23*0e209d39SAndroid Build Coastguard Worker #define __UTRIE2_IMPL_H__ 24*0e209d39SAndroid Build Coastguard Worker 25*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG 26*0e209d39SAndroid Build Coastguard Worker #include "unicode/umutablecptrie.h" 27*0e209d39SAndroid Build Coastguard Worker #endif 28*0e209d39SAndroid Build Coastguard Worker #include "utrie2.h" 29*0e209d39SAndroid Build Coastguard Worker 30*0e209d39SAndroid Build Coastguard Worker /* Public UTrie2 API implementation ----------------------------------------- */ 31*0e209d39SAndroid Build Coastguard Worker 32*0e209d39SAndroid Build Coastguard Worker /* 33*0e209d39SAndroid Build Coastguard Worker * These definitions are mostly needed by utrie2.cpp, 34*0e209d39SAndroid Build Coastguard Worker * but also by utrie2_serialize() and utrie2_swap(). 35*0e209d39SAndroid Build Coastguard Worker */ 36*0e209d39SAndroid Build Coastguard Worker 37*0e209d39SAndroid Build Coastguard Worker // UTrie2 signature values, in platform endianness and opposite endianness. 38*0e209d39SAndroid Build Coastguard Worker // The UTrie2 signature ASCII byte values spell "Tri2". 39*0e209d39SAndroid Build Coastguard Worker #define UTRIE2_SIG 0x54726932 40*0e209d39SAndroid Build Coastguard Worker #define UTRIE2_OE_SIG 0x32697254 41*0e209d39SAndroid Build Coastguard Worker 42*0e209d39SAndroid Build Coastguard Worker /** 43*0e209d39SAndroid Build Coastguard Worker * Trie data structure in serialized form: 44*0e209d39SAndroid Build Coastguard Worker * 45*0e209d39SAndroid Build Coastguard Worker * UTrie2Header header; 46*0e209d39SAndroid Build Coastguard Worker * uint16_t index[header.index2Length]; 47*0e209d39SAndroid Build Coastguard Worker * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] 48*0e209d39SAndroid Build Coastguard Worker * @internal 49*0e209d39SAndroid Build Coastguard Worker */ 50*0e209d39SAndroid Build Coastguard Worker typedef struct UTrie2Header { 51*0e209d39SAndroid Build Coastguard Worker /** "Tri2" in big-endian US-ASCII (0x54726932) */ 52*0e209d39SAndroid Build Coastguard Worker uint32_t signature; 53*0e209d39SAndroid Build Coastguard Worker 54*0e209d39SAndroid Build Coastguard Worker /** 55*0e209d39SAndroid Build Coastguard Worker * options bit field: 56*0e209d39SAndroid Build Coastguard Worker * 15.. 4 reserved (0) 57*0e209d39SAndroid Build Coastguard Worker * 3.. 0 UTrie2ValueBits valueBits 58*0e209d39SAndroid Build Coastguard Worker */ 59*0e209d39SAndroid Build Coastguard Worker uint16_t options; 60*0e209d39SAndroid Build Coastguard Worker 61*0e209d39SAndroid Build Coastguard Worker /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ 62*0e209d39SAndroid Build Coastguard Worker uint16_t indexLength; 63*0e209d39SAndroid Build Coastguard Worker 64*0e209d39SAndroid Build Coastguard Worker /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ 65*0e209d39SAndroid Build Coastguard Worker uint16_t shiftedDataLength; 66*0e209d39SAndroid Build Coastguard Worker 67*0e209d39SAndroid Build Coastguard Worker /** Null index and data blocks, not shifted. */ 68*0e209d39SAndroid Build Coastguard Worker uint16_t index2NullOffset, dataNullOffset; 69*0e209d39SAndroid Build Coastguard Worker 70*0e209d39SAndroid Build Coastguard Worker /** 71*0e209d39SAndroid Build Coastguard Worker * First code point of the single-value range ending with U+10ffff, 72*0e209d39SAndroid Build Coastguard Worker * rounded up and then shifted right by UTRIE2_SHIFT_1. 73*0e209d39SAndroid Build Coastguard Worker */ 74*0e209d39SAndroid Build Coastguard Worker uint16_t shiftedHighStart; 75*0e209d39SAndroid Build Coastguard Worker } UTrie2Header; 76*0e209d39SAndroid Build Coastguard Worker 77*0e209d39SAndroid Build Coastguard Worker /** 78*0e209d39SAndroid Build Coastguard Worker * Constants for use with UTrie2Header.options. 79*0e209d39SAndroid Build Coastguard Worker * @internal 80*0e209d39SAndroid Build Coastguard Worker */ 81*0e209d39SAndroid Build Coastguard Worker enum { 82*0e209d39SAndroid Build Coastguard Worker /** Mask to get the UTrie2ValueBits valueBits from options. */ 83*0e209d39SAndroid Build Coastguard Worker UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf 84*0e209d39SAndroid Build Coastguard Worker }; 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker /* Building a trie ---------------------------------------------------------- */ 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker /* 89*0e209d39SAndroid Build Coastguard Worker * These definitions are mostly needed by utrie2_builder.c, but also by 90*0e209d39SAndroid Build Coastguard Worker * utrie2_get32() and utrie2_enum(). 91*0e209d39SAndroid Build Coastguard Worker */ 92*0e209d39SAndroid Build Coastguard Worker 93*0e209d39SAndroid Build Coastguard Worker enum { 94*0e209d39SAndroid Build Coastguard Worker /** 95*0e209d39SAndroid Build Coastguard Worker * At build time, leave a gap in the index-2 table, 96*0e209d39SAndroid Build Coastguard Worker * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table 97*0e209d39SAndroid Build Coastguard Worker * and the supplementary index-1 table. 98*0e209d39SAndroid Build Coastguard Worker * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. 99*0e209d39SAndroid Build Coastguard Worker */ 100*0e209d39SAndroid Build Coastguard Worker UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, 101*0e209d39SAndroid Build Coastguard Worker UNEWTRIE2_INDEX_GAP_LENGTH= 102*0e209d39SAndroid Build Coastguard Worker ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)& 103*0e209d39SAndroid Build Coastguard Worker ~UTRIE2_INDEX_2_MASK, 104*0e209d39SAndroid Build Coastguard Worker 105*0e209d39SAndroid Build Coastguard Worker /** 106*0e209d39SAndroid Build Coastguard Worker * Maximum length of the build-time index-2 array. 107*0e209d39SAndroid Build Coastguard Worker * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, 108*0e209d39SAndroid Build Coastguard Worker * plus the part of the index-2 table for lead surrogate code points, 109*0e209d39SAndroid Build Coastguard Worker * plus the build-time index gap, 110*0e209d39SAndroid Build Coastguard Worker * plus the null index-2 block. 111*0e209d39SAndroid Build Coastguard Worker */ 112*0e209d39SAndroid Build Coastguard Worker UNEWTRIE2_MAX_INDEX_2_LENGTH= 113*0e209d39SAndroid Build Coastguard Worker (0x110000>>UTRIE2_SHIFT_2)+ 114*0e209d39SAndroid Build Coastguard Worker UTRIE2_LSCP_INDEX_2_LENGTH+ 115*0e209d39SAndroid Build Coastguard Worker UNEWTRIE2_INDEX_GAP_LENGTH+ 116*0e209d39SAndroid Build Coastguard Worker UTRIE2_INDEX_2_BLOCK_LENGTH, 117*0e209d39SAndroid Build Coastguard Worker 118*0e209d39SAndroid Build Coastguard Worker UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 119*0e209d39SAndroid Build Coastguard Worker }; 120*0e209d39SAndroid Build Coastguard Worker 121*0e209d39SAndroid Build Coastguard Worker /** 122*0e209d39SAndroid Build Coastguard Worker * Maximum length of the build-time data array. 123*0e209d39SAndroid Build Coastguard Worker * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block, 124*0e209d39SAndroid Build Coastguard Worker * plus values for the 0x400 surrogate code units. 125*0e209d39SAndroid Build Coastguard Worker */ 126*0e209d39SAndroid Build Coastguard Worker #define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400) 127*0e209d39SAndroid Build Coastguard Worker 128*0e209d39SAndroid Build Coastguard Worker /* 129*0e209d39SAndroid Build Coastguard Worker * Build-time trie structure. 130*0e209d39SAndroid Build Coastguard Worker * 131*0e209d39SAndroid Build Coastguard Worker * Just using a boolean flag for "repeat use" could lead to data array overflow 132*0e209d39SAndroid Build Coastguard Worker * because we would not be able to detect when a data block becomes unused. 133*0e209d39SAndroid Build Coastguard Worker * It also leads to orphan data blocks that are kept through serialization. 134*0e209d39SAndroid Build Coastguard Worker * 135*0e209d39SAndroid Build Coastguard Worker * Need to use reference counting for data blocks, 136*0e209d39SAndroid Build Coastguard Worker * and allocDataBlock() needs to look for a free block before increasing dataLength. 137*0e209d39SAndroid Build Coastguard Worker * 138*0e209d39SAndroid Build Coastguard Worker * This scheme seems like overkill for index-2 blocks since the whole index array is 139*0e209d39SAndroid Build Coastguard Worker * preallocated anyway (unlike the growable data array). 140*0e209d39SAndroid Build Coastguard Worker * Just allocating multiple index-2 blocks as needed. 141*0e209d39SAndroid Build Coastguard Worker */ 142*0e209d39SAndroid Build Coastguard Worker struct UNewTrie2 { 143*0e209d39SAndroid Build Coastguard Worker int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; 144*0e209d39SAndroid Build Coastguard Worker int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; 145*0e209d39SAndroid Build Coastguard Worker uint32_t *data; 146*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG 147*0e209d39SAndroid Build Coastguard Worker UMutableCPTrie *t3; 148*0e209d39SAndroid Build Coastguard Worker #endif 149*0e209d39SAndroid Build Coastguard Worker 150*0e209d39SAndroid Build Coastguard Worker uint32_t initialValue, errorValue; 151*0e209d39SAndroid Build Coastguard Worker int32_t index2Length, dataCapacity, dataLength; 152*0e209d39SAndroid Build Coastguard Worker int32_t firstFreeBlock; 153*0e209d39SAndroid Build Coastguard Worker int32_t index2NullOffset, dataNullOffset; 154*0e209d39SAndroid Build Coastguard Worker UChar32 highStart; 155*0e209d39SAndroid Build Coastguard Worker UBool isCompacted; 156*0e209d39SAndroid Build Coastguard Worker 157*0e209d39SAndroid Build Coastguard Worker /** 158*0e209d39SAndroid Build Coastguard Worker * Multi-purpose per-data-block table. 159*0e209d39SAndroid Build Coastguard Worker * 160*0e209d39SAndroid Build Coastguard Worker * Before compacting: 161*0e209d39SAndroid Build Coastguard Worker * 162*0e209d39SAndroid Build Coastguard Worker * Per-data-block reference counters/free-block list. 163*0e209d39SAndroid Build Coastguard Worker * 0: unused 164*0e209d39SAndroid Build Coastguard Worker * >0: reference counter (number of index-2 entries pointing here) 165*0e209d39SAndroid Build Coastguard Worker * <0: next free data block in free-block list 166*0e209d39SAndroid Build Coastguard Worker * 167*0e209d39SAndroid Build Coastguard Worker * While compacting: 168*0e209d39SAndroid Build Coastguard Worker * 169*0e209d39SAndroid Build Coastguard Worker * Map of adjusted indexes, used in compactData() and compactIndex2(). 170*0e209d39SAndroid Build Coastguard Worker * Maps from original indexes to new ones. 171*0e209d39SAndroid Build Coastguard Worker */ 172*0e209d39SAndroid Build Coastguard Worker int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; 173*0e209d39SAndroid Build Coastguard Worker }; 174*0e209d39SAndroid Build Coastguard Worker 175*0e209d39SAndroid Build Coastguard Worker #endif 176