1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * 6*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2002-2010, International Business Machines 7*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 8*0e209d39SAndroid Build Coastguard Worker * 9*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 10*0e209d39SAndroid Build Coastguard Worker * file name: propsvec.h 11*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8 12*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used) 13*0e209d39SAndroid Build Coastguard Worker * indentation:4 14*0e209d39SAndroid Build Coastguard Worker * 15*0e209d39SAndroid Build Coastguard Worker * created on: 2002feb22 16*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 17*0e209d39SAndroid Build Coastguard Worker * 18*0e209d39SAndroid Build Coastguard Worker * Store bits (Unicode character properties) in bit set vectors. 19*0e209d39SAndroid Build Coastguard Worker */ 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #ifndef __UPROPSVEC_H__ 22*0e209d39SAndroid Build Coastguard Worker #define __UPROPSVEC_H__ 23*0e209d39SAndroid Build Coastguard Worker 24*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 25*0e209d39SAndroid Build Coastguard Worker #include "utrie.h" 26*0e209d39SAndroid Build Coastguard Worker #include "utrie2.h" 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker U_CDECL_BEGIN 29*0e209d39SAndroid Build Coastguard Worker 30*0e209d39SAndroid Build Coastguard Worker /** 31*0e209d39SAndroid Build Coastguard Worker * Unicode Properties Vectors associated with code point ranges. 32*0e209d39SAndroid Build Coastguard Worker * 33*0e209d39SAndroid Build Coastguard Worker * Rows of uint32_t integers in a contiguous array store 34*0e209d39SAndroid Build Coastguard Worker * the range limits and the properties vectors. 35*0e209d39SAndroid Build Coastguard Worker * 36*0e209d39SAndroid Build Coastguard Worker * Logically, each row has a certain number of uint32_t values, 37*0e209d39SAndroid Build Coastguard Worker * which is set via the upvec_open() "columns" parameter. 38*0e209d39SAndroid Build Coastguard Worker * 39*0e209d39SAndroid Build Coastguard Worker * Internally, two additional columns are stored. 40*0e209d39SAndroid Build Coastguard Worker * In each internal row, 41*0e209d39SAndroid Build Coastguard Worker * row[0] contains the start code point and 42*0e209d39SAndroid Build Coastguard Worker * row[1] contains the limit code point, 43*0e209d39SAndroid Build Coastguard Worker * which is the start of the next range. 44*0e209d39SAndroid Build Coastguard Worker * 45*0e209d39SAndroid Build Coastguard Worker * Initially, there is only one "normal" row for 46*0e209d39SAndroid Build Coastguard Worker * range [0..0x110000[ with values 0. 47*0e209d39SAndroid Build Coastguard Worker * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. 48*0e209d39SAndroid Build Coastguard Worker * 49*0e209d39SAndroid Build Coastguard Worker * It would be possible to store only one range boundary per row, 50*0e209d39SAndroid Build Coastguard Worker * but self-contained rows allow to later sort them by contents. 51*0e209d39SAndroid Build Coastguard Worker */ 52*0e209d39SAndroid Build Coastguard Worker struct UPropsVectors; 53*0e209d39SAndroid Build Coastguard Worker typedef struct UPropsVectors UPropsVectors; 54*0e209d39SAndroid Build Coastguard Worker 55*0e209d39SAndroid Build Coastguard Worker /* 56*0e209d39SAndroid Build Coastguard Worker * Special pseudo code points for storing the initialValue and the errorValue, 57*0e209d39SAndroid Build Coastguard Worker * which are used to initialize a UTrie2 or similar. 58*0e209d39SAndroid Build Coastguard Worker */ 59*0e209d39SAndroid Build Coastguard Worker #define UPVEC_FIRST_SPECIAL_CP 0x110000 60*0e209d39SAndroid Build Coastguard Worker #define UPVEC_INITIAL_VALUE_CP 0x110000 61*0e209d39SAndroid Build Coastguard Worker #define UPVEC_ERROR_VALUE_CP 0x110001 62*0e209d39SAndroid Build Coastguard Worker #define UPVEC_MAX_CP 0x110001 63*0e209d39SAndroid Build Coastguard Worker 64*0e209d39SAndroid Build Coastguard Worker /* 65*0e209d39SAndroid Build Coastguard Worker * Special pseudo code point used in upvec_compact() signalling the end of 66*0e209d39SAndroid Build Coastguard Worker * delivering special values and the beginning of delivering real ones. 67*0e209d39SAndroid Build Coastguard Worker * Stable value, unlike UPVEC_MAX_CP which might grow over time. 68*0e209d39SAndroid Build Coastguard Worker */ 69*0e209d39SAndroid Build Coastguard Worker #define UPVEC_START_REAL_VALUES_CP 0x200000 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker /* 72*0e209d39SAndroid Build Coastguard Worker * Open a UPropsVectors object. 73*0e209d39SAndroid Build Coastguard Worker * @param columns Number of value integers (uint32_t) per row. 74*0e209d39SAndroid Build Coastguard Worker */ 75*0e209d39SAndroid Build Coastguard Worker U_CAPI UPropsVectors * U_EXPORT2 76*0e209d39SAndroid Build Coastguard Worker upvec_open(int32_t columns, UErrorCode *pErrorCode); 77*0e209d39SAndroid Build Coastguard Worker 78*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 79*0e209d39SAndroid Build Coastguard Worker upvec_close(UPropsVectors *pv); 80*0e209d39SAndroid Build Coastguard Worker 81*0e209d39SAndroid Build Coastguard Worker /* 82*0e209d39SAndroid Build Coastguard Worker * In rows for code points [start..end], select the column, 83*0e209d39SAndroid Build Coastguard Worker * reset the mask bits and set the value bits (ANDed with the mask). 84*0e209d39SAndroid Build Coastguard Worker * 85*0e209d39SAndroid Build Coastguard Worker * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). 86*0e209d39SAndroid Build Coastguard Worker */ 87*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 88*0e209d39SAndroid Build Coastguard Worker upvec_setValue(UPropsVectors *pv, 89*0e209d39SAndroid Build Coastguard Worker UChar32 start, UChar32 end, 90*0e209d39SAndroid Build Coastguard Worker int32_t column, 91*0e209d39SAndroid Build Coastguard Worker uint32_t value, uint32_t mask, 92*0e209d39SAndroid Build Coastguard Worker UErrorCode *pErrorCode); 93*0e209d39SAndroid Build Coastguard Worker 94*0e209d39SAndroid Build Coastguard Worker /* 95*0e209d39SAndroid Build Coastguard Worker * Logically const but must not be used on the same pv concurrently! 96*0e209d39SAndroid Build Coastguard Worker * Always returns 0 if called after upvec_compact(). 97*0e209d39SAndroid Build Coastguard Worker */ 98*0e209d39SAndroid Build Coastguard Worker U_CAPI uint32_t U_EXPORT2 99*0e209d39SAndroid Build Coastguard Worker upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); 100*0e209d39SAndroid Build Coastguard Worker 101*0e209d39SAndroid Build Coastguard Worker /* 102*0e209d39SAndroid Build Coastguard Worker * pRangeStart and pRangeEnd can be NULL. 103*0e209d39SAndroid Build Coastguard Worker * @return NULL if rowIndex out of range and for illegal arguments, 104*0e209d39SAndroid Build Coastguard Worker * or if called after upvec_compact() 105*0e209d39SAndroid Build Coastguard Worker */ 106*0e209d39SAndroid Build Coastguard Worker U_CAPI uint32_t * U_EXPORT2 107*0e209d39SAndroid Build Coastguard Worker upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, 108*0e209d39SAndroid Build Coastguard Worker UChar32 *pRangeStart, UChar32 *pRangeEnd); 109*0e209d39SAndroid Build Coastguard Worker 110*0e209d39SAndroid Build Coastguard Worker /* 111*0e209d39SAndroid Build Coastguard Worker * Compact the vectors: 112*0e209d39SAndroid Build Coastguard Worker * - modify the memory 113*0e209d39SAndroid Build Coastguard Worker * - keep only unique vectors 114*0e209d39SAndroid Build Coastguard Worker * - store them contiguously from the beginning of the memory 115*0e209d39SAndroid Build Coastguard Worker * - for each (non-unique) row, call the handler function 116*0e209d39SAndroid Build Coastguard Worker * 117*0e209d39SAndroid Build Coastguard Worker * The handler's rowIndex is the index of the row in the compacted 118*0e209d39SAndroid Build Coastguard Worker * memory block. 119*0e209d39SAndroid Build Coastguard Worker * (Therefore, it starts at 0 increases in increments of the columns value.) 120*0e209d39SAndroid Build Coastguard Worker * 121*0e209d39SAndroid Build Coastguard Worker * In a first phase, only special values are delivered (each exactly once), 122*0e209d39SAndroid Build Coastguard Worker * with start==end both equalling a special pseudo code point. 123*0e209d39SAndroid Build Coastguard Worker * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP 124*0e209d39SAndroid Build Coastguard Worker * where rowIndex is the length of the compacted array, 125*0e209d39SAndroid Build Coastguard Worker * and the row is arbitrary (but not NULL). 126*0e209d39SAndroid Build Coastguard Worker * Then, in the second phase, the handler is called for each row of real values. 127*0e209d39SAndroid Build Coastguard Worker */ 128*0e209d39SAndroid Build Coastguard Worker typedef void U_CALLCONV 129*0e209d39SAndroid Build Coastguard Worker UPVecCompactHandler(void *context, 130*0e209d39SAndroid Build Coastguard Worker UChar32 start, UChar32 end, 131*0e209d39SAndroid Build Coastguard Worker int32_t rowIndex, uint32_t *row, int32_t columns, 132*0e209d39SAndroid Build Coastguard Worker UErrorCode *pErrorCode); 133*0e209d39SAndroid Build Coastguard Worker 134*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 135*0e209d39SAndroid Build Coastguard Worker upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); 136*0e209d39SAndroid Build Coastguard Worker 137*0e209d39SAndroid Build Coastguard Worker /* 138*0e209d39SAndroid Build Coastguard Worker * Get the vectors array after calling upvec_compact(). 139*0e209d39SAndroid Build Coastguard Worker * The caller must not modify nor release the returned array. 140*0e209d39SAndroid Build Coastguard Worker * Returns NULL if called before upvec_compact(). 141*0e209d39SAndroid Build Coastguard Worker */ 142*0e209d39SAndroid Build Coastguard Worker U_CAPI const uint32_t * U_EXPORT2 143*0e209d39SAndroid Build Coastguard Worker upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); 144*0e209d39SAndroid Build Coastguard Worker 145*0e209d39SAndroid Build Coastguard Worker /* 146*0e209d39SAndroid Build Coastguard Worker * Get a clone of the vectors array after calling upvec_compact(). 147*0e209d39SAndroid Build Coastguard Worker * The caller owns the returned array and must uprv_free() it. 148*0e209d39SAndroid Build Coastguard Worker * Returns NULL if called before upvec_compact(). 149*0e209d39SAndroid Build Coastguard Worker */ 150*0e209d39SAndroid Build Coastguard Worker U_CAPI uint32_t * U_EXPORT2 151*0e209d39SAndroid Build Coastguard Worker upvec_cloneArray(const UPropsVectors *pv, 152*0e209d39SAndroid Build Coastguard Worker int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); 153*0e209d39SAndroid Build Coastguard Worker 154*0e209d39SAndroid Build Coastguard Worker /* 155*0e209d39SAndroid Build Coastguard Worker * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted 156*0e209d39SAndroid Build Coastguard Worker * vectors array, and freeze the trie. 157*0e209d39SAndroid Build Coastguard Worker */ 158*0e209d39SAndroid Build Coastguard Worker U_CAPI UTrie2 * U_EXPORT2 159*0e209d39SAndroid Build Coastguard Worker upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); 160*0e209d39SAndroid Build Coastguard Worker 161*0e209d39SAndroid Build Coastguard Worker struct UPVecToUTrie2Context { 162*0e209d39SAndroid Build Coastguard Worker UTrie2 *trie; 163*0e209d39SAndroid Build Coastguard Worker int32_t initialValue; 164*0e209d39SAndroid Build Coastguard Worker int32_t errorValue; 165*0e209d39SAndroid Build Coastguard Worker int32_t maxValue; 166*0e209d39SAndroid Build Coastguard Worker }; 167*0e209d39SAndroid Build Coastguard Worker typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; 168*0e209d39SAndroid Build Coastguard Worker 169*0e209d39SAndroid Build Coastguard Worker /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ 170*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_CALLCONV 171*0e209d39SAndroid Build Coastguard Worker upvec_compactToUTrie2Handler(void *context, 172*0e209d39SAndroid Build Coastguard Worker UChar32 start, UChar32 end, 173*0e209d39SAndroid Build Coastguard Worker int32_t rowIndex, uint32_t *row, int32_t columns, 174*0e209d39SAndroid Build Coastguard Worker UErrorCode *pErrorCode); 175*0e209d39SAndroid Build Coastguard Worker 176*0e209d39SAndroid Build Coastguard Worker U_CDECL_END 177*0e209d39SAndroid Build Coastguard Worker 178*0e209d39SAndroid Build Coastguard Worker #endif 179