1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1999-2011, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 8*0e209d39SAndroid Build Coastguard Worker * 9*0e209d39SAndroid Build Coastguard Worker * File USC_IMPL.H 10*0e209d39SAndroid Build Coastguard Worker * 11*0e209d39SAndroid Build Coastguard Worker * Modification History: 12*0e209d39SAndroid Build Coastguard Worker * 13*0e209d39SAndroid Build Coastguard Worker * Date Name Description 14*0e209d39SAndroid Build Coastguard Worker * 07/08/2002 Eric Mader Creation. 15*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 16*0e209d39SAndroid Build Coastguard Worker */ 17*0e209d39SAndroid Build Coastguard Worker 18*0e209d39SAndroid Build Coastguard Worker #ifndef USC_IMPL_H 19*0e209d39SAndroid Build Coastguard Worker #define USC_IMPL_H 20*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 21*0e209d39SAndroid Build Coastguard Worker #include "unicode/uscript.h" 22*0e209d39SAndroid Build Coastguard Worker 23*0e209d39SAndroid Build Coastguard Worker /** 24*0e209d39SAndroid Build Coastguard Worker * <code>UScriptRun</code> is used to find runs of characters in 25*0e209d39SAndroid Build Coastguard Worker * the same script. It implements a simple iterator over an array 26*0e209d39SAndroid Build Coastguard Worker * of characters. The iterator will resolve script-neutral characters 27*0e209d39SAndroid Build Coastguard Worker * like punctuation into the script of the surrounding characters. 28*0e209d39SAndroid Build Coastguard Worker * 29*0e209d39SAndroid Build Coastguard Worker * The iterator will try to match paired punctuation. If it sees an 30*0e209d39SAndroid Build Coastguard Worker * opening punctuation character, it will remember the script that 31*0e209d39SAndroid Build Coastguard Worker * was assigned to that character, and assign the same script to the 32*0e209d39SAndroid Build Coastguard Worker * matching closing punctuation. 33*0e209d39SAndroid Build Coastguard Worker * 34*0e209d39SAndroid Build Coastguard Worker * Scripts are chosen based on the <code>UScriptCode</code> enumeration. 35*0e209d39SAndroid Build Coastguard Worker * No attempt is made to combine related scripts into a single run. In 36*0e209d39SAndroid Build Coastguard Worker * particular, Hiragana, Katakana, and Han characters will appear in separate 37*0e209d39SAndroid Build Coastguard Worker * runs. 38*0e209d39SAndroid Build Coastguard Worker 39*0e209d39SAndroid Build Coastguard Worker * Here is an example of how to iterate over script runs: 40*0e209d39SAndroid Build Coastguard Worker * <pre> 41*0e209d39SAndroid Build Coastguard Worker * \code 42*0e209d39SAndroid Build Coastguard Worker * void printScriptRuns(const UChar *text, int32_t length) 43*0e209d39SAndroid Build Coastguard Worker * { 44*0e209d39SAndroid Build Coastguard Worker * UErrorCode error = U_ZERO_ERROR; 45*0e209d39SAndroid Build Coastguard Worker * UScriptRun *scriptRun = uscript_openRun(text, testLength, &error); 46*0e209d39SAndroid Build Coastguard Worker * int32_t start = 0, limit = 0; 47*0e209d39SAndroid Build Coastguard Worker * UScriptCode code = USCRIPT_INVALID_CODE; 48*0e209d39SAndroid Build Coastguard Worker * 49*0e209d39SAndroid Build Coastguard Worker * while (uscript_nextRun(&start, &limit, &code)) { 50*0e209d39SAndroid Build Coastguard Worker * printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit); 51*0e209d39SAndroid Build Coastguard Worker * } 52*0e209d39SAndroid Build Coastguard Worker * 53*0e209d39SAndroid Build Coastguard Worker * uscript_closeRun(scriptRun); 54*0e209d39SAndroid Build Coastguard Worker * } 55*0e209d39SAndroid Build Coastguard Worker * </pre> 56*0e209d39SAndroid Build Coastguard Worker */ 57*0e209d39SAndroid Build Coastguard Worker struct UScriptRun; 58*0e209d39SAndroid Build Coastguard Worker 59*0e209d39SAndroid Build Coastguard Worker typedef struct UScriptRun UScriptRun; 60*0e209d39SAndroid Build Coastguard Worker 61*0e209d39SAndroid Build Coastguard Worker /** 62*0e209d39SAndroid Build Coastguard Worker * Create a <code>UScriptRun</code> object for iterating over the given text. This object must 63*0e209d39SAndroid Build Coastguard Worker * be freed using <code>uscript_closeRun()</code>. Note that this object does not copy the source text, 64*0e209d39SAndroid Build Coastguard Worker * only the pointer to it. You must make sure that the pointer remains valid until you call 65*0e209d39SAndroid Build Coastguard Worker * <code>uscript_closeRun()</code> or <code>uscript_setRunText()</code>. 66*0e209d39SAndroid Build Coastguard Worker * 67*0e209d39SAndroid Build Coastguard Worker * @param src is the address of the array of characters over which to iterate. 68*0e209d39SAndroid Build Coastguard Worker * if <code>src == NULL</code> and <code>length == 0</code>, 69*0e209d39SAndroid Build Coastguard Worker * an empty <code>UScriptRun</code> object will be returned. 70*0e209d39SAndroid Build Coastguard Worker * 71*0e209d39SAndroid Build Coastguard Worker * @param length is the number of characters over which to iterate. 72*0e209d39SAndroid Build Coastguard Worker * 73*0e209d39SAndroid Build Coastguard Worker * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value 74*0e209d39SAndroid Build Coastguard Worker * indicates a failure on entry, the function will immediately return. 75*0e209d39SAndroid Build Coastguard Worker * On exit the value will indicate the success of the operation. 76*0e209d39SAndroid Build Coastguard Worker * 77*0e209d39SAndroid Build Coastguard Worker * @return the address of <code>UScriptRun</code> object which will iterate over the text, 78*0e209d39SAndroid Build Coastguard Worker * or <code>NULL</code> if the operation failed. 79*0e209d39SAndroid Build Coastguard Worker */ 80*0e209d39SAndroid Build Coastguard Worker U_CAPI UScriptRun * U_EXPORT2 81*0e209d39SAndroid Build Coastguard Worker uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode); 82*0e209d39SAndroid Build Coastguard Worker 83*0e209d39SAndroid Build Coastguard Worker /** 84*0e209d39SAndroid Build Coastguard Worker * Frees the given <code>UScriptRun</code> object and any storage associated with it. 85*0e209d39SAndroid Build Coastguard Worker * On return, scriptRun no longer points to a valid <code>UScriptRun</code> object. 86*0e209d39SAndroid Build Coastguard Worker * 87*0e209d39SAndroid Build Coastguard Worker * @param scriptRun is the <code>UScriptRun</code> object which will be freed. 88*0e209d39SAndroid Build Coastguard Worker */ 89*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 90*0e209d39SAndroid Build Coastguard Worker uscript_closeRun(UScriptRun *scriptRun); 91*0e209d39SAndroid Build Coastguard Worker 92*0e209d39SAndroid Build Coastguard Worker /** 93*0e209d39SAndroid Build Coastguard Worker * Reset the <code>UScriptRun</code> object so that it will start iterating from 94*0e209d39SAndroid Build Coastguard Worker * the beginning. 95*0e209d39SAndroid Build Coastguard Worker * 96*0e209d39SAndroid Build Coastguard Worker * @param scriptRun is the address of the <code>UScriptRun</code> object to be reset. 97*0e209d39SAndroid Build Coastguard Worker */ 98*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 99*0e209d39SAndroid Build Coastguard Worker uscript_resetRun(UScriptRun *scriptRun); 100*0e209d39SAndroid Build Coastguard Worker 101*0e209d39SAndroid Build Coastguard Worker /** 102*0e209d39SAndroid Build Coastguard Worker * Change the text over which the given <code>UScriptRun</code> object iterates. 103*0e209d39SAndroid Build Coastguard Worker * 104*0e209d39SAndroid Build Coastguard Worker * @param scriptRun is the <code>UScriptRun</code> object which will be changed. 105*0e209d39SAndroid Build Coastguard Worker * 106*0e209d39SAndroid Build Coastguard Worker * @param src is the address of the new array of characters over which to iterate. 107*0e209d39SAndroid Build Coastguard Worker * If <code>src == NULL</code> and <code>length == 0</code>, 108*0e209d39SAndroid Build Coastguard Worker * the <code>UScriptRun</code> object will become empty. 109*0e209d39SAndroid Build Coastguard Worker * 110*0e209d39SAndroid Build Coastguard Worker * @param length is the new number of characters over which to iterate 111*0e209d39SAndroid Build Coastguard Worker * 112*0e209d39SAndroid Build Coastguard Worker * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value 113*0e209d39SAndroid Build Coastguard Worker * indicates a failure on entry, the function will immediately return. 114*0e209d39SAndroid Build Coastguard Worker * On exit the value will indicate the success of the operation. 115*0e209d39SAndroid Build Coastguard Worker */ 116*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 117*0e209d39SAndroid Build Coastguard Worker uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode); 118*0e209d39SAndroid Build Coastguard Worker 119*0e209d39SAndroid Build Coastguard Worker /** 120*0e209d39SAndroid Build Coastguard Worker * Advance the <code>UScriptRun</code> object to the next script run, return the start and limit 121*0e209d39SAndroid Build Coastguard Worker * offsets, and the script of the run. 122*0e209d39SAndroid Build Coastguard Worker * 123*0e209d39SAndroid Build Coastguard Worker * @param scriptRun is the address of the <code>UScriptRun</code> object. 124*0e209d39SAndroid Build Coastguard Worker * 125*0e209d39SAndroid Build Coastguard Worker * @param pRunStart is a pointer to the variable to receive the starting offset of the next run. 126*0e209d39SAndroid Build Coastguard Worker * This pointer can be <code>NULL</code> if the value is not needed. 127*0e209d39SAndroid Build Coastguard Worker * 128*0e209d39SAndroid Build Coastguard Worker * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run. 129*0e209d39SAndroid Build Coastguard Worker * This pointer can be <code>NULL</code> if the value is not needed. 130*0e209d39SAndroid Build Coastguard Worker * 131*0e209d39SAndroid Build Coastguard Worker * @param pRunScript is a pointer to the variable to receive the UScriptCode for the 132*0e209d39SAndroid Build Coastguard Worker * script of the current run. This pointer can be <code>NULL</code> if the value is not needed. 133*0e209d39SAndroid Build Coastguard Worker * 134*0e209d39SAndroid Build Coastguard Worker * @return true if there was another script run. 135*0e209d39SAndroid Build Coastguard Worker */ 136*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2 137*0e209d39SAndroid Build Coastguard Worker uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript); 138*0e209d39SAndroid Build Coastguard Worker 139*0e209d39SAndroid Build Coastguard Worker #endif 140