1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1999-2007, International Business Machines Corporation 6*0e209d39SAndroid Build Coastguard Worker * and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 8*0e209d39SAndroid Build Coastguard Worker * Date Name Description 9*0e209d39SAndroid Build Coastguard Worker * 11/17/99 aliu Creation. 10*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 11*0e209d39SAndroid Build Coastguard Worker */ 12*0e209d39SAndroid Build Coastguard Worker #ifndef RBT_SET_H 13*0e209d39SAndroid Build Coastguard Worker #define RBT_SET_H 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_TRANSLITERATION 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 20*0e209d39SAndroid Build Coastguard Worker #include "unicode/utrans.h" 21*0e209d39SAndroid Build Coastguard Worker #include "uvector.h" 22*0e209d39SAndroid Build Coastguard Worker 23*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 24*0e209d39SAndroid Build Coastguard Worker 25*0e209d39SAndroid Build Coastguard Worker class Replaceable; 26*0e209d39SAndroid Build Coastguard Worker class TransliterationRule; 27*0e209d39SAndroid Build Coastguard Worker class TransliterationRuleData; 28*0e209d39SAndroid Build Coastguard Worker class UnicodeFilter; 29*0e209d39SAndroid Build Coastguard Worker class UnicodeString; 30*0e209d39SAndroid Build Coastguard Worker class UnicodeSet; 31*0e209d39SAndroid Build Coastguard Worker 32*0e209d39SAndroid Build Coastguard Worker /** 33*0e209d39SAndroid Build Coastguard Worker * A set of rules for a <code>RuleBasedTransliterator</code>. 34*0e209d39SAndroid Build Coastguard Worker * @author Alan Liu 35*0e209d39SAndroid Build Coastguard Worker */ 36*0e209d39SAndroid Build Coastguard Worker class TransliterationRuleSet : public UMemory { 37*0e209d39SAndroid Build Coastguard Worker /** 38*0e209d39SAndroid Build Coastguard Worker * Vector of rules, in the order added. This is used while the 39*0e209d39SAndroid Build Coastguard Worker * rule set is getting built. After that, freeze() reorders and 40*0e209d39SAndroid Build Coastguard Worker * indexes the rules into rules[]. Any given rule is stored once 41*0e209d39SAndroid Build Coastguard Worker * in ruleVector, and one or more times in rules[]. ruleVector 42*0e209d39SAndroid Build Coastguard Worker * owns and deletes the rules. 43*0e209d39SAndroid Build Coastguard Worker */ 44*0e209d39SAndroid Build Coastguard Worker UVector* ruleVector; 45*0e209d39SAndroid Build Coastguard Worker 46*0e209d39SAndroid Build Coastguard Worker /** 47*0e209d39SAndroid Build Coastguard Worker * Sorted and indexed table of rules. This is created by freeze() 48*0e209d39SAndroid Build Coastguard Worker * from the rules in ruleVector. It contains alias pointers to 49*0e209d39SAndroid Build Coastguard Worker * the rules in ruleVector. It is zero before freeze() is called 50*0e209d39SAndroid Build Coastguard Worker * and non-zero thereafter. 51*0e209d39SAndroid Build Coastguard Worker */ 52*0e209d39SAndroid Build Coastguard Worker TransliterationRule** rules; 53*0e209d39SAndroid Build Coastguard Worker 54*0e209d39SAndroid Build Coastguard Worker /** 55*0e209d39SAndroid Build Coastguard Worker * Index table. For text having a first character c, compute x = c&0xFF. 56*0e209d39SAndroid Build Coastguard Worker * Now use rules[index[x]..index[x+1]-1]. This index table is created by 57*0e209d39SAndroid Build Coastguard Worker * freeze(). Before freeze() is called it contains garbage. 58*0e209d39SAndroid Build Coastguard Worker */ 59*0e209d39SAndroid Build Coastguard Worker int32_t index[257]; 60*0e209d39SAndroid Build Coastguard Worker 61*0e209d39SAndroid Build Coastguard Worker /** 62*0e209d39SAndroid Build Coastguard Worker * Length of the longest preceding context 63*0e209d39SAndroid Build Coastguard Worker */ 64*0e209d39SAndroid Build Coastguard Worker int32_t maxContextLength; 65*0e209d39SAndroid Build Coastguard Worker 66*0e209d39SAndroid Build Coastguard Worker public: 67*0e209d39SAndroid Build Coastguard Worker 68*0e209d39SAndroid Build Coastguard Worker /** 69*0e209d39SAndroid Build Coastguard Worker * Construct a new empty rule set. 70*0e209d39SAndroid Build Coastguard Worker * @param status Output parameter filled in with success or failure status. 71*0e209d39SAndroid Build Coastguard Worker */ 72*0e209d39SAndroid Build Coastguard Worker TransliterationRuleSet(UErrorCode& status); 73*0e209d39SAndroid Build Coastguard Worker 74*0e209d39SAndroid Build Coastguard Worker /** 75*0e209d39SAndroid Build Coastguard Worker * Copy constructor. 76*0e209d39SAndroid Build Coastguard Worker */ 77*0e209d39SAndroid Build Coastguard Worker TransliterationRuleSet(const TransliterationRuleSet&); 78*0e209d39SAndroid Build Coastguard Worker 79*0e209d39SAndroid Build Coastguard Worker /** 80*0e209d39SAndroid Build Coastguard Worker * Destructor. 81*0e209d39SAndroid Build Coastguard Worker */ 82*0e209d39SAndroid Build Coastguard Worker virtual ~TransliterationRuleSet(); 83*0e209d39SAndroid Build Coastguard Worker 84*0e209d39SAndroid Build Coastguard Worker /** 85*0e209d39SAndroid Build Coastguard Worker * Change the data object that this rule belongs to. Used 86*0e209d39SAndroid Build Coastguard Worker * internally by the TransliterationRuleData copy constructor. 87*0e209d39SAndroid Build Coastguard Worker * @param data the new data value to be set. 88*0e209d39SAndroid Build Coastguard Worker */ 89*0e209d39SAndroid Build Coastguard Worker void setData(const TransliterationRuleData* data); 90*0e209d39SAndroid Build Coastguard Worker 91*0e209d39SAndroid Build Coastguard Worker /** 92*0e209d39SAndroid Build Coastguard Worker * Return the maximum context length. 93*0e209d39SAndroid Build Coastguard Worker * @return the length of the longest preceding context. 94*0e209d39SAndroid Build Coastguard Worker */ 95*0e209d39SAndroid Build Coastguard Worker virtual int32_t getMaximumContextLength() const; 96*0e209d39SAndroid Build Coastguard Worker 97*0e209d39SAndroid Build Coastguard Worker /** 98*0e209d39SAndroid Build Coastguard Worker * Add a rule to this set. Rules are added in order, and order is 99*0e209d39SAndroid Build Coastguard Worker * significant. The last call to this method must be followed by 100*0e209d39SAndroid Build Coastguard Worker * a call to <code>freeze()</code> before the rule set is used. 101*0e209d39SAndroid Build Coastguard Worker * This method must <em>not</em> be called after freeze() has been 102*0e209d39SAndroid Build Coastguard Worker * called. 103*0e209d39SAndroid Build Coastguard Worker * 104*0e209d39SAndroid Build Coastguard Worker * @param adoptedRule the rule to add 105*0e209d39SAndroid Build Coastguard Worker */ 106*0e209d39SAndroid Build Coastguard Worker virtual void addRule(TransliterationRule* adoptedRule, 107*0e209d39SAndroid Build Coastguard Worker UErrorCode& status); 108*0e209d39SAndroid Build Coastguard Worker 109*0e209d39SAndroid Build Coastguard Worker /** 110*0e209d39SAndroid Build Coastguard Worker * Check this for masked rules and index it to optimize performance. 111*0e209d39SAndroid Build Coastguard Worker * The sequence of operations is: (1) add rules to a set using 112*0e209d39SAndroid Build Coastguard Worker * <code>addRule()</code>; (2) freeze the set using 113*0e209d39SAndroid Build Coastguard Worker * <code>freeze()</code>; (3) use the rule set. If 114*0e209d39SAndroid Build Coastguard Worker * <code>addRule()</code> is called after calling this method, it 115*0e209d39SAndroid Build Coastguard Worker * invalidates this object, and this method must be called again. 116*0e209d39SAndroid Build Coastguard Worker * That is, <code>freeze()</code> may be called multiple times, 117*0e209d39SAndroid Build Coastguard Worker * although for optimal performance it shouldn't be. 118*0e209d39SAndroid Build Coastguard Worker * @param parseError A pointer to UParseError to receive information about errors 119*0e209d39SAndroid Build Coastguard Worker * occurred. 120*0e209d39SAndroid Build Coastguard Worker * @param status Output parameter filled in with success or failure status. 121*0e209d39SAndroid Build Coastguard Worker */ 122*0e209d39SAndroid Build Coastguard Worker virtual void freeze(UParseError& parseError, UErrorCode& status); 123*0e209d39SAndroid Build Coastguard Worker 124*0e209d39SAndroid Build Coastguard Worker /** 125*0e209d39SAndroid Build Coastguard Worker * Transliterate the given text with the given UTransPosition 126*0e209d39SAndroid Build Coastguard Worker * indices. Return true if the transliteration should continue 127*0e209d39SAndroid Build Coastguard Worker * or false if it should halt (because of a U_PARTIAL_MATCH match). 128*0e209d39SAndroid Build Coastguard Worker * Note that false is only ever returned if isIncremental is true. 129*0e209d39SAndroid Build Coastguard Worker * @param text the text to be transliterated 130*0e209d39SAndroid Build Coastguard Worker * @param index the position indices, which will be updated 131*0e209d39SAndroid Build Coastguard Worker * @param isIncremental if true, assume new text may be inserted 132*0e209d39SAndroid Build Coastguard Worker * at index.limit, and return false if thrre is a partial match. 133*0e209d39SAndroid Build Coastguard Worker * @return true unless a U_PARTIAL_MATCH has been obtained, 134*0e209d39SAndroid Build Coastguard Worker * indicating that transliteration should stop until more text 135*0e209d39SAndroid Build Coastguard Worker * arrives. 136*0e209d39SAndroid Build Coastguard Worker */ 137*0e209d39SAndroid Build Coastguard Worker UBool transliterate(Replaceable& text, 138*0e209d39SAndroid Build Coastguard Worker UTransPosition& index, 139*0e209d39SAndroid Build Coastguard Worker UBool isIncremental); 140*0e209d39SAndroid Build Coastguard Worker 141*0e209d39SAndroid Build Coastguard Worker /** 142*0e209d39SAndroid Build Coastguard Worker * Create rule strings that represents this rule set. 143*0e209d39SAndroid Build Coastguard Worker * @param result string to receive the rule strings. Current 144*0e209d39SAndroid Build Coastguard Worker * contents will be deleted. 145*0e209d39SAndroid Build Coastguard Worker * @param escapeUnprintable True, will escape the unprintable characters 146*0e209d39SAndroid Build Coastguard Worker * @return A reference to 'result'. 147*0e209d39SAndroid Build Coastguard Worker */ 148*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString& toRules(UnicodeString& result, 149*0e209d39SAndroid Build Coastguard Worker UBool escapeUnprintable) const; 150*0e209d39SAndroid Build Coastguard Worker 151*0e209d39SAndroid Build Coastguard Worker /** 152*0e209d39SAndroid Build Coastguard Worker * Return the set of all characters that may be modified 153*0e209d39SAndroid Build Coastguard Worker * (getTarget=false) or emitted (getTarget=true) by this set. 154*0e209d39SAndroid Build Coastguard Worker */ 155*0e209d39SAndroid Build Coastguard Worker UnicodeSet& getSourceTargetSet(UnicodeSet& result, 156*0e209d39SAndroid Build Coastguard Worker UBool getTarget) const; 157*0e209d39SAndroid Build Coastguard Worker 158*0e209d39SAndroid Build Coastguard Worker private: 159*0e209d39SAndroid Build Coastguard Worker 160*0e209d39SAndroid Build Coastguard Worker TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class 161*0e209d39SAndroid Build Coastguard Worker }; 162*0e209d39SAndroid Build Coastguard Worker 163*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 164*0e209d39SAndroid Build Coastguard Worker 165*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 166*0e209d39SAndroid Build Coastguard Worker 167*0e209d39SAndroid Build Coastguard Worker #endif 168