xref: /aosp_15_r20/external/icu/libicu/cts_headers/rbt_set.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker **********************************************************************
5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1999-2007, International Business Machines Corporation
6*0e209d39SAndroid Build Coastguard Worker * and others. All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker **********************************************************************
8*0e209d39SAndroid Build Coastguard Worker *   Date        Name        Description
9*0e209d39SAndroid Build Coastguard Worker *   11/17/99    aliu        Creation.
10*0e209d39SAndroid Build Coastguard Worker **********************************************************************
11*0e209d39SAndroid Build Coastguard Worker */
12*0e209d39SAndroid Build Coastguard Worker #ifndef RBT_SET_H
13*0e209d39SAndroid Build Coastguard Worker #define RBT_SET_H
14*0e209d39SAndroid Build Coastguard Worker 
15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_TRANSLITERATION
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
20*0e209d39SAndroid Build Coastguard Worker #include "unicode/utrans.h"
21*0e209d39SAndroid Build Coastguard Worker #include "uvector.h"
22*0e209d39SAndroid Build Coastguard Worker 
23*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
24*0e209d39SAndroid Build Coastguard Worker 
25*0e209d39SAndroid Build Coastguard Worker class Replaceable; // text argument of transliterate(), taken by reference
26*0e209d39SAndroid Build Coastguard Worker class TransliterationRule; // element type of rules[]; passed by pointer to addRule()
27*0e209d39SAndroid Build Coastguard Worker class TransliterationRuleData; // argument of setData(), taken by const pointer
28*0e209d39SAndroid Build Coastguard Worker class UnicodeFilter; // not referenced by any declaration in this header
29*0e209d39SAndroid Build Coastguard Worker class UnicodeString; // argument and return type of toRules(), by reference
30*0e209d39SAndroid Build Coastguard Worker class UnicodeSet; // argument and return type of getSourceTargetSet(), by reference
31*0e209d39SAndroid Build Coastguard Worker 
32*0e209d39SAndroid Build Coastguard Worker /**
33*0e209d39SAndroid Build Coastguard Worker  * A set of rules for a <code>RuleBasedTransliterator</code>.  Documented life cycle: addRule() repeatedly, then freeze(), then transliterate().
34*0e209d39SAndroid Build Coastguard Worker  * @author Alan Liu
35*0e209d39SAndroid Build Coastguard Worker  */
36*0e209d39SAndroid Build Coastguard Worker class TransliterationRuleSet : public UMemory {
37*0e209d39SAndroid Build Coastguard Worker     /**
38*0e209d39SAndroid Build Coastguard Worker      * Vector of rules, in the order added.  This is used while the
39*0e209d39SAndroid Build Coastguard Worker      * rule set is getting built.  After that, freeze() reorders and
40*0e209d39SAndroid Build Coastguard Worker      * indexes the rules into rules[].  Any given rule is stored once
41*0e209d39SAndroid Build Coastguard Worker      * in ruleVector, and one or more times in rules[].  ruleVector
42*0e209d39SAndroid Build Coastguard Worker      * owns and deletes the rules.
43*0e209d39SAndroid Build Coastguard Worker      */
44*0e209d39SAndroid Build Coastguard Worker     UVector* ruleVector;
45*0e209d39SAndroid Build Coastguard Worker 
46*0e209d39SAndroid Build Coastguard Worker     /**
47*0e209d39SAndroid Build Coastguard Worker      * Sorted and indexed table of rules.  This is created by freeze()
48*0e209d39SAndroid Build Coastguard Worker      * from the rules in ruleVector.  It contains alias (non-owning) pointers
49*0e209d39SAndroid Build Coastguard Worker      * to the rules in ruleVector.  It is null before freeze() is called
50*0e209d39SAndroid Build Coastguard Worker      * and non-null thereafter.
51*0e209d39SAndroid Build Coastguard Worker      */
52*0e209d39SAndroid Build Coastguard Worker     TransliterationRule** rules;
53*0e209d39SAndroid Build Coastguard Worker 
54*0e209d39SAndroid Build Coastguard Worker     /**
55*0e209d39SAndroid Build Coastguard Worker      * Index table.  For text having a first character c, compute x = c&0xFF.
56*0e209d39SAndroid Build Coastguard Worker      * Now use rules[index[x]..index[x+1]-1].  Since x+1 can reach 256, the table has 257 entries.
57*0e209d39SAndroid Build Coastguard Worker      * This index table is created by freeze().  Before freeze() is called it contains garbage.
58*0e209d39SAndroid Build Coastguard Worker      */
59*0e209d39SAndroid Build Coastguard Worker     int32_t index[257];
60*0e209d39SAndroid Build Coastguard Worker 
61*0e209d39SAndroid Build Coastguard Worker     /**
62*0e209d39SAndroid Build Coastguard Worker      * Length of the longest preceding context.
63*0e209d39SAndroid Build Coastguard Worker      */
64*0e209d39SAndroid Build Coastguard Worker     int32_t maxContextLength;
65*0e209d39SAndroid Build Coastguard Worker 
66*0e209d39SAndroid Build Coastguard Worker public:
67*0e209d39SAndroid Build Coastguard Worker 
68*0e209d39SAndroid Build Coastguard Worker     /**
69*0e209d39SAndroid Build Coastguard Worker      * Construct a new empty rule set.
70*0e209d39SAndroid Build Coastguard Worker      * @param status    Output parameter filled in with success or failure status.
71*0e209d39SAndroid Build Coastguard Worker      */
72*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleSet(UErrorCode& status);
73*0e209d39SAndroid Build Coastguard Worker 
74*0e209d39SAndroid Build Coastguard Worker     /**
75*0e209d39SAndroid Build Coastguard Worker      * Copy constructor.  Public, whereas operator= is declared private at the end of this class.
76*0e209d39SAndroid Build Coastguard Worker      */
77*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleSet(const TransliterationRuleSet&);
78*0e209d39SAndroid Build Coastguard Worker 
79*0e209d39SAndroid Build Coastguard Worker     /**
80*0e209d39SAndroid Build Coastguard Worker      * Destructor.
81*0e209d39SAndroid Build Coastguard Worker      */
82*0e209d39SAndroid Build Coastguard Worker     virtual ~TransliterationRuleSet();
83*0e209d39SAndroid Build Coastguard Worker 
84*0e209d39SAndroid Build Coastguard Worker     /**
85*0e209d39SAndroid Build Coastguard Worker      * Change the data object that this rule belongs to.  Used
86*0e209d39SAndroid Build Coastguard Worker      * internally by the TransliterationRuleData copy constructor.
87*0e209d39SAndroid Build Coastguard Worker      * @param data    the new data value to be set.
88*0e209d39SAndroid Build Coastguard Worker      */
89*0e209d39SAndroid Build Coastguard Worker     void setData(const TransliterationRuleData* data);
90*0e209d39SAndroid Build Coastguard Worker 
91*0e209d39SAndroid Build Coastguard Worker     /**
92*0e209d39SAndroid Build Coastguard Worker      * Return the maximum context length.
93*0e209d39SAndroid Build Coastguard Worker      * @return the length of the longest preceding context.
94*0e209d39SAndroid Build Coastguard Worker      */
95*0e209d39SAndroid Build Coastguard Worker     virtual int32_t getMaximumContextLength() const;
96*0e209d39SAndroid Build Coastguard Worker 
97*0e209d39SAndroid Build Coastguard Worker     /**
98*0e209d39SAndroid Build Coastguard Worker      * Add a rule to this set.  Rules are added in order, and order is
99*0e209d39SAndroid Build Coastguard Worker      * significant.  The last call to this method must be followed by
100*0e209d39SAndroid Build Coastguard Worker      * a call to <code>freeze()</code> before the rule set is used.
101*0e209d39SAndroid Build Coastguard Worker      * This method must <em>not</em> be called after freeze() has been
102*0e209d39SAndroid Build Coastguard Worker      * called.  NOTE(review): the freeze() comment below instead allows this if freeze() is called again; confirm which holds.
103*0e209d39SAndroid Build Coastguard Worker      * @param adoptedRule the rule to add; per the ruleVector comment, ruleVector owns and deletes the rules
104*0e209d39SAndroid Build Coastguard Worker      * @param status      error code reference; presumably reports success or failure as for freeze() (TODO confirm)
105*0e209d39SAndroid Build Coastguard Worker      */
106*0e209d39SAndroid Build Coastguard Worker     virtual void addRule(TransliterationRule* adoptedRule,
107*0e209d39SAndroid Build Coastguard Worker                          UErrorCode& status);
108*0e209d39SAndroid Build Coastguard Worker 
109*0e209d39SAndroid Build Coastguard Worker     /**
110*0e209d39SAndroid Build Coastguard Worker      * Check this for masked rules and index it to optimize performance.
111*0e209d39SAndroid Build Coastguard Worker      * The sequence of operations is: (1) add rules to a set using
112*0e209d39SAndroid Build Coastguard Worker      * <code>addRule()</code>; (2) freeze the set using
113*0e209d39SAndroid Build Coastguard Worker      * <code>freeze()</code>; (3) use the rule set.  If
114*0e209d39SAndroid Build Coastguard Worker      * <code>addRule()</code> is called after calling this method, it
115*0e209d39SAndroid Build Coastguard Worker      * invalidates this object, and this method must be called again.
116*0e209d39SAndroid Build Coastguard Worker      * That is, <code>freeze()</code> may be called multiple times,
117*0e209d39SAndroid Build Coastguard Worker      * although for optimal performance it shouldn't be.
118*0e209d39SAndroid Build Coastguard Worker      * @param parseError A reference to a UParseError that receives information about
119*0e209d39SAndroid Build Coastguard Worker      *                   any errors that occurred.
120*0e209d39SAndroid Build Coastguard Worker      * @param status     Output parameter filled in with success or failure status.
121*0e209d39SAndroid Build Coastguard Worker      */
122*0e209d39SAndroid Build Coastguard Worker     virtual void freeze(UParseError& parseError, UErrorCode& status);
123*0e209d39SAndroid Build Coastguard Worker 
124*0e209d39SAndroid Build Coastguard Worker     /**
125*0e209d39SAndroid Build Coastguard Worker      * Transliterate the given text with the given UTransPosition
126*0e209d39SAndroid Build Coastguard Worker      * indices.  Return true if the transliteration should continue
127*0e209d39SAndroid Build Coastguard Worker      * or false if it should halt (because of a U_PARTIAL_MATCH match).
128*0e209d39SAndroid Build Coastguard Worker      * Note that false is only ever returned if isIncremental is true.
129*0e209d39SAndroid Build Coastguard Worker      * @param text the text to be transliterated
130*0e209d39SAndroid Build Coastguard Worker      * @param index the position indices, which will be updated
131*0e209d39SAndroid Build Coastguard Worker      * @param isIncremental if true, assume new text may be inserted
132*0e209d39SAndroid Build Coastguard Worker      * at index.limit, and return false if there is a partial match.
133*0e209d39SAndroid Build Coastguard Worker      * @return true unless a U_PARTIAL_MATCH has been obtained,
134*0e209d39SAndroid Build Coastguard Worker      * indicating that transliteration should stop until more text
135*0e209d39SAndroid Build Coastguard Worker      * arrives.
136*0e209d39SAndroid Build Coastguard Worker      */
137*0e209d39SAndroid Build Coastguard Worker     UBool transliterate(Replaceable& text,
138*0e209d39SAndroid Build Coastguard Worker                         UTransPosition& index,
139*0e209d39SAndroid Build Coastguard Worker                         UBool isIncremental);
140*0e209d39SAndroid Build Coastguard Worker 
141*0e209d39SAndroid Build Coastguard Worker     /**
142*0e209d39SAndroid Build Coastguard Worker      * Create rule strings that represent this rule set.
143*0e209d39SAndroid Build Coastguard Worker      * @param result string to receive the rule strings.  Current
144*0e209d39SAndroid Build Coastguard Worker      * contents will be deleted.
145*0e209d39SAndroid Build Coastguard Worker      * @param escapeUnprintable  if true, escape the unprintable characters
146*0e209d39SAndroid Build Coastguard Worker      * @return    A reference to 'result'.
147*0e209d39SAndroid Build Coastguard Worker      */
148*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString& toRules(UnicodeString& result,
149*0e209d39SAndroid Build Coastguard Worker                                    UBool escapeUnprintable) const;
150*0e209d39SAndroid Build Coastguard Worker 
151*0e209d39SAndroid Build Coastguard Worker     /**
152*0e209d39SAndroid Build Coastguard Worker      * Return the set of all characters that may be modified
153*0e209d39SAndroid Build Coastguard Worker      * (getTarget=false) or emitted (getTarget=true) by this set.  Presumably returns a reference to 'result', like toRules() (TODO confirm).
154*0e209d39SAndroid Build Coastguard Worker      */
155*0e209d39SAndroid Build Coastguard Worker     UnicodeSet& getSourceTargetSet(UnicodeSet& result,
156*0e209d39SAndroid Build Coastguard Worker                    UBool getTarget) const;
157*0e209d39SAndroid Build Coastguard Worker 
158*0e209d39SAndroid Build Coastguard Worker private:
159*0e209d39SAndroid Build Coastguard Worker 
160*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // declared private to forbid assignment; copy construction stays available through the public copy constructor
161*0e209d39SAndroid Build Coastguard Worker };
162*0e209d39SAndroid Build Coastguard Worker 
163*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
164*0e209d39SAndroid Build Coastguard Worker 
165*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_TRANSLITERATION */
166*0e209d39SAndroid Build Coastguard Worker 
167*0e209d39SAndroid Build Coastguard Worker #endif
168