// Source: /aosp_15_r20/external/icu/libicu/cts_headers/rbt_data.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
12*0e209d39SAndroid Build Coastguard Worker #ifndef RBT_DATA_H
13*0e209d39SAndroid Build Coastguard Worker #define RBT_DATA_H
14*0e209d39SAndroid Build Coastguard Worker 
15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
16*0e209d39SAndroid Build Coastguard Worker #include "unicode/uclean.h"
17*0e209d39SAndroid Build Coastguard Worker 
18*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_TRANSLITERATION
19*0e209d39SAndroid Build Coastguard Worker 
20*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
21*0e209d39SAndroid Build Coastguard Worker #include "rbt_set.h"
22*0e209d39SAndroid Build Coastguard Worker #include "hash.h"
23*0e209d39SAndroid Build Coastguard Worker 
24*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
25*0e209d39SAndroid Build Coastguard Worker 
// Forward declarations: this header refers to these types only through
// pointers, so their full definitions are not needed here.
class UnicodeFunctor;
class UnicodeMatcher;
class UnicodeReplacer;
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker /**
31*0e209d39SAndroid Build Coastguard Worker  * The rule data for a RuleBasedTransliterators.  RBT objects hold
32*0e209d39SAndroid Build Coastguard Worker  * a const pointer to a TRD object that they do not own.  TRD objects
33*0e209d39SAndroid Build Coastguard Worker  * are essentially the parsed rules in compact, usable form.  The
34*0e209d39SAndroid Build Coastguard Worker  * TRD objects themselves are held for the life of the process in
35*0e209d39SAndroid Build Coastguard Worker  * a static cache owned by Transliterator.
36*0e209d39SAndroid Build Coastguard Worker  *
37*0e209d39SAndroid Build Coastguard Worker  * This class' API is a little asymmetric.  There is a method to
38*0e209d39SAndroid Build Coastguard Worker  * define a variable, but no way to define a set.  This is because the
39*0e209d39SAndroid Build Coastguard Worker  * sets are defined by the parser in a UVector, and the vector is
40*0e209d39SAndroid Build Coastguard Worker  * copied into a fixed-size array here.  Once this is done, no new
41*0e209d39SAndroid Build Coastguard Worker  * sets may be defined.  In practice, there is no need to do so, since
42*0e209d39SAndroid Build Coastguard Worker  * generating the data and using it are discrete phases.  When there
43*0e209d39SAndroid Build Coastguard Worker  * is a need to access the set data during the parse phase, another
44*0e209d39SAndroid Build Coastguard Worker  * data structure handles this.  See the parsing code for more
45*0e209d39SAndroid Build Coastguard Worker  * details.
46*0e209d39SAndroid Build Coastguard Worker  */
47*0e209d39SAndroid Build Coastguard Worker class TransliterationRuleData : public UMemory {
48*0e209d39SAndroid Build Coastguard Worker 
49*0e209d39SAndroid Build Coastguard Worker public:
50*0e209d39SAndroid Build Coastguard Worker 
51*0e209d39SAndroid Build Coastguard Worker     // PUBLIC DATA MEMBERS
52*0e209d39SAndroid Build Coastguard Worker 
53*0e209d39SAndroid Build Coastguard Worker     /**
54*0e209d39SAndroid Build Coastguard Worker      * Rule table.  May be empty.
55*0e209d39SAndroid Build Coastguard Worker      */
56*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleSet ruleSet;
57*0e209d39SAndroid Build Coastguard Worker 
58*0e209d39SAndroid Build Coastguard Worker     /**
59*0e209d39SAndroid Build Coastguard Worker      * Map variable name (String) to variable (UnicodeString).  A variable name
60*0e209d39SAndroid Build Coastguard Worker      * corresponds to zero or more characters, stored in a UnicodeString in
61*0e209d39SAndroid Build Coastguard Worker      * this hash.  One or more of these chars may also correspond to a
62*0e209d39SAndroid Build Coastguard Worker      * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
63*0e209d39SAndroid Build Coastguard Worker      * a stand-in: it is an index for a secondary lookup in
64*0e209d39SAndroid Build Coastguard Worker      * data.variables.  The stand-in also represents the UnicodeMatcher in
65*0e209d39SAndroid Build Coastguard Worker      * the stored rules.
66*0e209d39SAndroid Build Coastguard Worker      */
67*0e209d39SAndroid Build Coastguard Worker     Hashtable variableNames;
68*0e209d39SAndroid Build Coastguard Worker 
69*0e209d39SAndroid Build Coastguard Worker     /**
70*0e209d39SAndroid Build Coastguard Worker      * Map category variable (char16_t) to set (UnicodeFunctor).
71*0e209d39SAndroid Build Coastguard Worker      * Variables that correspond to a set of characters are mapped
72*0e209d39SAndroid Build Coastguard Worker      * from variable name to a stand-in character in data.variableNames.
73*0e209d39SAndroid Build Coastguard Worker      * The stand-in then serves as a key in this hash to lookup the
74*0e209d39SAndroid Build Coastguard Worker      * actual UnicodeFunctor object.  In addition, the stand-in is
75*0e209d39SAndroid Build Coastguard Worker      * stored in the rule text to represent the set of characters.
76*0e209d39SAndroid Build Coastguard Worker      * variables[i] represents character (variablesBase + i).
77*0e209d39SAndroid Build Coastguard Worker      */
78*0e209d39SAndroid Build Coastguard Worker     UnicodeFunctor** variables;
79*0e209d39SAndroid Build Coastguard Worker 
80*0e209d39SAndroid Build Coastguard Worker     /**
81*0e209d39SAndroid Build Coastguard Worker      * Flag that indicates whether the variables are owned (if a single
82*0e209d39SAndroid Build Coastguard Worker      * call to Transliterator::createFromRules() produces a CompoundTransliterator
83*0e209d39SAndroid Build Coastguard Worker      * with more than one RuleBasedTransliterator as children, they all share
84*0e209d39SAndroid Build Coastguard Worker      * the same variables list, so only the first one is considered to own
85*0e209d39SAndroid Build Coastguard Worker      * the variables)
86*0e209d39SAndroid Build Coastguard Worker      */
87*0e209d39SAndroid Build Coastguard Worker     UBool variablesAreOwned;
88*0e209d39SAndroid Build Coastguard Worker 
89*0e209d39SAndroid Build Coastguard Worker     /**
90*0e209d39SAndroid Build Coastguard Worker      * The character that represents variables[0].  Characters
91*0e209d39SAndroid Build Coastguard Worker      * variablesBase through variablesBase +
92*0e209d39SAndroid Build Coastguard Worker      * variablesLength - 1 represent UnicodeFunctor objects.
93*0e209d39SAndroid Build Coastguard Worker      */
94*0e209d39SAndroid Build Coastguard Worker     char16_t variablesBase;
95*0e209d39SAndroid Build Coastguard Worker 
96*0e209d39SAndroid Build Coastguard Worker     /**
97*0e209d39SAndroid Build Coastguard Worker      * The length of variables.
98*0e209d39SAndroid Build Coastguard Worker      */
99*0e209d39SAndroid Build Coastguard Worker     int32_t variablesLength;
100*0e209d39SAndroid Build Coastguard Worker 
101*0e209d39SAndroid Build Coastguard Worker public:
102*0e209d39SAndroid Build Coastguard Worker 
103*0e209d39SAndroid Build Coastguard Worker     /**
104*0e209d39SAndroid Build Coastguard Worker      * Constructor
105*0e209d39SAndroid Build Coastguard Worker      * @param status Output param set to success/failure code on exit.
106*0e209d39SAndroid Build Coastguard Worker      */
107*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleData(UErrorCode& status);
108*0e209d39SAndroid Build Coastguard Worker 
109*0e209d39SAndroid Build Coastguard Worker     /**
110*0e209d39SAndroid Build Coastguard Worker      * Copy Constructor
111*0e209d39SAndroid Build Coastguard Worker      */
112*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleData(const TransliterationRuleData&);
113*0e209d39SAndroid Build Coastguard Worker 
114*0e209d39SAndroid Build Coastguard Worker     /**
115*0e209d39SAndroid Build Coastguard Worker      * destructor
116*0e209d39SAndroid Build Coastguard Worker      */
117*0e209d39SAndroid Build Coastguard Worker     ~TransliterationRuleData();
118*0e209d39SAndroid Build Coastguard Worker 
119*0e209d39SAndroid Build Coastguard Worker     /**
120*0e209d39SAndroid Build Coastguard Worker      * Given a stand-in character, return the UnicodeFunctor that it
121*0e209d39SAndroid Build Coastguard Worker      * represents, or nullptr if it doesn't represent anything.
122*0e209d39SAndroid Build Coastguard Worker      * @param standIn    the given stand-in character.
123*0e209d39SAndroid Build Coastguard Worker      * @return           the UnicodeFunctor that 'standIn' represents
124*0e209d39SAndroid Build Coastguard Worker      */
125*0e209d39SAndroid Build Coastguard Worker     UnicodeFunctor* lookup(UChar32 standIn) const;
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker     /**
128*0e209d39SAndroid Build Coastguard Worker      * Given a stand-in character, return the UnicodeMatcher that it
129*0e209d39SAndroid Build Coastguard Worker      * represents, or nullptr if it doesn't represent anything or if it
130*0e209d39SAndroid Build Coastguard Worker      * represents something that is not a matcher.
131*0e209d39SAndroid Build Coastguard Worker      * @param standIn    the given stand-in character.
132*0e209d39SAndroid Build Coastguard Worker      * @return           return the UnicodeMatcher that 'standIn' represents
133*0e209d39SAndroid Build Coastguard Worker      */
134*0e209d39SAndroid Build Coastguard Worker     UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
135*0e209d39SAndroid Build Coastguard Worker 
136*0e209d39SAndroid Build Coastguard Worker     /**
137*0e209d39SAndroid Build Coastguard Worker      * Given a stand-in character, return the UnicodeReplacer that it
138*0e209d39SAndroid Build Coastguard Worker      * represents, or nullptr if it doesn't represent anything or if it
139*0e209d39SAndroid Build Coastguard Worker      * represents something that is not a replacer.
140*0e209d39SAndroid Build Coastguard Worker      * @param standIn    the given stand-in character.
141*0e209d39SAndroid Build Coastguard Worker      * @return           return the UnicodeReplacer that 'standIn' represents
142*0e209d39SAndroid Build Coastguard Worker      */
143*0e209d39SAndroid Build Coastguard Worker     UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
144*0e209d39SAndroid Build Coastguard Worker 
145*0e209d39SAndroid Build Coastguard Worker 
146*0e209d39SAndroid Build Coastguard Worker private:
147*0e209d39SAndroid Build Coastguard Worker     TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
148*0e209d39SAndroid Build Coastguard Worker };
149*0e209d39SAndroid Build Coastguard Worker 
150*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
151*0e209d39SAndroid Build Coastguard Worker 
152*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_TRANSLITERATION */
153*0e209d39SAndroid Build Coastguard Worker 
154*0e209d39SAndroid Build Coastguard Worker #endif
155