xref: /aosp_15_r20/external/cldr/common/transforms/fa-fa_FONIPA.xml (revision 912701f9769bb47905792267661f0baf2b85bed5)
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!--
4Copyright © 1991-2017 Unicode, Inc.
5CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
6For terms of use, see http://www.unicode.org/copyright.html
7-->
8<supplementalData>
9	<version number="$Revision$"/>
10	<transforms>
11		<transform source="fa" target="fa_FONIPA" direction="forward" alias="fa-fonipa-t-fa">
12			<tRule><![CDATA[
13[\u200c \u200d] → ;  # Strip the zero-width non-joiner (U+200C) and zero-width joiner (U+200D).
14::NFD;  # Decompose the text so that later rules see base letters and combining marks separately.
15
16# Rewrite similar-looking Arabic letters to their Persian counterparts.
17ي → ی;
18ى → ی;
19ك → ک;
20ە → ه;
21::NULL;  # Pass boundary: the rules below run in a new pass over the rewritten text.
22
23$VOWEL = [ َ ِ  ُ  ٓ ا و ی];  # Vowel marks and vowel letters; used as context by the rules below.
24$BOUNDARY = [^[:L:][:M:][:N:]];  # Any character that is not a letter, a mark or a number.
25$IPA_CONSONANT = [ m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ {t͡ʃ} {d͡ʒ} l ɾ ];  # Consonant symbols produced by this transform; braces group the multi-character entries.
26
27# Vowels. Rules are tried in order, so the more specific sequences are listed before the general ones.
28یّ → jj;
29وّ → vv;
30([ َ ِ  ُ])ّ → ّ | $1;  # Swap short vowel + shadda; the cursor (|) leaves the vowel to be matched again.
31َیْ → æj;
32ِی → ej;
33
34یوْ → iːv;
35{یو} ه $BOUNDARY → iːv;  # Only the text inside the braces is replaced; what follows is context.
36{یو} هٔ $BOUNDARY → iːv;
37یو → juː;
38َوْ → av;
39
40# Hamza forms: a standalone hamza, or a hamza carried on another letter, becomes a glottal stop.
41ء → ʔ;
42ا َ ٔ → ʔæ; # Needed because NFD reorders the fatha before the hamza.
43ا ٔ → ʔ;
44و ٔ → ʔ;
45ی ْ ٔ → ʔ; # Needed because NFD reorders the sukun before the hamza.
46ی ِ ٔ → ʔe; # Needed because NFD reorders the kasra before the hamza.
47ی ٔ → ʔ;
48
49{ َ ه} $BOUNDARY → æ;  # Fatha + heh directly before a boundary: only the vowel is written.
50[^ːeoæ] {هٔ} $BOUNDARY → eje;  # Heh + hamza before a boundary, when not preceded by ː, e, o or æ.
51[e] {هٔ} $BOUNDARY → je;
52[^ːeoæ] {ه} $BOUNDARY → e;
53[e] {ه} $BOUNDARY → ;  # After e, a heh before a boundary is dropped.
54اَ → æ;
55اً $BOUNDARY → æn;
56َ → æ;
57یه → je;
58یٰ → ɒː;
59$IPA_CONSONANT {وی} $VOWEL → uːj;
60# If yeh is preceded by a consonant and followed by a vowel, it is
61# pronounced /iːj/. A sukun between the consonant and the yeh blocks
62# that, and the yeh is then pronounced as plain /j/.
63$IPA_CONSONANT {\u0652 یو} → juː;
64$IPA_CONSONANT {\u0652 ی} $VOWEL → j;
65$IPA_CONSONANT {ی} $VOWEL → iːj;
66{ی} $VOWEL → j;
67ی ْ → j;
68ی → iː;  # Fallback for any yeh not handled by an earlier rule.
69
70$BOUNDARY {ای} → iː;  # Alef + yeh directly after a boundary.
71ا\u0653 → ɒː;
72آ → ɒː;
73اِ → e;
74$BOUNDARY {اُو} → o;
75اُ → o;
76$BOUNDARY {او} → uː;
77او → ɒːv;
78ا → ɒː; # Original note: probably meant to require a preceding [^$BOUNDARY] context — unconfirmed.
79ِ → e;
80ه ِ ّ → hhe; # Needed because NFD moves the kasra before the shadda.
81هِ → he;
82
83خوا → χɒː;  # The waw in this sequence produces no output of its own.
84خوی → χiː;
85{و} $VOWEL → v;
86{و} ه $BOUNDARY → v;
87{و} هٔ $BOUNDARY → v;
88$IPA_CONSONANT {و} → uː;
89$IPA_CONSONANT \u0651 {و} → uː; # A shadda sits between the consonant and the waw.
90ُ{و} $IPA_CONSONANT → uː;  # NOTE(review): the following context is an IPA consonant, but text to the right is normally not converted yet — confirm this rule can match.
91
92$BOUNDARY {و} $BOUNDARY → va;  # A waw with a boundary on both sides.
93{ ُو} $VOWEL → ov;
94ُ و ٔ → oʔ;
95 ُو → o;
96ُ → o;  # Fallback for any damma not handled by an earlier rule.
97
98# Consonants: each remaining letter maps to a fixed IPA consonant; letters that share a sound are grouped in one set.
99پ → p;
100ب → b;
101[ت ط] → t;
102د → d;
103ک → k;
104گ → ɡ;
105ع → ʔ;
106چ → t͡ʃ;
107ج → d͡ʒ;
108ف → f;
109[س ص ث] → s;
110[ز ذ ض ظ] → z;
111ش → ʃ;
112ژ → ʒ;
113خ → χ;
114غ → ʁ;
115ق → ɢ;
116ح → h;
117م → m;
118ن → n;
119ه → h;  # Any heh not consumed by the context-dependent heh rules earlier in the file.
120ل → l;
121ر → ɾ;
122
123ْ → ;  # Drop any sukun that no earlier rule consumed.
124::NULL;  # Pass boundary: the rules below run in a new pass over the converted text.
125
126# TODO: How should a leftover shadda be handled? For now it is replaced by a copy of the preceding character.
127([$IPA_CONSONANT|$VOWEL]){ّ} → $1;  # NOTE(review): the | inside the set may be read as a literal member rather than a union — confirm.
128
129[ ّ ٔ ً ٰ ] → ;  # Remove any of these combining marks that are still left.
130
131::NFC;  # Recompose the output.
132
133			]]></tRule>
134		</transform>
135	</transforms>
136</supplementalData>
137