<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision$"/>
	<transforms>
		<transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn">
			<tRule><![CDATA[
# Arabic-script <-> Latin-script transliteration rules.
#
# Reading guide for the rule arrows used below:
#   ↔  two-way rule (used for Arabic→Latin and for Latin→Arabic)
#   →  forward-only rule (Arabic→Latin)
#   ←  reverse-only rule (Latin→Arabic)
# Rules are order-sensitive: longer/more specific rules are listed before
# shorter ones that would otherwise mask them.
#
# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
#   a) where required for disambiguation.
#   b) with underdot instead of cedilla for letters like SAD,
#      since those are explicitly in Unicode for transliteration.
#   c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).

# Forward global filter: only characters in this set are touched when going Arabic→Latin.
:: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;
# Normalize before the conversion rules: NFKD in the forward direction; the parenthesised
# NFC is the step used in the reverse direction.
:: NFKD (NFC);

# Combining marks appended to a Latin letter (or digit/punctuation) so that two different
# Arabic-script characters never share the same Latin spelling, which keeps ↔ rules reversible.
$disambig = ̱ ;    # primary disambiguation mark (a combining mark drawn below the base)
$disambig2 = ̰ ;    # secondary disambiguation mark, used where $disambig is already taken (see FARSI YEH)
$under = ̣ ;    # under-dot used for the "emphatic" letters (see note b above)
$descender = ˌ;    # spacing low mark, used only for the ښ rule below
# Marks whose canonical combining class is neither 0 nor 230; used by the TEH MARBUTA
# reverse rule to skip over marks that canonical ordering places before the diaeresis.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]];

# non-letters
# Between two decimal digits the separators map to plain ',' and '.'; the { } braces mark the
# part that is actually replaced, the surrounding [:Nd:] are context only.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
# Outside a digit context the separators carry $disambig so they stay distinct from
# ARABIC COMMA and ARABIC FULL STOP.
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Extended (U+06Fx) digits get $disambig so they round-trip separately from the
# plain Arabic-Indic digits that follow.
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE

؉ ↔ ‰ ; # U+0609 ARABIC-INDIC PER MILLE SIGN
؊ ↔ ‱ ; # U+060A ARABIC-INDIC PER TEN THOUSAND SIGN
۔ ↔ '.' ; # U+06D4 ARABIC FULL STOP

# letters
# long vowels
# (short-vowel mark + carrier letter; listed before the single-character rules so the
# two-character sequence is matched as a unit)
َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF
ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW
ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
# (their Latin side starts with a letter that also has its own single-letter rule further down)
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN

# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA
# Reverse-only: t + one or more $notAbove marks + diaeresis. The marks are captured as $1 and
# re-emitted after the cursor (|), so they are kept and then converted by later rules.
ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA

# non-Arabic language
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE

# Arabic language
ء ↔ ʾ ; # ARABIC LETTER HAMZA
ا ↔ a $under; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
# Forward-only with an empty result: tatweel is dropped and is never produced on the way back.
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH
# Nunation marks: the superscript n keeps them distinct from the plain short vowels below.
ً ↔ aⁿ ; # ARABIC FATHATAN
ٌ ↔ uⁿ ; # ARABIC DAMMATAN
ٍ ↔ iⁿ ; # ARABIC KASRATAN
َ ↔ a ; # ARABIC FATHA
ُ ↔ u ; # ARABIC DAMMA
ِ ↔ i ; # ARABIC KASRA
ّ ↔ ̃ ; # ARABIC SHADDA
ْ ↔ ̊ ; # ARABIC SUKUN

# special combining marks
ٓ ↔ ̂ ; # ARABIC MADDAH ABOVE
ٔ ↔ ̉ ; # ARABIC HAMZA ABOVE
ٕ ↔ ̹ ; # ARABIC HAMZA BELOW

# Some non-Arabic language (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF

# fallbacks TODO roundtrip where possible, using diacritics to distinguish
# All rules from here to the "fallbacks" section at the end are forward-only (→):
# they give a Latin spelling but there is no Latin→Arabic rule that restores the letter.
# Several of them share a Latin result with each other or with a rule above
# (e.g. 'th', 'dh', 'ch', 'k', 'g', 'ḍ', 'ṛ', 'ṭ'), so they are lossy by design.
#https://en.wikipedia.org/wiki/Sindhi_transliteration
ٺ→ṭh;
ٿ→th;
ٽ→ṭ;
ڙ→ṛ;
ڦ→ph;
ڻ→ṇ;
ڱ→ṅ;
ڃ→ñ;
ڪ→k;
ڄ→j̈;
ۃ→ẖ;
ڳ→g̤;
ڍ→ḍh;
ڌ→dh;
ڏ→d̤;
ڊ→ḍ;
ڇ→ch;
ڀ→bh;
ٻ→ḇ;
۽→'&';
۾→'mn';

#https://en.wiktionary.org/wiki/Wiktionary:Urdu_transliteration
ھ → ʱ ;
# NOTE(review): the result below starts with a literal U+25CC DOTTED CIRCLE followed by a
# combining tilde. The dotted circle looks like a display placeholder copied from the
# reference table rather than intended output — confirm whether it should be emitted.
ں → ◌̃ ;
ے → ai ;
ڈ → ḍ ;
ڑ → ṛ ;
ٹ → ṭ ;

#https://www.eki.ee/wgrs/rom2_ps.htm
#https://en.wikipedia.org/wiki/Pashto_alphabet
ټ → ṯ ;
ځ → dz ;
څ → ts ;
ډ → ḏ ;
ړ → ṟ ;
ږ → z͟h ;
ګ → g ;
ڼ → ṉ ;
ۍ → ạy ;
ې → e ;

#https://www.eki.ee/wgrs/rom1_ug.pdf
ہ → ḥ ;
ە → ĥ ;

# fallbacks
# Reverse-only rewrites for Latin input that has no rule of its own above. Each one replaces
# the Latin text with other Latin text and puts the cursor (|) in front of the replacement,
# so the replacement is then converted to Arabic by the regular rules.
| s ← c } [eiy];    # c before e/i/y is treated as s ( } introduces following context)
| k ← c ;           # any other c is treated as k
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ⁿ;            # a superscript n not consumed by the nunation rules becomes plain n
# The three lines below act in the reverse (Latin→Arabic) direction via their parenthesised
# parts: lowercasing, NFD normalization, and the reverse global filter. The unparenthesised
# NFC is the final step of the forward direction.
:: (lower) ;
::NFC (NFD);
# Reverse global filter: Latin letters plus the punctuation, digits and combining marks
# produced by the rules above.
# NOTE(review): ';' appears twice in the set; the duplicate is harmless but looks unintended.
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );
			]]></tRule>
		</transform>
	</transforms>
</supplementalData>