<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<!-- Origin: /aosp_15_r20/external/cldr/common/transforms/Han-Spacedhan.xml (revision 912701f9769bb47905792267661f0baf2b85bed5) -->
<supplementalData>
	<version number="$Revision$"/>
	<transforms>
		<transform source="Han" target="Spacedhan" direction="both" visibility="internal">
			<tRule>
# Only intended for internal use.
#
# Pass 1: make sure Han are normalized, including characters that contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
# where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
# The supplementary-plane members of that set (U+1F210..U+1F212, U+1F214..U+1F23A,
# U+1F240..U+1F248, U+1F250, U+1F251) are written as \U escapes so that they survive
# tools that mishandle characters outside the BMP.
# NOTE(review): these five ranges were reconstructed from mis-encoded bytes - confirm against upstream CLDR.
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾\U0001F210-\U0001F212\U0001F214-\U0001F23A\U0001F240-\U0001F248\U0001F250\U0001F251][:ideographic:][:sc=han:]] nfkc;

# Pass 2: fold fullwidth forms to their halfwidth counterparts.
:: fullwidth-halfwidth;

# Pass 3 (forward): replace CJK punctuation with Latin-style punctuation.
# The five marks that also have a halfwidth form (U+FF61..U+FF65) are listed twice:
# once as the ideographic character and once as the halfwidth character. The halfwidth
# form is spelled as a \u escape so the two rules stay distinct (an identical earlier
# rule would mask the later one).
。 → '.';
\uFF61 → '.';
、 → ',';
\uFF64 → ',';
《 → '«';
》 → '»';
〈 → '‹';
〉 → '›';
「 → '‘';
」 → '’';
\uFF62 → '‘';
\uFF63 → '’';
『 → '“';
』 → '”';

・ → '‧';
\uFF65 → '‧';
々 → '⓶';
〜 → '~';

# Punctuation after which a following letter is set off by a space.
# NOTE(review): the unescaped . , : ? ! and semicolon repeat the escaped ASCII forms.
# They look like width-folded fullwidth variants. Harmless as written - confirm against upstream CLDR.
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
# Punctuation before which a preceding letter is set off by a space.
# Wrapped in [...] so the variable is a single set (union of Ps and Pi), not a two-set sequence.
$initialPunct = [[:Ps:][:Pi:]];

# Forward: add a space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punctuation.
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;

# Reverse: remove spacing between ideographs and other letters.
← [:Ideographic:] { ' ' } [:Letter:] ;
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
			</tRule>
		</transform>
	</transforms>
</supplementalData>