<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision$"/>
  <transforms>
    <transform source="Han" target="Spacedhan" direction="both" visibility="internal">
      <tRule>
# Only intended for internal use
# Make sure Han are normalized, including characters that contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
# NOTE(review): the run of hyphens at the end of the first set below may be residue of
# characters lost in transcription. Confirm against the freshly computed set.
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾---][:ideographic:][:sc=han:]] nfkc;
:: fullwidth-halfwidth;

# Map CJK punctuation to Western-style punctuation.
# NOTE(review): several rules below appear twice with an identical left-hand side.
# One rule of each pair may originally have targeted a halfwidth variant. Confirm.
。 → '.';
。→ '.';
、→ ',';
、→ ',';
《→ '«';
》→ '»';
〈 → '‹';
〉→ '›';
「→ '‘';
」→ '’';
「→ '‘';
」→ '’';
『→ '“';
』→ '”';

・→ '‧';
・ → '‧';
々→ '⓶';
〜→ '~';

# Punctuation classes used by the spacing rules below.
# $initialPunct is written as two adjacent sets with no enclosing brackets.
# It is only referenced from inside a bracketed set.
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
$initialPunct = [:Ps:][:Pi:];
# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
# remove spacing between ideographs and other letters
# (these two rules use the reverse arrow, so they belong to the Spacedhan to Han direction)
← [:Ideographic:] { ' ' } [:Letter:] ;
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
      </tRule>
    </transform>
  </transforms>
</supplementalData>