xref: /aosp_15_r20/external/libxml2/result/intsubset2.xml.sax2 (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1SAX.setDocumentLocator()
2SAX.startDocument()
3SAX.comment(
4Copyright (C) Electronic Dictionary Research and Development Group
5Released under Creative Commons Attribution-ShareAlike Licence (V4.0)
6
7This file only contains the kanjidic2 DTD without the actual database.
8
9http://nihongo.monash.edu/kanjidic2/index.html
10http://www.edrdg.org/edrdg/licence.html
11)
12SAX.internalSubset(kanjidic2, , )
13SAX.comment( Version 1.3
14	This is the DTD of the XML-format kanji file combining information from
15	the KANJIDIC and KANJD212 files. It is intended to be largely self-
16	documenting, with each field being accompanied by an explanatory
17	comment.
18
19	The file covers the following kanji:
20	(a) the 6,355 kanji from JIS X 0208;
21	(b) the 5,801 kanji from JIS X 0212;
22	(c) the 3,625 kanji from JIS X 0213 as follows:
23		(i) the 2,741 kanji which are also in JIS X 0212 have
24		JIS X 0213 code-points (kuten) added to the existing entry;
25		(ii) the 884 "new" kanji have new entries.
26
27	At the end of the explanation for a number of fields there is a tag
28	with the format [N]. This indicates the leading letter(s) of the
29	equivalent field in the KANJIDIC and KANJD212 files.
30
31	The KANJIDIC documentation should also be read for additional
32	information about the information in the file.
33	)
34SAX.elementDecl(kanjidic2, 4, ...)
35SAX.elementDecl(header, 4, ...)
36SAX.comment(
37	The single header element will contain identification information
38	about the version of the file
39	)
40SAX.elementDecl(file_version, 3, ...)
41SAX.comment(
42	This field denotes the version of kanjidic2 structure, as more
43	than one version may exist.
44	)
45SAX.elementDecl(database_version, 3, ...)
46SAX.comment(
47	The version of the file, in the format YYYY-NN, where NN will be
48	a number starting with 01 for the first version released in a
49	calendar year, then increasing for each version in that year.
50	)
51SAX.elementDecl(date_of_creation, 3, ...)
52SAX.comment(
53	The date the file was created in international format (YYYY-MM-DD).
54	)
55SAX.elementDecl(character, 4, ...)
56SAX.elementDecl(literal, 3, ...)
57SAX.comment(
58	The character itself in UTF8 coding.
59	)
60SAX.elementDecl(codepoint, 4, ...)
61SAX.comment(
62	The codepoint element states the code of the character in the various
63	character set standards.
64	)
65SAX.elementDecl(cp_value, 3, ...)
66SAX.comment(
67	The cp_value contains the codepoint of the character in a particular
68	standard. The standard will be identified in the cp_type attribute.
69	)
70SAX.attributeDecl(cp_value, cp_type, 1, 2, NULL, ...)
71SAX.comment(
72	The cp_type attribute states the coding standard applying to the
73	element. The values assigned so far are:
74		jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
75		jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
76		jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
77		ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
78	)
79SAX.elementDecl(radical, 4, ...)
80SAX.elementDecl(rad_value, 3, ...)
81SAX.comment(
82	The radical number, in the range 1 to 214. The particular
83	classification type is stated in the rad_type attribute.
84	)
85SAX.attributeDecl(rad_value, rad_type, 1, 2, NULL, ...)
86SAX.comment(
87	The rad_type attribute states the type of radical classification.
88		classical - as recorded in the KangXi Zidian.
89		nelson - as used in the Nelson "Modern Japanese-English
90		Character Dictionary" (i.e. the Classic, not the New Nelson).
91		This will only be used where Nelson reclassified the kanji.
92	)
93SAX.elementDecl(misc, 4, ...)
94SAX.elementDecl(grade, 3, ...)
95SAX.comment(
96	The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
97	the kanji is taught in Japanese schools. 8 indicates it is one of the
98	remaining Jouyou Kanji to be learned in junior high school, and 9
99	indicates it is a Jinmeiyou (for use in names) kanji. [G]
100	)
101SAX.elementDecl(stroke_count, 3, ...)
102SAX.comment(
103	The stroke count of the kanji, including the radical. If more than
104	one, the first is considered the accepted count, while subsequent ones
105	are common miscounts. (See Appendix E. of the KANJIDIC documentation
106	for some of the rules applied when counting strokes in some of the
107	radicals.) [S]
108	)
109SAX.elementDecl(variant, 3, ...)
110SAX.comment(
111	A cross-reference code to another kanji, usually regarded as a variant.
112	The type of cross-reference is given in the var_type attribute.
113	)
114SAX.attributeDecl(variant, var_type, 1, 2, NULL, ...)
115SAX.comment(
116	The var_type attribute indicates the type of variant code. The current
117	values are:
118		jis208 - in JIS X 0208 - kuten coding
119		jis212 - in JIS X 0212 - kuten coding
120		jis213 - in JIS X 0213 - kuten coding
121		deroo - De Roo number - numeric
122		njecd - Halpern NJECD index number - numeric
123		s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
124		nelson - "Classic" Nelson - numeric
125		oneill - Japanese Names (O'Neill) - numeric
126	)
127SAX.elementDecl(freq, 3, ...)
128SAX.comment(
129	A frequency-of-use ranking. The 2,500 most-used characters have a
130	ranking; those characters that lack this field are not ranked. The
131	frequency is a number from 1 to 2,500 that expresses the relative
132	frequency of occurrence of a character in modern Japanese. This is
133	based on a survey in newspapers, so it is biassed towards kanji
134	used in newspaper articles. The discrimination between the less
135	frequently used kanji is not strong.
136	)
137SAX.elementDecl(rad_name, 3, ...)
138SAX.comment(
139	When the kanji is itself a radical and has a name, this element
140	contains the name (in hiragana.) [T2]
141	)
142SAX.elementDecl(dic_number, 4, ...)
143SAX.comment(
144	This element contains the index numbers and similar unstructured
145	information such as page numbers in a number of published dictionaries,
146	and instructional books on kanji.
147	)
148SAX.elementDecl(dic_ref, 3, ...)
149SAX.comment(
150	Each dic_ref contains an index number. The particular dictionary,
151	etc. is defined by the dr_type attribute.
152	)
153SAX.attributeDecl(dic_ref, dr_type, 1, 2, NULL, ...)
154SAX.comment(
155	The dr_type defines the dictionary or reference book, etc. to which
156	dic_ref element applies. The initial allocation is:
157	  nelson_c - "Modern Reader's Japanese-English Character Dictionary",
158	  	edited by Andrew Nelson (now published as the "Classic"
159	  	Nelson).
160	  nelson_n - "The New Nelson Japanese-English Character Dictionary",
161	  	edited by John Haig.
162	  halpern_njecd - "New Japanese-English Character Dictionary",
163	  	edited by Jack Halpern.
164	  halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
165	  	Jack Halpern.
166	  heisig - "Remembering The  Kanji"  by  James Heisig.
167	  gakken - "A  New Dictionary of Kanji Usage" (Gakken)
168	  oneill_names - "Japanese Names", by P.G. O'Neill.
169	  oneill_kk - "Essential Kanji" by P.G. O'Neill.
170	  moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
171	  	additional attributes are used: m_vol:  the volume of the
172	  	dictionary in which the kanji is found, and m_page: the page
173	  	number in the volume.
174	  henshall - "A Guide To Remembering Japanese Characters" by
175	  	Kenneth G.  Henshall.
176	  sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
177	  sakade - "A Guide To Reading and Writing Japanese" edited by
178	  	Florence Sakade.
179	  henshall3 - "A Guide To Reading and Writing Japanese" 3rd
180		edition, edited by Henshall, Seeley and De Groot.
181	  tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
182	  crowley - "The Kanji Way to Japanese Language Power" by
183	  	Dale Crowley.
184	  kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
185	  busy_people - "Japanese For Busy People" vols I-III, published
186		by the AJLT. The codes are the volume.chapter.
187	  kodansha_compact - the "Kodansha Compact Kanji Guide".
188	)
189SAX.attributeDecl(dic_ref, m_vol, 1, 3, NULL, ...)
190SAX.comment(
191	See above under "moro".
192	)
193SAX.attributeDecl(dic_ref, m_page, 1, 3, NULL, ...)
194SAX.comment(
195	See above under "moro".
196	)
197SAX.elementDecl(query_code, 4, ...)
198SAX.comment(
199	These codes contain information relating to the glyph, and can be used
200	for finding a required kanji. The type of code is defined by the
201	qc_type attribute.
202	)
203SAX.elementDecl(q_code, 3, ...)
204SAX.comment(
205	The q_code contains the actual query-code value, according to the
206	qc_type attribute.
207	)
208SAX.attributeDecl(q_code, qc_type, 1, 2, NULL, ...)
209SAX.comment(
210	The q_code attribute defines the type of query code. The current values
211	are:
212	  skip -  Halpern's SKIP (System  of  Kanji  Indexing  by  Patterns)
213	  	code. The  format is n-nn-nn.  See the KANJIDIC  documentation
214	  	for  a description of the code and restrictions on  the
215	  	commercial  use  of this data. [P]
216
217	  sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle
218	  	1996) by Spahn and Hadamitzky. They are in the form nxnn.n,
219	  	e.g.  3k11.2, where the  kanji has 3 strokes in the
220	  	identifying radical, it is radical "k" in the SH
221	  	classification system, there are 11 other strokes, and it is
222	  	the 2nd kanji in the 3k11 sequence. (I am very grateful to
223	  	Mark Spahn for providing the list of these descriptor codes
224	  	for the kanji in this file.) [I]
225	  four_corner - the "Four Corner" code for the kanji. This is a code
226	  	invented by Wang Chen in 1928. See the KANJIDIC documentation
227	  	for  an overview of  the Four Corner System. [Q]
228
229	  deroo - the codes developed by the late Father Joseph De Roo, and
230	  	published in  his book "2001 Kanji" (Bojinsha). Fr De Roo
231	  	gave his permission for these codes to be included. [DR]
232	  misclass - a possible misclassification of the kanji according
233		to one of the code types. (See the "Z" codes in the KANJIDIC
234		documentation for more details.)
235
236	)
237SAX.elementDecl(reading_meaning, 4, ...)
238SAX.comment(
239	The readings for the kanji in several languages, and the meanings, also
240	in several languages. The readings and meanings are grouped to enable
241	the handling of the situation where the meaning is differentiated by
242	reading. [T1]
243	)
244SAX.elementDecl(nanori, 3, ...)
245SAX.comment(
246	Japanese readings that are now only associated with names.
247	)
248SAX.elementDecl(rmgroup, 4, ...)
249SAX.elementDecl(reading, 3, ...)
250SAX.comment(
251	The reading element contains the reading or pronunciation
252	of the kanji.
253	)
254SAX.attributeDecl(reading, r_type, 1, 2, NULL, ...)
255SAX.comment(
256	The r_type attribute defines the type of reading in the reading
257	element. The current values are:
258	  pinyin - the modern PinYin romanization of the Chinese reading
259	  	of the kanji. The tones are represented by a concluding
260	  	digit. [Y]
261	  korean_r - the romanized form of the Korean reading(s) of the
262	  	kanji.  The readings are in the (Republic of Korea) Ministry
263	  	of Education style of romanization. [W]
264	  korean_h - the Korean reading(s) of the kanji in hangul.
265	  ja_on - the "on" Japanese reading of the kanji, in katakana. A
266	  	second attribute r_status, if present, will indicate with
267	  	a value of "jy" whether the reading is approved for a
268	  	"Jouyou kanji".
269	  ja_kun - the "kun" Japanese reading of the kanji, in hiragana.
270	  	Where relevant the okurigana is also included separated by a
271	  	".". Readings associated with prefixes and suffixes are
272	  	marked with a "-". A second attribute r_status, if present,
273	  	will indicate with a value of "jy" whether the reading is
274	  	approved for a "Jouyou kanji".
275	)
276SAX.attributeDecl(reading, r_status, 1, 3, NULL, ...)
277SAX.comment(
278	See under ja_on and ja_kun above.
279	)
280SAX.elementDecl(meaning, 3, ...)
281SAX.comment(
282	The meaning associated with the kanji.
283	)
284SAX.attributeDecl(meaning, m_lang, 1, 3, NULL, ...)
285SAX.comment(
286	The m_lang attribute defines the target language of the meaning. It
287	will be coded using the two-letter language code from the ISO 639
288	standard. When absent, the value "en" (i.e. English) is implied. [{}]
289	)
290SAX.externalSubset(kanjidic2, , )
291SAX.startElementNs(kanjidic2, NULL, NULL, 0, 0, 0)
292SAX.characters(
293, 1)
294SAX.endElementNs(kanjidic2, NULL, NULL)
295SAX.endDocument()
296