Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XMLEntityCodec |
|
| 3.0;3 | ||||
XMLEntityCodec$1 |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$EntityMap |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$IntHashMap |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$IntHashMap$Entry |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$LookupEntityMap |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$PrimitiveEntityMap |
|
| 3.0;3 |
1 | /* | |
2 | * $Id: XMLEntityCodec.java 7963 2007-08-21 08:53:15Z dirk.olmes $ | |
3 | * -------------------------------------------------------------------------------------- | |
4 | * Copyright (c) MuleSource, Inc. All rights reserved. http://www.mulesource.com | |
5 | * | |
6 | * The software in this package is published under the terms of the CPAL v1.0 | |
7 | * license, a copy of which has been included with this distribution in the | |
8 | * LICENSE.txt file. | |
9 | */ | |
10 | ||
11 | package org.mule.util; | |
12 | ||
13 | import java.io.IOException; | |
14 | import java.io.Writer; | |
15 | import java.util.HashMap; | |
16 | import java.util.Map; | |
17 | ||
18 | /** | |
19 | * This encoder contains methods that convert characters to Character entities as | |
20 | * defined by http://www.w3.org/TR/REC-html40/sgml/entities.html. More precisely it | |
21 | * combines the functionality of {@link org.apache.commons.lang.StringEscapeUtils#escapeXml(String)} and | |
22 | * {@link org.apache.commons.lang.StringEscapeUtils#escapeHtml(String)} into a single pass. | |
23 | */ | |
24 | // @ThreadSafe | |
25 | public final class XMLEntityCodec | |
26 | { | |
/** Entity table shared by the static encode/decode/lookup methods of this codec. */
private static final Entities MuleEntities = new Entities();

static
{
    // Registration order is unchanged: XML apostrophe, the basic XML entities,
    // ISO-8859-1 and finally the HTML 4.0 set.
    final String[][][] entitySets =
    {
        Entities.APOS_ARRAY,
        Entities.BASIC_ARRAY,
        Entities.ISO8859_1_ARRAY,
        Entities.HTML40_ARRAY
    };
    for (int i = 0; i < entitySets.length; i++)
    {
        MuleEntities.addEntities(entitySets[i]);
    }
}
36 | ||
/**
 * Does nothing; the codec's functionality is exposed through static methods.
 */
protected XMLEntityCodec()
{
    super();
}
41 | ||
42 | public static String encodeString(String str) | |
43 | { | |
44 | 6 | if (StringUtils.isEmpty(str)) |
45 | { | |
46 | 0 | return str; |
47 | } | |
48 | ||
49 | 6 | return MuleEntities.escape(str); |
50 | } | |
51 | ||
52 | public static String decodeString(String str) | |
53 | { | |
54 | 6 | if (StringUtils.isEmpty(str)) |
55 | { | |
56 | 0 | return str; |
57 | } | |
58 | ||
59 | 6 | return MuleEntities.unescape(str); |
60 | } | |
61 | ||
62 | /** | |
63 | * <p> | |
64 | * Returns the name of the entity identified by the specified value. | |
65 | * </p> | |
66 | * | |
67 | * @param value the value to locate | |
68 | * @return entity name associated with the specified value | |
69 | */ | |
70 | public static String entityName(int value) | |
71 | { | |
72 | 0 | return MuleEntities.map.name(value); |
73 | } | |
74 | ||
75 | /** | |
76 | * <p> | |
77 | * Returns the value of the entity identified by the specified name. | |
78 | * </p> | |
79 | * | |
80 | * @param name the name to locate | |
81 | * @return entity value associated with the specified name | |
82 | */ | |
83 | public static int entityValue(String name) | |
84 | { | |
85 | 0 | return MuleEntities.map.value(name); |
86 | } | |
87 | ||
88 | ||
89 | // | |
90 | // everything from here on is copied from commons-lang 2.2 + svn since it is not | |
91 | // extensible and referencing the package-private class can lead to classloader | |
92 | // problems :-( | |
93 | // | |
94 | ||
95 | /* | |
96 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
97 | * contributor license agreements. See the NOTICE file distributed with | |
98 | * this work for additional information regarding copyright ownership. | |
99 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
100 | * (the "License"); you may not use this file except in compliance with | |
101 | * the License. You may obtain a copy of the License at | |
102 | * | |
103 | * http://www.apache.org/licenses/LICENSE-2.0 | |
104 | * | |
105 | * Unless required by applicable law or agreed to in writing, software | |
106 | * distributed under the License is distributed on an "AS IS" BASIS, | |
107 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
108 | * See the License for the specific language governing permissions and | |
109 | * limitations under the License. | |
110 | */ | |
111 | ||
112 | /** | |
113 | * <p>Provides HTML and XML entity utilities.</p> | |
114 | * | |
115 | * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> | |
116 | * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> | |
117 | * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> | |
118 | * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> | |
119 | * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> | |
120 | * | |
121 | * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a> | |
122 | * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a> | |
123 | * @since 2.0 | |
124 | * @version $Id: XMLEntityCodec.java 7963 2007-08-21 08:53:15Z dirk.olmes $ | |
125 | */ | |
126 | 8 | private static class Entities |
127 | { | |
128 | ||
129 | 2 | private static final String[][] BASIC_ARRAY = |
130 | { | |
131 | {"quot", "34"}, // " - double-quote | |
132 | {"amp", "38"}, // & - ampersand | |
133 | {"lt", "60"}, // < - less-than | |
134 | {"gt", "62"}, // > - greater-than | |
135 | }; | |
136 | ||
137 | 2 | private static final String[][] APOS_ARRAY = |
138 | { | |
139 | {"apos", "39"}, // XML apostrophe | |
140 | }; | |
141 | ||
142 | // package scoped for testing | |
143 | 2 | static final String[][] ISO8859_1_ARRAY = |
144 | { | |
145 | {"nbsp", "160"}, // non-breaking space | |
146 | {"iexcl", "161"}, //inverted exclamation mark | |
147 | {"cent", "162"}, //cent sign | |
148 | {"pound", "163"}, //pound sign | |
149 | {"curren", "164"}, //currency sign | |
150 | {"yen", "165"}, //yen sign = yuan sign | |
151 | {"brvbar", "166"}, //broken bar = broken vertical bar | |
152 | {"sect", "167"}, //section sign | |
153 | {"uml", "168"}, //diaeresis = spacing diaeresis | |
154 | {"copy", "169"}, // © - copyright sign | |
155 | {"ordf", "170"}, //feminine ordinal indicator | |
156 | {"laquo", "171"}, //left-pointing double angle quotation mark = left pointing guillemet | |
157 | {"not", "172"}, //not sign | |
158 | {"shy", "173"}, //soft hyphen = discretionary hyphen | |
159 | {"reg", "174"}, // ® - registered trademark sign | |
160 | {"macr", "175"}, //macron = spacing macron = overline = APL overbar | |
161 | {"deg", "176"}, //degree sign | |
162 | {"plusmn", "177"}, //plus-minus sign = plus-or-minus sign | |
163 | {"sup2", "178"}, //superscript two = superscript digit two = squared | |
164 | {"sup3", "179"}, //superscript three = superscript digit three = cubed | |
165 | {"acute", "180"}, //acute accent = spacing acute | |
166 | {"micro", "181"}, //micro sign | |
167 | {"para", "182"}, //pilcrow sign = paragraph sign | |
168 | {"middot", "183"}, //middle dot = Georgian comma = Greek middle dot | |
169 | {"cedil", "184"}, //cedilla = spacing cedilla | |
170 | {"sup1", "185"}, //superscript one = superscript digit one | |
171 | {"ordm", "186"}, //masculine ordinal indicator | |
172 | {"raquo", "187"}, //right-pointing double angle quotation mark = right pointing guillemet | |
173 | {"frac14", "188"}, //vulgar fraction one quarter = fraction one quarter | |
174 | {"frac12", "189"}, //vulgar fraction one half = fraction one half | |
175 | {"frac34", "190"}, //vulgar fraction three quarters = fraction three quarters | |
176 | {"iquest", "191"}, //inverted question mark = turned question mark | |
177 | {"Agrave", "192"}, // À - uppercase A, grave accent | |
178 | {"Aacute", "193"}, // Á - uppercase A, acute accent | |
179 | {"Acirc", "194"}, // Â - uppercase A, circumflex accent | |
180 | {"Atilde", "195"}, // Ã - uppercase A, tilde | |
181 | {"Auml", "196"}, // Ä - uppercase A, umlaut | |
182 | {"Aring", "197"}, // Å - uppercase A, ring | |
183 | {"AElig", "198"}, // Æ - uppercase AE | |
184 | {"Ccedil", "199"}, // Ç - uppercase C, cedilla | |
185 | {"Egrave", "200"}, // È - uppercase E, grave accent | |
186 | {"Eacute", "201"}, // É - uppercase E, acute accent | |
187 | {"Ecirc", "202"}, // Ê - uppercase E, circumflex accent | |
188 | {"Euml", "203"}, // Ë - uppercase E, umlaut | |
189 | {"Igrave", "204"}, // Ì - uppercase I, grave accent | |
190 | {"Iacute", "205"}, // Í - uppercase I, acute accent | |
191 | {"Icirc", "206"}, // Î - uppercase I, circumflex accent | |
192 | {"Iuml", "207"}, // Ï - uppercase I, umlaut | |
193 | {"ETH", "208"}, // Ð - uppercase Eth, Icelandic | |
194 | {"Ntilde", "209"}, // Ñ - uppercase N, tilde | |
195 | {"Ograve", "210"}, // Ò - uppercase O, grave accent | |
196 | {"Oacute", "211"}, // Ó - uppercase O, acute accent | |
197 | {"Ocirc", "212"}, // Ô - uppercase O, circumflex accent | |
198 | {"Otilde", "213"}, // Õ - uppercase O, tilde | |
199 | {"Ouml", "214"}, // Ö - uppercase O, umlaut | |
200 | {"times", "215"}, //multiplication sign | |
201 | {"Oslash", "216"}, // Ø - uppercase O, slash | |
202 | {"Ugrave", "217"}, // Ù - uppercase U, grave accent | |
203 | {"Uacute", "218"}, // Ú - uppercase U, acute accent | |
204 | {"Ucirc", "219"}, // Û - uppercase U, circumflex accent | |
205 | {"Uuml", "220"}, // Ü - uppercase U, umlaut | |
206 | {"Yacute", "221"}, // Ý - uppercase Y, acute accent | |
207 | {"THORN", "222"}, // Þ - uppercase THORN, Icelandic | |
208 | {"szlig", "223"}, // ß - lowercase sharps, German | |
209 | {"agrave", "224"}, // à - lowercase a, grave accent | |
210 | {"aacute", "225"}, // á - lowercase a, acute accent | |
211 | {"acirc", "226"}, // â - lowercase a, circumflex accent | |
212 | {"atilde", "227"}, // ã - lowercase a, tilde | |
213 | {"auml", "228"}, // ä - lowercase a, umlaut | |
214 | {"aring", "229"}, // å - lowercase a, ring | |
215 | {"aelig", "230"}, // æ - lowercase ae | |
216 | {"ccedil", "231"}, // ç - lowercase c, cedilla | |
217 | {"egrave", "232"}, // è - lowercase e, grave accent | |
218 | {"eacute", "233"}, // é - lowercase e, acute accent | |
219 | {"ecirc", "234"}, // ê - lowercase e, circumflex accent | |
220 | {"euml", "235"}, // ë - lowercase e, umlaut | |
221 | {"igrave", "236"}, // ì - lowercase i, grave accent | |
222 | {"iacute", "237"}, // í - lowercase i, acute accent | |
223 | {"icirc", "238"}, // î - lowercase i, circumflex accent | |
224 | {"iuml", "239"}, // ï - lowercase i, umlaut | |
225 | {"eth", "240"}, // ð - lowercase eth, Icelandic | |
226 | {"ntilde", "241"}, // ñ - lowercase n, tilde | |
227 | {"ograve", "242"}, // ò - lowercase o, grave accent | |
228 | {"oacute", "243"}, // ó - lowercase o, acute accent | |
229 | {"ocirc", "244"}, // ô - lowercase o, circumflex accent | |
230 | {"otilde", "245"}, // õ - lowercase o, tilde | |
231 | {"ouml", "246"}, // ö - lowercase o, umlaut | |
232 | {"divide", "247"}, // division sign | |
233 | {"oslash", "248"}, // ø - lowercase o, slash | |
234 | {"ugrave", "249"}, // ù - lowercase u, grave accent | |
235 | {"uacute", "250"}, // ú - lowercase u, acute accent | |
236 | {"ucirc", "251"}, // û - lowercase u, circumflex accent | |
237 | {"uuml", "252"}, // ü - lowercase u, umlaut | |
238 | {"yacute", "253"}, // ý - lowercase y, acute accent | |
239 | {"thorn", "254"}, // þ - lowercase thorn, Icelandic | |
240 | {"yuml", "255"}, // ÿ - lowercase y, umlaut | |
241 | }; | |
242 | ||
243 | // http://www.w3.org/TR/REC-html40/sgml/entities.html | |
244 | // package scoped for testing | |
245 | 2 | static final String[][] HTML40_ARRAY = |
246 | { | |
247 | // <!-- Latin Extended-B --> | |
248 | {"fnof", "402"}, //latin small f with hook = function= florin, U+0192 ISOtech --> | |
249 | // <!-- Greek --> | |
250 | {"Alpha", "913"}, //greek capital letter alpha, U+0391 --> | |
251 | {"Beta", "914"}, //greek capital letter beta, U+0392 --> | |
252 | {"Gamma", "915"}, //greek capital letter gamma,U+0393 ISOgrk3 --> | |
253 | {"Delta", "916"}, //greek capital letter delta,U+0394 ISOgrk3 --> | |
254 | {"Epsilon", "917"}, //greek capital letter epsilon, U+0395 --> | |
255 | {"Zeta", "918"}, //greek capital letter zeta, U+0396 --> | |
256 | {"Eta", "919"}, //greek capital letter eta, U+0397 --> | |
257 | {"Theta", "920"}, //greek capital letter theta,U+0398 ISOgrk3 --> | |
258 | {"Iota", "921"}, //greek capital letter iota, U+0399 --> | |
259 | {"Kappa", "922"}, //greek capital letter kappa, U+039A --> | |
260 | {"Lambda", "923"}, //greek capital letter lambda,U+039B ISOgrk3 --> | |
261 | {"Mu", "924"}, //greek capital letter mu, U+039C --> | |
262 | {"Nu", "925"}, //greek capital letter nu, U+039D --> | |
263 | {"Xi", "926"}, //greek capital letter xi, U+039E ISOgrk3 --> | |
264 | {"Omicron", "927"}, //greek capital letter omicron, U+039F --> | |
265 | {"Pi", "928"}, //greek capital letter pi, U+03A0 ISOgrk3 --> | |
266 | {"Rho", "929"}, //greek capital letter rho, U+03A1 --> | |
267 | // <!-- there is no Sigmaf, and no U+03A2 character either --> | |
268 | {"Sigma", "931"}, //greek capital letter sigma,U+03A3 ISOgrk3 --> | |
269 | {"Tau", "932"}, //greek capital letter tau, U+03A4 --> | |
270 | {"Upsilon", "933"}, //greek capital letter upsilon,U+03A5 ISOgrk3 --> | |
271 | {"Phi", "934"}, //greek capital letter phi,U+03A6 ISOgrk3 --> | |
272 | {"Chi", "935"}, //greek capital letter chi, U+03A7 --> | |
273 | {"Psi", "936"}, //greek capital letter psi,U+03A8 ISOgrk3 --> | |
274 | {"Omega", "937"}, //greek capital letter omega,U+03A9 ISOgrk3 --> | |
275 | {"alpha", "945"}, //greek small letter alpha,U+03B1 ISOgrk3 --> | |
276 | {"beta", "946"}, //greek small letter beta, U+03B2 ISOgrk3 --> | |
277 | {"gamma", "947"}, //greek small letter gamma,U+03B3 ISOgrk3 --> | |
278 | {"delta", "948"}, //greek small letter delta,U+03B4 ISOgrk3 --> | |
279 | {"epsilon", "949"}, //greek small letter epsilon,U+03B5 ISOgrk3 --> | |
280 | {"zeta", "950"}, //greek small letter zeta, U+03B6 ISOgrk3 --> | |
281 | {"eta", "951"}, //greek small letter eta, U+03B7 ISOgrk3 --> | |
282 | {"theta", "952"}, //greek small letter theta,U+03B8 ISOgrk3 --> | |
283 | {"iota", "953"}, //greek small letter iota, U+03B9 ISOgrk3 --> | |
284 | {"kappa", "954"}, //greek small letter kappa,U+03BA ISOgrk3 --> | |
285 | {"lambda", "955"}, //greek small letter lambda,U+03BB ISOgrk3 --> | |
286 | {"mu", "956"}, //greek small letter mu, U+03BC ISOgrk3 --> | |
287 | {"nu", "957"}, //greek small letter nu, U+03BD ISOgrk3 --> | |
288 | {"xi", "958"}, //greek small letter xi, U+03BE ISOgrk3 --> | |
289 | {"omicron", "959"}, //greek small letter omicron, U+03BF NEW --> | |
290 | {"pi", "960"}, //greek small letter pi, U+03C0 ISOgrk3 --> | |
291 | {"rho", "961"}, //greek small letter rho, U+03C1 ISOgrk3 --> | |
292 | {"sigmaf", "962"}, //greek small letter final sigma,U+03C2 ISOgrk3 --> | |
293 | {"sigma", "963"}, //greek small letter sigma,U+03C3 ISOgrk3 --> | |
294 | {"tau", "964"}, //greek small letter tau, U+03C4 ISOgrk3 --> | |
295 | {"upsilon", "965"}, //greek small letter upsilon,U+03C5 ISOgrk3 --> | |
296 | {"phi", "966"}, //greek small letter phi, U+03C6 ISOgrk3 --> | |
297 | {"chi", "967"}, //greek small letter chi, U+03C7 ISOgrk3 --> | |
298 | {"psi", "968"}, //greek small letter psi, U+03C8 ISOgrk3 --> | |
299 | {"omega", "969"}, //greek small letter omega,U+03C9 ISOgrk3 --> | |
300 | {"thetasym", "977"}, //greek small letter theta symbol,U+03D1 NEW --> | |
301 | {"upsih", "978"}, //greek upsilon with hook symbol,U+03D2 NEW --> | |
302 | {"piv", "982"}, //greek pi symbol, U+03D6 ISOgrk3 --> | |
303 | // <!-- General Punctuation --> | |
304 | {"bull", "8226"}, //bullet = black small circle,U+2022 ISOpub --> | |
305 | // <!-- bullet is NOT the same as bullet operator, U+2219 --> | |
306 | {"hellip", "8230"}, //horizontal ellipsis = three dot leader,U+2026 ISOpub --> | |
307 | {"prime", "8242"}, //prime = minutes = feet, U+2032 ISOtech --> | |
308 | {"Prime", "8243"}, //double prime = seconds = inches,U+2033 ISOtech --> | |
309 | {"oline", "8254"}, //overline = spacing overscore,U+203E NEW --> | |
310 | {"frasl", "8260"}, //fraction slash, U+2044 NEW --> | |
311 | // <!-- Letterlike Symbols --> | |
312 | {"weierp", "8472"}, //script capital P = power set= Weierstrass p, U+2118 ISOamso --> | |
313 | {"image", "8465"}, //blackletter capital I = imaginary part,U+2111 ISOamso --> | |
314 | {"real", "8476"}, //blackletter capital R = real part symbol,U+211C ISOamso --> | |
315 | {"trade", "8482"}, //trade mark sign, U+2122 ISOnum --> | |
316 | {"alefsym", "8501"}, //alef symbol = first transfinite cardinal,U+2135 NEW --> | |
317 | // <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the | |
318 | // same glyph could be used to depict both characters --> | |
319 | // <!-- Arrows --> | |
320 | {"larr", "8592"}, //leftwards arrow, U+2190 ISOnum --> | |
321 | {"uarr", "8593"}, //upwards arrow, U+2191 ISOnum--> | |
322 | {"rarr", "8594"}, //rightwards arrow, U+2192 ISOnum --> | |
323 | {"darr", "8595"}, //downwards arrow, U+2193 ISOnum --> | |
324 | {"harr", "8596"}, //left right arrow, U+2194 ISOamsa --> | |
325 | {"crarr", "8629"}, //downwards arrow with corner leftwards= carriage return, U+21B5 NEW --> | |
326 | {"lArr", "8656"}, //leftwards double arrow, U+21D0 ISOtech --> | |
327 | // <!-- ISO 10646 does not say that lArr is the same as the 'is implied by' | |
328 | // arrow but also does not have any other character for that function. | |
329 | // So ? lArr canbe used for 'is implied by' as ISOtech suggests --> | |
330 | {"uArr", "8657"}, //upwards double arrow, U+21D1 ISOamsa --> | |
331 | {"rArr", "8658"}, //rightwards double arrow,U+21D2 ISOtech --> | |
332 | // <!-- ISO 10646 does not say this is the 'implies' character but does not | |
333 | // have another character with this function so ?rArr can be used for | |
334 | // 'implies' as ISOtech suggests --> | |
335 | {"dArr", "8659"}, //downwards double arrow, U+21D3 ISOamsa --> | |
336 | {"hArr", "8660"}, //left right double arrow,U+21D4 ISOamsa --> | |
337 | // <!-- Mathematical Operators --> | |
338 | {"forall", "8704"}, //for all, U+2200 ISOtech --> | |
339 | {"part", "8706"}, //partial differential, U+2202 ISOtech --> | |
340 | {"exist", "8707"}, //there exists, U+2203 ISOtech --> | |
341 | {"empty", "8709"}, //empty set = null set = diameter,U+2205 ISOamso --> | |
342 | {"nabla", "8711"}, //nabla = backward difference,U+2207 ISOtech --> | |
343 | {"isin", "8712"}, //element of, U+2208 ISOtech --> | |
344 | {"notin", "8713"}, //not an element of, U+2209 ISOtech --> | |
345 | {"ni", "8715"}, //contains as member, U+220B ISOtech --> | |
346 | // <!-- should there be a more memorable name than 'ni'? --> | |
347 | {"prod", "8719"}, //n-ary product = product sign,U+220F ISOamsb --> | |
348 | // <!-- prod is NOT the same character as U+03A0 'greek capital letter pi' | |
349 | // though the same glyph might be used for both --> | |
350 | {"sum", "8721"}, //n-ary summation, U+2211 ISOamsb --> | |
351 | // <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma' | |
352 | // though the same glyph might be used for both --> | |
353 | {"minus", "8722"}, //minus sign, U+2212 ISOtech --> | |
354 | {"lowast", "8727"}, //asterisk operator, U+2217 ISOtech --> | |
355 | {"radic", "8730"}, //square root = radical sign,U+221A ISOtech --> | |
356 | {"prop", "8733"}, //proportional to, U+221D ISOtech --> | |
357 | {"infin", "8734"}, //infinity, U+221E ISOtech --> | |
358 | {"ang", "8736"}, //angle, U+2220 ISOamso --> | |
359 | {"and", "8743"}, //logical and = wedge, U+2227 ISOtech --> | |
360 | {"or", "8744"}, //logical or = vee, U+2228 ISOtech --> | |
361 | {"cap", "8745"}, //intersection = cap, U+2229 ISOtech --> | |
362 | {"cup", "8746"}, //union = cup, U+222A ISOtech --> | |
363 | {"int", "8747"}, //integral, U+222B ISOtech --> | |
364 | {"there4", "8756"}, //therefore, U+2234 ISOtech --> | |
365 | {"sim", "8764"}, //tilde operator = varies with = similar to,U+223C ISOtech --> | |
366 | // <!-- tilde operator is NOT the same character as the tilde, U+007E,although | |
367 | // the same glyph might be used to represent both --> | |
368 | {"cong", "8773"}, //approximately equal to, U+2245 ISOtech --> | |
369 | {"asymp", "8776"}, //almost equal to = asymptotic to,U+2248 ISOamsr --> | |
370 | {"ne", "8800"}, //not equal to, U+2260 ISOtech --> | |
371 | {"equiv", "8801"}, //identical to, U+2261 ISOtech --> | |
372 | {"le", "8804"}, //less-than or equal to, U+2264 ISOtech --> | |
373 | {"ge", "8805"}, //greater-than or equal to,U+2265 ISOtech --> | |
374 | {"sub", "8834"}, //subset of, U+2282 ISOtech --> | |
375 | {"sup", "8835"}, //superset of, U+2283 ISOtech --> | |
376 | // <!-- note that nsup, 'not a superset of, U+2283' is not covered by the | |
377 | // Symbol font encoding and is not included. Should it be, for symmetry? | |
378 | // It is in ISOamsn --> <!ENTITY nsub", "8836"}, | |
379 | // not a subset of, U+2284 ISOamsn --> | |
380 | {"sube", "8838"}, //subset of or equal to, U+2286 ISOtech --> | |
381 | {"supe", "8839"}, //superset of or equal to,U+2287 ISOtech --> | |
382 | {"oplus", "8853"}, //circled plus = direct sum,U+2295 ISOamsb --> | |
383 | {"otimes", "8855"}, //circled times = vector product,U+2297 ISOamsb --> | |
384 | {"perp", "8869"}, //up tack = orthogonal to = perpendicular,U+22A5 ISOtech --> | |
385 | {"sdot", "8901"}, //dot operator, U+22C5 ISOamsb --> | |
386 | // <!-- dot operator is NOT the same character as U+00B7 middle dot --> | |
387 | // <!-- Miscellaneous Technical --> | |
388 | {"lceil", "8968"}, //left ceiling = apl upstile,U+2308 ISOamsc --> | |
389 | {"rceil", "8969"}, //right ceiling, U+2309 ISOamsc --> | |
390 | {"lfloor", "8970"}, //left floor = apl downstile,U+230A ISOamsc --> | |
391 | {"rfloor", "8971"}, //right floor, U+230B ISOamsc --> | |
392 | {"lang", "9001"}, //left-pointing angle bracket = bra,U+2329 ISOtech --> | |
393 | // <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' --> | |
394 | {"rang", "9002"}, //right-pointing angle bracket = ket,U+232A ISOtech --> | |
395 | // <!-- rang is NOT the same character as U+003E 'greater than' or U+203A | |
396 | // 'single right-pointing angle quotation mark' --> | |
397 | // <!-- Geometric Shapes --> | |
398 | {"loz", "9674"}, //lozenge, U+25CA ISOpub --> | |
399 | // <!-- Miscellaneous Symbols --> | |
400 | {"spades", "9824"}, //black spade suit, U+2660 ISOpub --> | |
401 | // <!-- black here seems to mean filled as opposed to hollow --> | |
402 | {"clubs", "9827"}, //black club suit = shamrock,U+2663 ISOpub --> | |
403 | {"hearts", "9829"}, //black heart suit = valentine,U+2665 ISOpub --> | |
404 | {"diams", "9830"}, //black diamond suit, U+2666 ISOpub --> | |
405 | ||
406 | // <!-- Latin Extended-A --> | |
407 | {"OElig", "338"}, // -- latin capital ligature OE,U+0152 ISOlat2 --> | |
408 | {"oelig", "339"}, // -- latin small ligature oe, U+0153 ISOlat2 --> | |
409 | // <!-- ligature is a misnomer, this is a separate character in some languages --> | |
410 | {"Scaron", "352"}, // -- latin capital letter S with caron,U+0160 ISOlat2 --> | |
411 | {"scaron", "353"}, // -- latin small letter s with caron,U+0161 ISOlat2 --> | |
412 | {"Yuml", "376"}, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 --> | |
413 | // <!-- Spacing Modifier Letters --> | |
414 | {"circ", "710"}, // -- modifier letter circumflex accent,U+02C6 ISOpub --> | |
415 | {"tilde", "732"}, //small tilde, U+02DC ISOdia --> | |
416 | // <!-- General Punctuation --> | |
417 | {"ensp", "8194"}, //en space, U+2002 ISOpub --> | |
418 | {"emsp", "8195"}, //em space, U+2003 ISOpub --> | |
419 | {"thinsp", "8201"}, //thin space, U+2009 ISOpub --> | |
420 | {"zwnj", "8204"}, //zero width non-joiner,U+200C NEW RFC 2070 --> | |
421 | {"zwj", "8205"}, //zero width joiner, U+200D NEW RFC 2070 --> | |
422 | {"lrm", "8206"}, //left-to-right mark, U+200E NEW RFC 2070 --> | |
423 | {"rlm", "8207"}, //right-to-left mark, U+200F NEW RFC 2070 --> | |
424 | {"ndash", "8211"}, //en dash, U+2013 ISOpub --> | |
425 | {"mdash", "8212"}, //em dash, U+2014 ISOpub --> | |
426 | {"lsquo", "8216"}, //left single quotation mark,U+2018 ISOnum --> | |
427 | {"rsquo", "8217"}, //right single quotation mark,U+2019 ISOnum --> | |
428 | {"sbquo", "8218"}, //single low-9 quotation mark, U+201A NEW --> | |
429 | {"ldquo", "8220"}, //left double quotation mark,U+201C ISOnum --> | |
430 | {"rdquo", "8221"}, //right double quotation mark,U+201D ISOnum --> | |
431 | {"bdquo", "8222"}, //double low-9 quotation mark, U+201E NEW --> | |
432 | {"dagger", "8224"}, //dagger, U+2020 ISOpub --> | |
433 | {"Dagger", "8225"}, //double dagger, U+2021 ISOpub --> | |
434 | {"permil", "8240"}, //per mille sign, U+2030 ISOtech --> | |
435 | {"lsaquo", "8249"}, //single left-pointing angle quotation mark,U+2039 ISO proposed --> | |
436 | // <!-- lsaquo is proposed but not yet ISO standardized --> | |
437 | {"rsaquo", "8250"}, //single right-pointing angle quotation mark,U+203A ISO proposed --> | |
438 | // <!-- rsaquo is proposed but not yet ISO standardized --> | |
439 | {"euro", "8364"}, // -- euro sign, U+20AC NEW --> | |
440 | }; | |
441 | ||
442 | // name <-> value lookup table; private, but also read directly by the enclosing XMLEntityCodec | |
443 | 2 | private EntityMap map = new Entities.LookupEntityMap(); |
444 | ||
445 | /** | |
446 | * <p> | |
447 | * Adds entities to this entity. | |
448 | * </p> | |
449 | * | |
450 | * @param entityArray array of entities to be added | |
451 | */ | |
452 | public void addEntities(String[][] entityArray) | |
453 | { | |
454 | 512 | for (int i = 0; i < entityArray.length; ++i) |
455 | { | |
456 | 504 | addEntity(entityArray[i][0], Integer.parseInt(entityArray[i][1])); |
457 | } | |
458 | 8 | } |
459 | ||
460 | /** | |
461 | * <p> | |
462 | * Add an entity to this entity. | |
463 | * </p> | |
464 | * | |
465 | * @param name name of the entity | |
466 | * @param value vale of the entity | |
467 | */ | |
468 | public void addEntity(String name, int value) | |
469 | { | |
470 | 504 | map.add(name, value); |
471 | 504 | } |
472 | ||
473 | /** | |
474 | * <p> | |
475 | * Returns the name of the entity identified by the specified value. | |
476 | * </p> | |
477 | * | |
478 | * @param value the value to locate | |
479 | * @return entity name associated with the specified value | |
480 | */ | |
481 | public String entityName(int value) | |
482 | { | |
483 | 3078 | return map.name(value); |
484 | } | |
485 | ||
486 | /** | |
487 | * <p> | |
488 | * Returns the value of the entity identified by the specified name. | |
489 | * </p> | |
490 | * | |
491 | * @param name the name to locate | |
492 | * @return entity value associated with the specified name | |
493 | */ | |
494 | public int entityValue(String name) | |
495 | { | |
496 | 504 | return map.value(name); |
497 | } | |
498 | ||
499 | /** | |
500 | * <p> | |
501 | * Escapes the characters in a <code>String</code>. | |
502 | * </p> | |
503 | * <p> | |
504 | * For example, if you have called addEntity("foo", 0xA1), | |
505 | * escape("\u00A1") will return "&foo;" | |
506 | * </p> | |
507 | * | |
508 | * @param str The <code>String</code> to escape. | |
509 | * @return A new escaped <code>String</code>. | |
510 | */ | |
511 | public String escape(String str) | |
512 | { | |
513 | // todo: rewrite to use a Writer | |
514 | 6 | StringBuffer buf = new StringBuffer(str.length() * 2); |
515 | 3084 | for (int i = 0; i < str.length(); ++i) |
516 | { | |
517 | 3078 | char ch = str.charAt(i); |
518 | 3078 | String entityName = this.entityName(ch); |
519 | 3078 | if (entityName == null) |
520 | { | |
521 | 2574 | if (ch > 0x7F) |
522 | { | |
523 | 0 | buf.append('&'); |
524 | 0 | buf.append('#'); |
525 | 0 | buf.append((int) ch); |
526 | 0 | buf.append(';'); |
527 | } | |
528 | else | |
529 | { | |
530 | 2574 | buf.append(ch); |
531 | } | |
532 | } | |
533 | else | |
534 | { | |
535 | 504 | buf.append('&'); |
536 | 504 | buf.append(entityName); |
537 | 504 | buf.append(';'); |
538 | } | |
539 | } | |
540 | 6 | return buf.toString(); |
541 | } | |
542 | ||
543 | /** | |
544 | * <p> | |
545 | * Escapes the characters in the <code>String</code> passed and writes the | |
546 | * result to the <code>Writer</code> passed. | |
547 | * </p> | |
548 | * | |
549 | * @param writer The <code>Writer</code> to write the results of the | |
550 | * escaping to. Assumed to be a non-null value. | |
551 | * @param str The <code>String</code> to escape. Assumed to be a non-null | |
552 | * value. | |
553 | * @throws IOException when <code>Writer</code> passed throws the exception | |
554 | * from calls to the {@link Writer#write(int)} methods. | |
555 | * @see #escape(String) | |
556 | * @see Writer | |
557 | */ | |
558 | public void escape(Writer writer, String str) throws IOException | |
559 | { | |
560 | 0 | int len = str.length(); |
561 | 0 | for (int i = 0; i < len; i++) |
562 | { | |
563 | 0 | char c = str.charAt(i); |
564 | 0 | String entityName = this.entityName(c); |
565 | 0 | if (entityName == null) |
566 | { | |
567 | 0 | if (c > 0x7F) |
568 | { | |
569 | 0 | writer.write("&#"); |
570 | 0 | writer.write(Integer.toString(c, 10)); |
571 | 0 | writer.write(';'); |
572 | } | |
573 | else | |
574 | { | |
575 | 0 | writer.write(c); |
576 | } | |
577 | } | |
578 | else | |
579 | { | |
580 | 0 | writer.write('&'); |
581 | 0 | writer.write(entityName); |
582 | 0 | writer.write(';'); |
583 | } | |
584 | } | |
585 | 0 | } |
586 | ||
587 | /** | |
588 | * <p> | |
589 | * Unescapes the entities in a <code>String</code>. | |
590 | * </p> | |
591 | * <p> | |
592 | * For example, if you have called addEntity("foo", 0xA1), | |
593 | * unescape("&foo;") will return "\u00A1" | |
594 | * </p> | |
595 | * | |
596 | * @param str The <code>String</code> to escape. | |
597 | * @return A new escaped <code>String</code> or str itself if no unescaping | |
598 | * was necessary. | |
599 | */ | |
600 | public String unescape(String str) | |
601 | { | |
602 | 6 | int firstAmp = str.indexOf('&'); |
603 | 6 | if (firstAmp < 0) |
604 | { | |
605 | 0 | return str; |
606 | } | |
607 | ||
608 | 6 | StringBuffer buf = new StringBuffer(str.length()); |
609 | 6 | buf.append(str.substring(0, firstAmp)); |
610 | 3084 | for (int i = firstAmp; i < str.length(); ++i) |
611 | { | |
612 | 3078 | char ch = str.charAt(i); |
613 | 3078 | if (ch == '&') |
614 | { | |
615 | 504 | int semi = str.indexOf(';', i + 1); |
616 | 504 | if (semi == -1) |
617 | { | |
618 | 0 | buf.append(ch); |
619 | 0 | continue; |
620 | } | |
621 | 504 | int amph = str.indexOf('&', i + 1); |
622 | 504 | if (amph != -1 && amph < semi) |
623 | { | |
624 | // Then the text looks like &...&...; | |
625 | 0 | buf.append(ch); |
626 | 0 | continue; |
627 | } | |
628 | 504 | String entityName = str.substring(i + 1, semi); |
629 | int entityValue; | |
630 | 504 | if (entityName.length() == 0) |
631 | { | |
632 | 0 | entityValue = -1; |
633 | } | |
634 | 504 | else if (entityName.charAt(0) == '#') |
635 | { | |
636 | 0 | if (entityName.length() == 1) |
637 | { | |
638 | 0 | entityValue = -1; |
639 | } | |
640 | else | |
641 | { | |
642 | 0 | char charAt1 = entityName.charAt(1); |
643 | try | |
644 | { | |
645 | 0 | if (charAt1 == 'x' || charAt1 == 'X') |
646 | { | |
647 | 0 | entityValue = Integer.valueOf(entityName.substring(2), 16).intValue(); |
648 | } | |
649 | else | |
650 | { | |
651 | 0 | entityValue = Integer.parseInt(entityName.substring(1)); |
652 | } | |
653 | 0 | if (entityValue > 0xFFFF) |
654 | { | |
655 | 0 | entityValue = -1; |
656 | } | |
657 | } | |
658 | 0 | catch (NumberFormatException ex) |
659 | { | |
660 | 0 | entityValue = -1; |
661 | 0 | } |
662 | 0 | } |
663 | } | |
664 | else | |
665 | { | |
666 | 504 | entityValue = this.entityValue(entityName); |
667 | } | |
668 | 504 | if (entityValue == -1) |
669 | { | |
670 | 0 | buf.append('&'); |
671 | 0 | buf.append(entityName); |
672 | 0 | buf.append(';'); |
673 | } | |
674 | else | |
675 | { | |
676 | 504 | buf.append((char) (entityValue)); |
677 | } | |
678 | 504 | i = semi; |
679 | 504 | } |
680 | else | |
681 | { | |
682 | 2574 | buf.append(ch); |
683 | } | |
684 | } | |
685 | 6 | return buf.toString(); |
686 | } | |
687 | ||
688 | /** | |
689 | * <p> | |
690 | * Unescapes the escaped entities in the <code>String</code> passed and | |
691 | * writes the result to the <code>Writer</code> passed. | |
692 | * </p> | |
693 | * | |
694 | * @param writer The <code>Writer</code> to write the results to; assumed | |
695 | * to be non-null. | |
696 | * @param string The <code>String</code> to unescape; assumed | |
697 | * to be non-null. | |
698 | * @throws IOException when <code>Writer</code> passed throws the exception | |
699 | * from calls to the {@link Writer#write(int)} methods. | |
700 | * @see #escape(String) | |
701 | * @see Writer | |
702 | */ | |
703 | public void unescape(Writer writer, String string) throws IOException | |
704 | { | |
705 | 0 | int firstAmp = string.indexOf('&'); |
706 | 0 | if (firstAmp < 0) |
707 | { | |
708 | 0 | writer.write(string); |
709 | 0 | return; |
710 | } | |
711 | ||
712 | 0 | writer.write(string, 0, firstAmp); |
713 | 0 | int len = string.length(); |
714 | 0 | for (int i = firstAmp; i < len; i++) |
715 | { | |
716 | 0 | char c = string.charAt(i); |
717 | 0 | if (c == '&') |
718 | { | |
719 | 0 | int nextIdx = i + 1; |
720 | 0 | int semiColonIdx = string.indexOf(';', nextIdx); |
721 | 0 | if (semiColonIdx == -1) |
722 | { | |
723 | 0 | writer.write(c); |
724 | 0 | continue; |
725 | } | |
726 | 0 | int amphersandIdx = string.indexOf('&', i + 1); |
727 | 0 | if (amphersandIdx != -1 && amphersandIdx < semiColonIdx) |
728 | { | |
729 | // Then the text looks like &...&...; | |
730 | 0 | writer.write(c); |
731 | 0 | continue; |
732 | } | |
733 | 0 | String entityContent = string.substring(nextIdx, semiColonIdx); |
734 | 0 | int entityValue = -1; |
735 | 0 | int entityContentLen = entityContent.length(); |
736 | 0 | if (entityContentLen > 0) |
737 | { | |
738 | 0 | if (entityContent.charAt(0) == '#') |
739 | { // escaped value content is an integer (decimal or | |
740 | // hexidecimal) | |
741 | 0 | if (entityContentLen > 1) |
742 | { | |
743 | 0 | char isHexChar = entityContent.charAt(1); |
744 | try | |
745 | { | |
746 | 0 | switch (isHexChar) |
747 | { | |
748 | case 'X' : | |
749 | case 'x' : | |
750 | 0 | entityValue = Integer.parseInt(entityContent.substring(2), 16); |
751 | 0 | break; |
752 | default : | |
753 | 0 | entityValue = Integer.parseInt(entityContent.substring(1), 10); |
754 | } | |
755 | 0 | if (entityValue > 0xFFFF) |
756 | { | |
757 | 0 | entityValue = -1; |
758 | } | |
759 | } | |
760 | 0 | catch (NumberFormatException e) |
761 | { | |
762 | 0 | entityValue = -1; |
763 | 0 | } |
764 | 0 | } |
765 | } | |
766 | else | |
767 | { // escaped value content is an entity name | |
768 | 0 | entityValue = this.entityValue(entityContent); |
769 | } | |
770 | } | |
771 | ||
772 | 0 | if (entityValue == -1) |
773 | { | |
774 | 0 | writer.write('&'); |
775 | 0 | writer.write(entityContent); |
776 | 0 | writer.write(';'); |
777 | } | |
778 | else | |
779 | { | |
780 | 0 | writer.write(entityValue); |
781 | } | |
782 | 0 | i = semiColonIdx; // move index up to the semi-colon |
783 | 0 | } |
784 | else | |
785 | { | |
786 | 0 | writer.write(c); |
787 | } | |
788 | } | |
789 | 0 | } |
790 | ||
791 | private static interface EntityMap | |
792 | { | |
793 | /** | |
794 | * <p> | |
795 | * Registers a name/value pair so that it can be found by name and by value. | |
796 | * </p> | |
797 | * | |
798 | * @param name the entity name | |
799 | * @param value the numeric entity value registered for <code>name</code> | |
800 | */ | |
801 | void add(String name, int value); | |
802 | | |
803 | /** | |
804 | * <p> | |
805 | * Returns the name of the entity identified by the specified value. | |
806 | * </p> | |
807 | * | |
808 | * @param value the value to locate | |
809 | * @return entity name associated with the specified value; the implementations in this file return <code>null</code> when no name is registered for it | |
810 | */ | |
811 | String name(int value); | |
812 | | |
813 | /** | |
814 | * <p> | |
815 | * Returns the value of the entity identified by the specified name. | |
816 | * </p> | |
817 | * | |
818 | * @param name the name to locate | |
819 | * @return entity value associated with the specified name; the implementations in this file return <code>-1</code> when the name is not registered | |
820 | */ | |
821 | int value(String name); | |
822 | } | |
823 | ||
824 | 4 | private static class PrimitiveEntityMap implements EntityMap |
825 | { | |
826 | 2 | private Map mapNameToValue = new HashMap(); |
827 | 2 | private IntHashMap mapValueToName = new IntHashMap(); |
828 | ||
829 | /** | |
830 | * {@inheritDoc} | |
831 | */ | |
832 | public void add(String name, int value) | |
833 | { | |
834 | 504 | mapNameToValue.put(name, new Integer(value)); |
835 | 504 | mapValueToName.put(value, name); |
836 | 504 | } |
837 | ||
838 | /** | |
839 | * {@inheritDoc} | |
840 | */ | |
841 | public String name(int value) | |
842 | { | |
843 | 512 | return (String) mapValueToName.get(value); |
844 | } | |
845 | ||
846 | /** | |
847 | * {@inheritDoc} | |
848 | */ | |
849 | public int value(String name) | |
850 | { | |
851 | 504 | Object value = mapNameToValue.get(name); |
852 | 504 | if (value == null) |
853 | { | |
854 | 0 | return -1; |
855 | } | |
856 | 504 | return ((Integer) value).intValue(); |
857 | } | |
858 | } | |
859 | ||
860 | 4 | private static class LookupEntityMap extends PrimitiveEntityMap |
861 | { | |
862 | private static final int LOOKUP_TABLE_SIZE = 256; | |
863 | private String[] lookupTable; | |
864 | ||
865 | /** | |
866 | * {@inheritDoc} | |
867 | */ | |
868 | public String name(int value) | |
869 | { | |
870 | 3078 | if (value < LOOKUP_TABLE_SIZE) |
871 | { | |
872 | 3078 | return lookupTable()[value]; |
873 | } | |
874 | 0 | return super.name(value); |
875 | } | |
876 | ||
877 | /** | |
878 | * <p> | |
879 | * Returns the lookup table for this entity map. The lookup table is | |
880 | * created if it has not been previously. | |
881 | * </p> | |
882 | * | |
883 | * @return the lookup table | |
884 | */ | |
885 | private String[] lookupTable() | |
886 | { | |
887 | 3078 | if (lookupTable == null) |
888 | { | |
889 | 2 | createLookupTable(); |
890 | } | |
891 | 3078 | return lookupTable; |
892 | } | |
893 | ||
894 | /** | |
895 | * <p> | |
896 | * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, | |
897 | * initialized with entity names. | |
898 | * </p> | |
899 | */ | |
900 | private void createLookupTable() | |
901 | { | |
902 | 2 | lookupTable = new String[LOOKUP_TABLE_SIZE]; |
903 | 514 | for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i) |
904 | { | |
905 | 512 | lookupTable[i] = super.name(i); |
906 | } | |
907 | 2 | } |
908 | } | |
909 | ||
910 | /** | |
911 | * <p> | |
912 | * A hash map that uses primitive ints for the key rather than objects. | |
913 | * </p> | |
914 | * <p> | |
915 | * Note that this class is for internal optimization purposes only, and may | |
916 | * not be supported in future releases of Jakarta Commons Lang. Utilities of | |
917 | * this sort may be included in future releases of Jakarta Commons | |
918 | * Collections. | |
919 | * </p> | |
920 | * | |
921 | * @author Justin Couch | |
922 | * @author Alex Chaffee (alex@apache.org) | |
923 | * @author Stephen Colebourne | |
924 | * @since 2.0 | |
925 | * @version $Revision: 7963 $ | |
926 | * @see java.util.HashMap | |
927 | */ | |
928 | 2 | private static class IntHashMap |
929 | { | |
930 | ||
931 | /** | |
932 | * The hash table data. | |
933 | */ | |
934 | private transient Entry table[]; | |
935 | ||
936 | /** | |
937 | * The total number of entries in the hash table. | |
938 | */ | |
939 | private transient int count; | |
940 | ||
941 | /** | |
942 | * The table is rehashed when its size exceeds this threshold. (The value | |
943 | * of this field is (int)(capacity * loadFactor).) | |
944 | * | |
945 | * @serial | |
946 | */ | |
947 | private int threshold; | |
948 | ||
949 | /** | |
950 | * The load factor for the hashtable. | |
951 | * | |
952 | * @serial | |
953 | */ | |
954 | private float loadFactor; | |
955 | ||
956 | /** | |
957 | * <p> | |
958 | * Innerclass that acts as a datastructure to create a new entry in the | |
959 | * table. | |
960 | * </p> | |
961 | */ | |
962 | private static class Entry | |
963 | { | |
964 | int hash; | |
965 | int key; | |
966 | Object value; | |
967 | Entry next; | |
968 | ||
969 | /** | |
970 | * <p> | |
971 | * Create a new entry with the given values. | |
972 | * </p> | |
973 | * | |
974 | * @param hash The code used to hash the object with | |
975 | * @param key The key used to enter this in the table | |
976 | * @param value The value for this key | |
977 | * @param next A reference to the next entry in the table | |
978 | */ | |
979 | protected Entry(int hash, int key, Object value, Entry next) | |
980 | 504 | { |
981 | 504 | this.hash = hash; |
982 | 504 | this.key = key; |
983 | 504 | this.value = value; |
984 | 504 | this.next = next; |
985 | 504 | } |
986 | } | |
987 | ||
988 | /** | |
989 | * <p> | |
990 | * Constructs a new, empty hashtable with a default capacity and load | |
991 | * factor, which is <code>20</code> and <code>0.75</code> | |
992 | * respectively. | |
993 | * </p> | |
994 | */ | |
995 | public IntHashMap() | |
996 | { | |
997 | 2 | this(20, 0.75f); |
998 | 2 | } |
999 | ||
1000 | /** | |
1001 | * <p> | |
1002 | * Constructs a new, empty hashtable with the specified initial capacity | |
1003 | * and default load factor, which is <code>0.75</code>. | |
1004 | * </p> | |
1005 | * | |
1006 | * @param initialCapacity the initial capacity of the hashtable. | |
1007 | * @throws IllegalArgumentException if the initial capacity is less than | |
1008 | * zero. | |
1009 | */ | |
1010 | public IntHashMap(int initialCapacity) | |
1011 | { | |
1012 | 0 | this(initialCapacity, 0.75f); |
1013 | 0 | } |
1014 | ||
1015 | /** | |
1016 | * <p> | |
1017 | * Constructs a new, empty hashtable with the specified initial capacity | |
1018 | * and the specified load factor. | |
1019 | * </p> | |
1020 | * | |
1021 | * @param initialCapacity the initial capacity of the hashtable. | |
1022 | * @param loadFactor the load factor of the hashtable. | |
1023 | * @throws IllegalArgumentException if the initial capacity is less than | |
1024 | * zero, or if the load factor is nonpositive. | |
1025 | */ | |
1026 | public IntHashMap(int initialCapacity, float loadFactor) | |
1027 | { | |
1028 | 2 | super(); |
1029 | 2 | if (initialCapacity < 0) |
1030 | { | |
1031 | 0 | throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity); |
1032 | } | |
1033 | 2 | if (loadFactor <= 0) |
1034 | { | |
1035 | 0 | throw new IllegalArgumentException("Illegal Load: " + loadFactor); |
1036 | } | |
1037 | 2 | if (initialCapacity == 0) |
1038 | { | |
1039 | 0 | initialCapacity = 1; |
1040 | } | |
1041 | ||
1042 | 2 | this.loadFactor = loadFactor; |
1043 | 2 | table = new Entry[initialCapacity]; |
1044 | 2 | threshold = (int) (initialCapacity * loadFactor); |
1045 | 2 | } |
1046 | ||
1047 | /** | |
1048 | * <p> | |
1049 | * Returns the number of keys in this hashtable. | |
1050 | * </p> | |
1051 | * | |
1052 | * @return the number of keys in this hashtable. | |
1053 | */ | |
1054 | public int size() | |
1055 | { | |
1056 | 0 | return count; |
1057 | } | |
1058 | ||
1059 | /** | |
1060 | * <p> | |
1061 | * Tests if this hashtable maps no keys to values. | |
1062 | * </p> | |
1063 | * | |
1064 | * @return <code>true</code> if this hashtable maps no keys to values; | |
1065 | * <code>false</code> otherwise. | |
1066 | */ | |
1067 | public boolean isEmpty() | |
1068 | { | |
1069 | 0 | return count == 0; |
1070 | } | |
1071 | ||
1072 | /** | |
1073 | * <p> | |
1074 | * Tests if some key maps into the specified value in this hashtable. | |
1075 | * This operation is more expensive than the <code>containsKey</code> | |
1076 | * method. | |
1077 | * </p> | |
1078 | * <p> | |
1079 | * Note that this method is identical in functionality to containsValue, | |
1080 | * (which is part of the Map interface in the collections framework). | |
1081 | * </p> | |
1082 | * | |
1083 | * @param value a value to search for. | |
1084 | * @return <code>true</code> if and only if some key maps to the | |
1085 | * <code>value</code> argument in this hashtable as determined | |
1086 | * by the <tt>equals</tt> method; <code>false</code> | |
1087 | * otherwise. | |
1088 | * @throws NullPointerException if the value is <code>null</code>. | |
1089 | * @see #containsKey(int) | |
1090 | * @see #containsValue(Object) | |
1091 | * @see java.util.Map | |
1092 | */ | |
1093 | public boolean contains(Object value) | |
1094 | { | |
1095 | 0 | if (value == null) |
1096 | { | |
1097 | 0 | throw new IllegalArgumentException("parameter value may not be null"); |
1098 | } | |
1099 | ||
1100 | 0 | Entry tab[] = table; |
1101 | 0 | for (int i = tab.length; i-- > 0;) |
1102 | { | |
1103 | 0 | for (Entry e = tab[i]; e != null; e = e.next) |
1104 | { | |
1105 | 0 | if (e.value.equals(value)) |
1106 | { | |
1107 | 0 | return true; |
1108 | } | |
1109 | } | |
1110 | } | |
1111 | 0 | return false; |
1112 | } | |
1113 | ||
1114 | /** | |
1115 | * <p> | |
1116 | * Returns <code>true</code> if this HashMap maps one or more keys to | |
1117 | * this value. | |
1118 | * </p> | |
1119 | * <p> | |
1120 | * Note that this method is identical in functionality to contains (which | |
1121 | * predates the Map interface). | |
1122 | * </p> | |
1123 | * | |
1124 | * @param value value whose presence in this HashMap is to be tested. | |
1125 | * @return boolean <code>true</code> if the value is contained | |
1126 | * @see java.util.Map | |
1127 | * @since JDK1.2 | |
1128 | */ | |
1129 | public boolean containsValue(Object value) | |
1130 | { | |
1131 | 0 | return contains(value); |
1132 | } | |
1133 | ||
1134 | /** | |
1135 | * <p> | |
1136 | * Tests if the specified object is a key in this hashtable. | |
1137 | * </p> | |
1138 | * | |
1139 | * @param key possible key. | |
1140 | * @return <code>true</code> if and only if the specified object is a | |
1141 | * key in this hashtable, as determined by the <tt>equals</tt> | |
1142 | * method; <code>false</code> otherwise. | |
1143 | * @see #contains(Object) | |
1144 | */ | |
1145 | public boolean containsKey(int key) | |
1146 | { | |
1147 | 0 | Entry tab[] = table; |
1148 | 0 | int hash = key; |
1149 | 0 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1150 | 0 | for (Entry e = tab[index]; e != null; e = e.next) |
1151 | { | |
1152 | 0 | if (e.hash == hash) |
1153 | { | |
1154 | 0 | return true; |
1155 | } | |
1156 | } | |
1157 | 0 | return false; |
1158 | } | |
1159 | ||
1160 | /** | |
1161 | * <p> | |
1162 | * Returns the value to which the specified key is mapped in this map. | |
1163 | * </p> | |
1164 | * | |
1165 | * @param key a key in the hashtable. | |
1166 | * @return the value to which the key is mapped in this hashtable; | |
1167 | * <code>null</code> if the key is not mapped to any value in | |
1168 | * this hashtable. | |
1169 | * @see #put(int, Object) | |
1170 | */ | |
1171 | public Object get(int key) | |
1172 | { | |
1173 | 512 | Entry tab[] = table; |
1174 | 512 | int hash = key; |
1175 | 512 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1176 | 580 | for (Entry e = tab[index]; e != null; e = e.next) |
1177 | { | |
1178 | 270 | if (e.hash == hash) |
1179 | { | |
1180 | 202 | return e.value; |
1181 | } | |
1182 | } | |
1183 | 310 | return null; |
1184 | } | |
1185 | ||
1186 | /** | |
1187 | * <p> | |
1188 | * Increases the capacity of and internally reorganizes this hashtable, | |
1189 | * in order to accommodate and access its entries more efficiently. | |
1190 | * </p> | |
1191 | * <p> | |
1192 | * This method is called automatically when the number of keys in the | |
1193 | * hashtable exceeds this hashtable's capacity and load factor. | |
1194 | * </p> | |
1195 | */ | |
1196 | protected void rehash() | |
1197 | { | |
1198 | 10 | int oldCapacity = table.length; |
1199 | 10 | Entry oldMap[] = table; |
1200 | ||
1201 | 10 | int newCapacity = oldCapacity * 2 + 1; |
1202 | 10 | Entry newMap[] = new Entry[newCapacity]; |
1203 | ||
1204 | 10 | threshold = (int) (newCapacity * loadFactor); |
1205 | 10 | table = newMap; |
1206 | ||
1207 | 10 | for (int i = oldCapacity; i-- > 0;) |
1208 | { | |
1209 | 1292 | for (Entry old = oldMap[i]; old != null;) |
1210 | { | |
1211 | 966 | Entry e = old; |
1212 | 966 | old = old.next; |
1213 | ||
1214 | 966 | int index = (e.hash & 0x7FFFFFFF) % newCapacity; |
1215 | 966 | e.next = newMap[index]; |
1216 | 966 | newMap[index] = e; |
1217 | 966 | } |
1218 | } | |
1219 | 10 | } |
1220 | ||
1221 | /** | |
1222 | * <p> | |
1223 | * Maps the specified <code>key</code> to the specified | |
1224 | * <code>value</code> in this hashtable. The key cannot be | |
1225 | * <code>null</code>. | |
1226 | * </p> | |
1227 | * <p> | |
1228 | * The value can be retrieved by calling the <code>get</code> method | |
1229 | * with a key that is equal to the original key. | |
1230 | * </p> | |
1231 | * | |
1232 | * @param key the hashtable key. | |
1233 | * @param value the value. | |
1234 | * @return the previous value of the specified key in this hashtable, or | |
1235 | * <code>null</code> if it did not have one. | |
1236 | * @throws NullPointerException if the key is <code>null</code>. | |
1237 | * @see #get(int) | |
1238 | */ | |
1239 | public Object put(int key, Object value) | |
1240 | { | |
1241 | // Makes sure the key is not already in the hashtable. | |
1242 | 504 | Entry tab[] = table; |
1243 | 504 | int hash = key; |
1244 | 504 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1245 | 644 | for (Entry e = tab[index]; e != null; e = e.next) |
1246 | { | |
1247 | 140 | if (e.hash == hash) |
1248 | { | |
1249 | 0 | Object old = e.value; |
1250 | 0 | e.value = value; |
1251 | 0 | return old; |
1252 | } | |
1253 | } | |
1254 | ||
1255 | 504 | if (count >= threshold) |
1256 | { | |
1257 | // Rehash the table if the threshold is exceeded | |
1258 | 10 | rehash(); |
1259 | ||
1260 | 10 | tab = table; |
1261 | 10 | index = (hash & 0x7FFFFFFF) % tab.length; |
1262 | } | |
1263 | ||
1264 | // Creates the new entry. | |
1265 | 504 | Entry e = new Entry(hash, key, value, tab[index]); |
1266 | 504 | tab[index] = e; |
1267 | 504 | count++; |
1268 | 504 | return null; |
1269 | } | |
1270 | ||
1271 | /** | |
1272 | * <p> | |
1273 | * Removes the key (and its corresponding value) from this hashtable. | |
1274 | * </p> | |
1275 | * <p> | |
1276 | * This method does nothing if the key is not present in the hashtable. | |
1277 | * </p> | |
1278 | * | |
1279 | * @param key the key that needs to be removed. | |
1280 | * @return the value to which the key had been mapped in this hashtable, | |
1281 | * or <code>null</code> if the key did not have a mapping. | |
1282 | */ | |
1283 | public Object remove(int key) | |
1284 | { | |
1285 | 0 | Entry tab[] = table; |
1286 | 0 | int hash = key; |
1287 | 0 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1288 | 0 | for (Entry e = tab[index], prev = null; e != null; prev = e, e = e.next) |
1289 | { | |
1290 | 0 | if (e.hash == hash) |
1291 | { | |
1292 | 0 | if (prev != null) |
1293 | { | |
1294 | 0 | prev.next = e.next; |
1295 | } | |
1296 | else | |
1297 | { | |
1298 | 0 | tab[index] = e.next; |
1299 | } | |
1300 | 0 | count--; |
1301 | 0 | Object oldValue = e.value; |
1302 | 0 | e.value = null; |
1303 | 0 | return oldValue; |
1304 | } | |
1305 | } | |
1306 | 0 | return null; |
1307 | } | |
1308 | ||
1309 | /** | |
1310 | * <p>Clears this hashtable so that it contains no keys.</p> | |
1311 | */ | |
1312 | public synchronized void clear() | |
1313 | { | |
1314 | 0 | Entry tab[] = table; |
1315 | 0 | for (int index = tab.length; --index >= 0;) |
1316 | { | |
1317 | 0 | tab[index] = null; |
1318 | } | |
1319 | 0 | count = 0; |
1320 | 0 | } |
1321 | ||
1322 | } | |
1323 | ||
1324 | } | |
1325 | } |