Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XMLEntityCodec |
|
| 3.0;3 | ||||
XMLEntityCodec$1 |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$EntityMap |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$IntHashMap |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$IntHashMap$Entry |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$LookupEntityMap |
|
| 3.0;3 | ||||
XMLEntityCodec$Entities$PrimitiveEntityMap |
|
| 3.0;3 |
1 | /* | |
2 | * Copyright (c) MuleSoft, Inc. All rights reserved. http://www.mulesoft.com | |
3 | * The software in this package is published under the terms of the CPAL v1.0 | |
4 | * license, a copy of which has been included with this distribution in the | |
5 | * LICENSE.txt file. | |
6 | */ | |
7 | package org.mule.util; | |
8 | ||
9 | import java.io.IOException; | |
10 | import java.io.Writer; | |
11 | import java.util.HashMap; | |
12 | import java.util.Map; | |
13 | ||
14 | /** | |
15 | * This encoder contains methods that convert characters to Character entities as | |
16 | * defined by http://www.w3.org/TR/REC-html40/sgml/entities.html. More precisely it | |
17 | * combines the functionality of {@link org.apache.commons.lang.StringEscapeUtils#escapeXml(String)} and | |
18 | * {@link org.apache.commons.lang.StringEscapeUtils#escapeHtml(String)} into a single pass. | |
19 | */ | |
20 | // @ThreadSafe | |
21 | public final class XMLEntityCodec | |
22 | { | |
/** Shared entity table; filled by the static initializer below and used by the static methods of this class. */
private static final Entities MuleEntities = new Entities();

static
{
    // Registration order is unchanged: apostrophe, basic markup characters, ISO-8859-1, HTML 4.0.
    final String[][][] tables =
    {
        Entities.APOS_ARRAY,
        Entities.BASIC_ARRAY,
        Entities.ISO8859_1_ARRAY,
        Entities.HTML40_ARRAY,
    };
    for (int t = 0; t < tables.length; t++)
    {
        MuleEntities.addEntities(tables[t]);
    }
}

/**
 * Does nothing; the codec is used through its static methods.
 */
protected XMLEntityCodec()
{
    super();
}
37 | ||
38 | public static String encodeString(String str) | |
39 | { | |
40 | 0 | if (StringUtils.isEmpty(str)) |
41 | { | |
42 | 0 | return str; |
43 | } | |
44 | ||
45 | 0 | return MuleEntities.escape(str); |
46 | } | |
47 | ||
48 | public static String decodeString(String str) | |
49 | { | |
50 | 0 | if (StringUtils.isEmpty(str)) |
51 | { | |
52 | 0 | return str; |
53 | } | |
54 | ||
55 | 0 | return MuleEntities.unescape(str); |
56 | } | |
57 | ||
58 | /** | |
59 | * <p> | |
60 | * Returns the name of the entity identified by the specified value. | |
61 | * </p> | |
62 | * | |
63 | * @param value the value to locate | |
64 | * @return entity name associated with the specified value | |
65 | */ | |
66 | public static String entityName(int value) | |
67 | { | |
68 | 0 | return MuleEntities.map.name(value); |
69 | } | |
70 | ||
71 | /** | |
72 | * <p> | |
73 | * Returns the value of the entity identified by the specified name. | |
74 | * </p> | |
75 | * | |
76 | * @param name the name to locate | |
77 | * @return entity value associated with the specified name | |
78 | */ | |
79 | public static int entityValue(String name) | |
80 | { | |
81 | 0 | return MuleEntities.map.value(name); |
82 | } | |
83 | ||
84 | ||
85 | // | |
86 | // everything from here on is copied from commons-lang 2.2 + svn since it is not | |
87 | // extensible and referencing the package-private class can lead to classloader | |
88 | // problems :-( | |
89 | // | |
90 | ||
91 | /* | |
92 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
93 | * contributor license agreements. See the NOTICE file distributed with | |
94 | * this work for additional information regarding copyright ownership. | |
95 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
96 | * (the "License"); you may not use this file except in compliance with | |
97 | * the License. You may obtain a copy of the License at | |
98 | * | |
99 | * http://www.apache.org/licenses/LICENSE-2.0 | |
100 | * | |
101 | * Unless required by applicable law or agreed to in writing, software | |
102 | * distributed under the License is distributed on an "AS IS" BASIS, | |
103 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
104 | * See the License for the specific language governing permissions and | |
105 | * limitations under the License. | |
106 | */ | |
107 | ||
108 | /** | |
109 | * <p>Provides HTML and XML entity utilities.</p> | |
110 | * | |
111 | * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> | |
112 | * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> | |
113 | * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> | |
114 | * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> | |
115 | * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> | |
116 | * | |
117 | * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a> | |
118 | * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a> | |
119 | * @since 2.0 | |
120 | * @version $Id$ | |
121 | */ | |
122 | 0 | private static class Entities |
123 | { | |
124 | ||
// Basic markup-significant characters as {entity name, decimal character code} pairs.
125 | 0 | private static final String[][] BASIC_ARRAY = |
126 | { | |
127 | {"quot", "34"}, // " - double-quote | |
128 | {"amp", "38"}, // & - ampersand | |
129 | {"lt", "60"}, // < - less-than | |
130 | {"gt", "62"}, // > - greater-than | |
131 | }; | |
132 | ||
// The apostrophe entity, kept in its own table as an {entity name, decimal character code} pair.
133 | 0 | private static final String[][] APOS_ARRAY = |
134 | { | |
135 | {"apos", "39"}, // ' - XML apostrophe | |
136 | }; | |
137 | ||
// Named entities for character codes 160-255, as {entity name, decimal character code} pairs.
138 | // package scoped for testing | |
139 | 0 | static final String[][] ISO8859_1_ARRAY = |
140 | { | |
141 | {"nbsp", "160"}, // non-breaking space | |
142 | {"iexcl", "161"}, //inverted exclamation mark | |
143 | {"cent", "162"}, //cent sign | |
144 | {"pound", "163"}, //pound sign | |
145 | {"curren", "164"}, //currency sign | |
146 | {"yen", "165"}, //yen sign = yuan sign | |
147 | {"brvbar", "166"}, //broken bar = broken vertical bar | |
148 | {"sect", "167"}, //section sign | |
149 | {"uml", "168"}, //diaeresis = spacing diaeresis | |
150 | {"copy", "169"}, // © - copyright sign | |
151 | {"ordf", "170"}, //feminine ordinal indicator | |
152 | {"laquo", "171"}, //left-pointing double angle quotation mark = left pointing guillemet | |
153 | {"not", "172"}, //not sign | |
154 | {"shy", "173"}, //soft hyphen = discretionary hyphen | |
155 | {"reg", "174"}, // ® - registered trademark sign | |
156 | {"macr", "175"}, //macron = spacing macron = overline = APL overbar | |
157 | {"deg", "176"}, //degree sign | |
158 | {"plusmn", "177"}, //plus-minus sign = plus-or-minus sign | |
159 | {"sup2", "178"}, //superscript two = superscript digit two = squared | |
160 | {"sup3", "179"}, //superscript three = superscript digit three = cubed | |
161 | {"acute", "180"}, //acute accent = spacing acute | |
162 | {"micro", "181"}, //micro sign | |
163 | {"para", "182"}, //pilcrow sign = paragraph sign | |
164 | {"middot", "183"}, //middle dot = Georgian comma = Greek middle dot | |
165 | {"cedil", "184"}, //cedilla = spacing cedilla | |
166 | {"sup1", "185"}, //superscript one = superscript digit one | |
167 | {"ordm", "186"}, //masculine ordinal indicator | |
168 | {"raquo", "187"}, //right-pointing double angle quotation mark = right pointing guillemet | |
169 | {"frac14", "188"}, //vulgar fraction one quarter = fraction one quarter | |
170 | {"frac12", "189"}, //vulgar fraction one half = fraction one half | |
171 | {"frac34", "190"}, //vulgar fraction three quarters = fraction three quarters | |
172 | {"iquest", "191"}, //inverted question mark = turned question mark | |
173 | {"Agrave", "192"}, // À - uppercase A, grave accent | |
174 | {"Aacute", "193"}, // Á - uppercase A, acute accent | |
175 | {"Acirc", "194"}, // Â - uppercase A, circumflex accent | |
176 | {"Atilde", "195"}, // Ã - uppercase A, tilde | |
177 | {"Auml", "196"}, // Ä - uppercase A, umlaut | |
178 | {"Aring", "197"}, // Å - uppercase A, ring | |
179 | {"AElig", "198"}, // Æ - uppercase AE | |
180 | {"Ccedil", "199"}, // Ç - uppercase C, cedilla | |
181 | {"Egrave", "200"}, // È - uppercase E, grave accent | |
182 | {"Eacute", "201"}, // É - uppercase E, acute accent | |
183 | {"Ecirc", "202"}, // Ê - uppercase E, circumflex accent | |
184 | {"Euml", "203"}, // Ë - uppercase E, umlaut | |
185 | {"Igrave", "204"}, // Ì - uppercase I, grave accent | |
186 | {"Iacute", "205"}, // Í - uppercase I, acute accent | |
187 | {"Icirc", "206"}, // Î - uppercase I, circumflex accent | |
188 | {"Iuml", "207"}, // Ï - uppercase I, umlaut | |
189 | {"ETH", "208"}, // Ð - uppercase Eth, Icelandic | |
190 | {"Ntilde", "209"}, // Ñ - uppercase N, tilde | |
191 | {"Ograve", "210"}, // Ò - uppercase O, grave accent | |
192 | {"Oacute", "211"}, // Ó - uppercase O, acute accent | |
193 | {"Ocirc", "212"}, // Ô - uppercase O, circumflex accent | |
194 | {"Otilde", "213"}, // Õ - uppercase O, tilde | |
195 | {"Ouml", "214"}, // Ö - uppercase O, umlaut | |
196 | {"times", "215"}, //multiplication sign | |
197 | {"Oslash", "216"}, // Ø - uppercase O, slash | |
198 | {"Ugrave", "217"}, // Ù - uppercase U, grave accent | |
199 | {"Uacute", "218"}, // Ú - uppercase U, acute accent | |
200 | {"Ucirc", "219"}, // Û - uppercase U, circumflex accent | |
201 | {"Uuml", "220"}, // Ü - uppercase U, umlaut | |
202 | {"Yacute", "221"}, // Ý - uppercase Y, acute accent | |
203 | {"THORN", "222"}, // Þ - uppercase THORN, Icelandic | |
204 | {"szlig", "223"}, // ß - lowercase sharp s, German | |
205 | {"agrave", "224"}, // à - lowercase a, grave accent | |
206 | {"aacute", "225"}, // á - lowercase a, acute accent | |
207 | {"acirc", "226"}, // â - lowercase a, circumflex accent | |
208 | {"atilde", "227"}, // ã - lowercase a, tilde | |
209 | {"auml", "228"}, // ä - lowercase a, umlaut | |
210 | {"aring", "229"}, // å - lowercase a, ring | |
211 | {"aelig", "230"}, // æ - lowercase ae | |
212 | {"ccedil", "231"}, // ç - lowercase c, cedilla | |
213 | {"egrave", "232"}, // è - lowercase e, grave accent | |
214 | {"eacute", "233"}, // é - lowercase e, acute accent | |
215 | {"ecirc", "234"}, // ê - lowercase e, circumflex accent | |
216 | {"euml", "235"}, // ë - lowercase e, umlaut | |
217 | {"igrave", "236"}, // ì - lowercase i, grave accent | |
218 | {"iacute", "237"}, // í - lowercase i, acute accent | |
219 | {"icirc", "238"}, // î - lowercase i, circumflex accent | |
220 | {"iuml", "239"}, // ï - lowercase i, umlaut | |
221 | {"eth", "240"}, // ð - lowercase eth, Icelandic | |
222 | {"ntilde", "241"}, // ñ - lowercase n, tilde | |
223 | {"ograve", "242"}, // ò - lowercase o, grave accent | |
224 | {"oacute", "243"}, // ó - lowercase o, acute accent | |
225 | {"ocirc", "244"}, // ô - lowercase o, circumflex accent | |
226 | {"otilde", "245"}, // õ - lowercase o, tilde | |
227 | {"ouml", "246"}, // ö - lowercase o, umlaut | |
228 | {"divide", "247"}, // division sign | |
229 | {"oslash", "248"}, // ø - lowercase o, slash | |
230 | {"ugrave", "249"}, // ù - lowercase u, grave accent | |
231 | {"uacute", "250"}, // ú - lowercase u, acute accent | |
232 | {"ucirc", "251"}, // û - lowercase u, circumflex accent | |
233 | {"uuml", "252"}, // ü - lowercase u, umlaut | |
234 | {"yacute", "253"}, // ý - lowercase y, acute accent | |
235 | {"thorn", "254"}, // þ - lowercase thorn, Icelandic | |
236 | {"yuml", "255"}, // ÿ - lowercase y, umlaut | |
237 | }; | |
238 | ||
239 | // http://www.w3.org/TR/REC-html40/sgml/entities.html | |
240 | // package scoped for testing | |
241 | 0 | static final String[][] HTML40_ARRAY = |
242 | { | |
243 | // <!-- Latin Extended-B --> | |
244 | {"fnof", "402"}, //latin small f with hook = function= florin, U+0192 ISOtech --> | |
245 | // <!-- Greek --> | |
246 | {"Alpha", "913"}, //greek capital letter alpha, U+0391 --> | |
247 | {"Beta", "914"}, //greek capital letter beta, U+0392 --> | |
248 | {"Gamma", "915"}, //greek capital letter gamma,U+0393 ISOgrk3 --> | |
249 | {"Delta", "916"}, //greek capital letter delta,U+0394 ISOgrk3 --> | |
250 | {"Epsilon", "917"}, //greek capital letter epsilon, U+0395 --> | |
251 | {"Zeta", "918"}, //greek capital letter zeta, U+0396 --> | |
252 | {"Eta", "919"}, //greek capital letter eta, U+0397 --> | |
253 | {"Theta", "920"}, //greek capital letter theta,U+0398 ISOgrk3 --> | |
254 | {"Iota", "921"}, //greek capital letter iota, U+0399 --> | |
255 | {"Kappa", "922"}, //greek capital letter kappa, U+039A --> | |
256 | {"Lambda", "923"}, //greek capital letter lambda,U+039B ISOgrk3 --> | |
257 | {"Mu", "924"}, //greek capital letter mu, U+039C --> | |
258 | {"Nu", "925"}, //greek capital letter nu, U+039D --> | |
259 | {"Xi", "926"}, //greek capital letter xi, U+039E ISOgrk3 --> | |
260 | {"Omicron", "927"}, //greek capital letter omicron, U+039F --> | |
261 | {"Pi", "928"}, //greek capital letter pi, U+03A0 ISOgrk3 --> | |
262 | {"Rho", "929"}, //greek capital letter rho, U+03A1 --> | |
263 | // <!-- there is no Sigmaf, and no U+03A2 character either --> | |
264 | {"Sigma", "931"}, //greek capital letter sigma,U+03A3 ISOgrk3 --> | |
265 | {"Tau", "932"}, //greek capital letter tau, U+03A4 --> | |
266 | {"Upsilon", "933"}, //greek capital letter upsilon,U+03A5 ISOgrk3 --> | |
267 | {"Phi", "934"}, //greek capital letter phi,U+03A6 ISOgrk3 --> | |
268 | {"Chi", "935"}, //greek capital letter chi, U+03A7 --> | |
269 | {"Psi", "936"}, //greek capital letter psi,U+03A8 ISOgrk3 --> | |
270 | {"Omega", "937"}, //greek capital letter omega,U+03A9 ISOgrk3 --> | |
271 | {"alpha", "945"}, //greek small letter alpha,U+03B1 ISOgrk3 --> | |
272 | {"beta", "946"}, //greek small letter beta, U+03B2 ISOgrk3 --> | |
273 | {"gamma", "947"}, //greek small letter gamma,U+03B3 ISOgrk3 --> | |
274 | {"delta", "948"}, //greek small letter delta,U+03B4 ISOgrk3 --> | |
275 | {"epsilon", "949"}, //greek small letter epsilon,U+03B5 ISOgrk3 --> | |
276 | {"zeta", "950"}, //greek small letter zeta, U+03B6 ISOgrk3 --> | |
277 | {"eta", "951"}, //greek small letter eta, U+03B7 ISOgrk3 --> | |
278 | {"theta", "952"}, //greek small letter theta,U+03B8 ISOgrk3 --> | |
279 | {"iota", "953"}, //greek small letter iota, U+03B9 ISOgrk3 --> | |
280 | {"kappa", "954"}, //greek small letter kappa,U+03BA ISOgrk3 --> | |
281 | {"lambda", "955"}, //greek small letter lambda,U+03BB ISOgrk3 --> | |
282 | {"mu", "956"}, //greek small letter mu, U+03BC ISOgrk3 --> | |
283 | {"nu", "957"}, //greek small letter nu, U+03BD ISOgrk3 --> | |
284 | {"xi", "958"}, //greek small letter xi, U+03BE ISOgrk3 --> | |
285 | {"omicron", "959"}, //greek small letter omicron, U+03BF NEW --> | |
286 | {"pi", "960"}, //greek small letter pi, U+03C0 ISOgrk3 --> | |
287 | {"rho", "961"}, //greek small letter rho, U+03C1 ISOgrk3 --> | |
288 | {"sigmaf", "962"}, //greek small letter final sigma,U+03C2 ISOgrk3 --> | |
289 | {"sigma", "963"}, //greek small letter sigma,U+03C3 ISOgrk3 --> | |
290 | {"tau", "964"}, //greek small letter tau, U+03C4 ISOgrk3 --> | |
291 | {"upsilon", "965"}, //greek small letter upsilon,U+03C5 ISOgrk3 --> | |
292 | {"phi", "966"}, //greek small letter phi, U+03C6 ISOgrk3 --> | |
293 | {"chi", "967"}, //greek small letter chi, U+03C7 ISOgrk3 --> | |
294 | {"psi", "968"}, //greek small letter psi, U+03C8 ISOgrk3 --> | |
295 | {"omega", "969"}, //greek small letter omega,U+03C9 ISOgrk3 --> | |
296 | {"thetasym", "977"}, //greek small letter theta symbol,U+03D1 NEW --> | |
297 | {"upsih", "978"}, //greek upsilon with hook symbol,U+03D2 NEW --> | |
298 | {"piv", "982"}, //greek pi symbol, U+03D6 ISOgrk3 --> | |
299 | // <!-- General Punctuation --> | |
300 | {"bull", "8226"}, //bullet = black small circle,U+2022 ISOpub --> | |
301 | // <!-- bullet is NOT the same as bullet operator, U+2219 --> | |
302 | {"hellip", "8230"}, //horizontal ellipsis = three dot leader,U+2026 ISOpub --> | |
303 | {"prime", "8242"}, //prime = minutes = feet, U+2032 ISOtech --> | |
304 | {"Prime", "8243"}, //double prime = seconds = inches,U+2033 ISOtech --> | |
305 | {"oline", "8254"}, //overline = spacing overscore,U+203E NEW --> | |
306 | {"frasl", "8260"}, //fraction slash, U+2044 NEW --> | |
307 | // <!-- Letterlike Symbols --> | |
308 | {"weierp", "8472"}, //script capital P = power set= Weierstrass p, U+2118 ISOamso --> | |
309 | {"image", "8465"}, //blackletter capital I = imaginary part,U+2111 ISOamso --> | |
310 | {"real", "8476"}, //blackletter capital R = real part symbol,U+211C ISOamso --> | |
311 | {"trade", "8482"}, //trade mark sign, U+2122 ISOnum --> | |
312 | {"alefsym", "8501"}, //alef symbol = first transfinite cardinal,U+2135 NEW --> | |
313 | // <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the | |
314 | // same glyph could be used to depict both characters --> | |
315 | // <!-- Arrows --> | |
316 | {"larr", "8592"}, //leftwards arrow, U+2190 ISOnum --> | |
317 | {"uarr", "8593"}, //upwards arrow, U+2191 ISOnum--> | |
318 | {"rarr", "8594"}, //rightwards arrow, U+2192 ISOnum --> | |
319 | {"darr", "8595"}, //downwards arrow, U+2193 ISOnum --> | |
320 | {"harr", "8596"}, //left right arrow, U+2194 ISOamsa --> | |
321 | {"crarr", "8629"}, //downwards arrow with corner leftwards= carriage return, U+21B5 NEW --> | |
322 | {"lArr", "8656"}, //leftwards double arrow, U+21D0 ISOtech --> | |
323 | // <!-- ISO 10646 does not say that lArr is the same as the 'is implied by' | |
324 | // arrow but also does not have any other character for that function. | |
325 | // So ? lArr canbe used for 'is implied by' as ISOtech suggests --> | |
326 | {"uArr", "8657"}, //upwards double arrow, U+21D1 ISOamsa --> | |
327 | {"rArr", "8658"}, //rightwards double arrow,U+21D2 ISOtech --> | |
328 | // <!-- ISO 10646 does not say this is the 'implies' character but does not | |
329 | // have another character with this function so ?rArr can be used for | |
330 | // 'implies' as ISOtech suggests --> | |
331 | {"dArr", "8659"}, //downwards double arrow, U+21D3 ISOamsa --> | |
332 | {"hArr", "8660"}, //left right double arrow,U+21D4 ISOamsa --> | |
333 | // <!-- Mathematical Operators --> | |
334 | {"forall", "8704"}, //for all, U+2200 ISOtech --> | |
335 | {"part", "8706"}, //partial differential, U+2202 ISOtech --> | |
336 | {"exist", "8707"}, //there exists, U+2203 ISOtech --> | |
337 | {"empty", "8709"}, //empty set = null set = diameter,U+2205 ISOamso --> | |
338 | {"nabla", "8711"}, //nabla = backward difference,U+2207 ISOtech --> | |
339 | {"isin", "8712"}, //element of, U+2208 ISOtech --> | |
340 | {"notin", "8713"}, //not an element of, U+2209 ISOtech --> | |
341 | {"ni", "8715"}, //contains as member, U+220B ISOtech --> | |
342 | // <!-- should there be a more memorable name than 'ni'? --> | |
343 | {"prod", "8719"}, //n-ary product = product sign,U+220F ISOamsb --> | |
344 | // <!-- prod is NOT the same character as U+03A0 'greek capital letter pi' | |
345 | // though the same glyph might be used for both --> | |
346 | {"sum", "8721"}, //n-ary summation, U+2211 ISOamsb --> | |
347 | // <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma' | |
348 | // though the same glyph might be used for both --> | |
349 | {"minus", "8722"}, //minus sign, U+2212 ISOtech --> | |
350 | {"lowast", "8727"}, //asterisk operator, U+2217 ISOtech --> | |
351 | {"radic", "8730"}, //square root = radical sign,U+221A ISOtech --> | |
352 | {"prop", "8733"}, //proportional to, U+221D ISOtech --> | |
353 | {"infin", "8734"}, //infinity, U+221E ISOtech --> | |
354 | {"ang", "8736"}, //angle, U+2220 ISOamso --> | |
355 | {"and", "8743"}, //logical and = wedge, U+2227 ISOtech --> | |
356 | {"or", "8744"}, //logical or = vee, U+2228 ISOtech --> | |
357 | {"cap", "8745"}, //intersection = cap, U+2229 ISOtech --> | |
358 | {"cup", "8746"}, //union = cup, U+222A ISOtech --> | |
359 | {"int", "8747"}, //integral, U+222B ISOtech --> | |
360 | {"there4", "8756"}, //therefore, U+2234 ISOtech --> | |
361 | {"sim", "8764"}, //tilde operator = varies with = similar to,U+223C ISOtech --> | |
362 | // <!-- tilde operator is NOT the same character as the tilde, U+007E,although | |
363 | // the same glyph might be used to represent both --> | |
364 | {"cong", "8773"}, //approximately equal to, U+2245 ISOtech --> | |
365 | {"asymp", "8776"}, //almost equal to = asymptotic to,U+2248 ISOamsr --> | |
366 | {"ne", "8800"}, //not equal to, U+2260 ISOtech --> | |
367 | {"equiv", "8801"}, //identical to, U+2261 ISOtech --> | |
368 | {"le", "8804"}, //less-than or equal to, U+2264 ISOtech --> | |
369 | {"ge", "8805"}, //greater-than or equal to,U+2265 ISOtech --> | |
370 | {"sub", "8834"}, //subset of, U+2282 ISOtech --> | |
371 | {"sup", "8835"}, //superset of, U+2283 ISOtech --> | |
372 | // <!-- note that nsup, 'not a superset of, U+2283' is not covered by the | |
373 | // Symbol font encoding and is not included. Should it be, for symmetry? | |
374 | // It is in ISOamsn --> <!ENTITY nsub", "8836"}, | |
375 | // not a subset of, U+2284 ISOamsn --> | |
376 | {"sube", "8838"}, //subset of or equal to, U+2286 ISOtech --> | |
377 | {"supe", "8839"}, //superset of or equal to,U+2287 ISOtech --> | |
378 | {"oplus", "8853"}, //circled plus = direct sum,U+2295 ISOamsb --> | |
379 | {"otimes", "8855"}, //circled times = vector product,U+2297 ISOamsb --> | |
380 | {"perp", "8869"}, //up tack = orthogonal to = perpendicular,U+22A5 ISOtech --> | |
381 | {"sdot", "8901"}, //dot operator, U+22C5 ISOamsb --> | |
382 | // <!-- dot operator is NOT the same character as U+00B7 middle dot --> | |
383 | // <!-- Miscellaneous Technical --> | |
384 | {"lceil", "8968"}, //left ceiling = apl upstile,U+2308 ISOamsc --> | |
385 | {"rceil", "8969"}, //right ceiling, U+2309 ISOamsc --> | |
386 | {"lfloor", "8970"}, //left floor = apl downstile,U+230A ISOamsc --> | |
387 | {"rfloor", "8971"}, //right floor, U+230B ISOamsc --> | |
388 | {"lang", "9001"}, //left-pointing angle bracket = bra,U+2329 ISOtech --> | |
389 | // <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' --> | |
390 | {"rang", "9002"}, //right-pointing angle bracket = ket,U+232A ISOtech --> | |
391 | // <!-- rang is NOT the same character as U+003E 'greater than' or U+203A | |
392 | // 'single right-pointing angle quotation mark' --> | |
393 | // <!-- Geometric Shapes --> | |
394 | {"loz", "9674"}, //lozenge, U+25CA ISOpub --> | |
395 | // <!-- Miscellaneous Symbols --> | |
396 | {"spades", "9824"}, //black spade suit, U+2660 ISOpub --> | |
397 | // <!-- black here seems to mean filled as opposed to hollow --> | |
398 | {"clubs", "9827"}, //black club suit = shamrock,U+2663 ISOpub --> | |
399 | {"hearts", "9829"}, //black heart suit = valentine,U+2665 ISOpub --> | |
400 | {"diams", "9830"}, //black diamond suit, U+2666 ISOpub --> | |
401 | ||
402 | // <!-- Latin Extended-A --> | |
403 | {"OElig", "338"}, // -- latin capital ligature OE,U+0152 ISOlat2 --> | |
404 | {"oelig", "339"}, // -- latin small ligature oe, U+0153 ISOlat2 --> | |
405 | // <!-- ligature is a misnomer, this is a separate character in some languages --> | |
406 | {"Scaron", "352"}, // -- latin capital letter S with caron,U+0160 ISOlat2 --> | |
407 | {"scaron", "353"}, // -- latin small letter s with caron,U+0161 ISOlat2 --> | |
408 | {"Yuml", "376"}, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 --> | |
409 | // <!-- Spacing Modifier Letters --> | |
410 | {"circ", "710"}, // -- modifier letter circumflex accent,U+02C6 ISOpub --> | |
411 | {"tilde", "732"}, //small tilde, U+02DC ISOdia --> | |
412 | // <!-- General Punctuation --> | |
413 | {"ensp", "8194"}, //en space, U+2002 ISOpub --> | |
414 | {"emsp", "8195"}, //em space, U+2003 ISOpub --> | |
415 | {"thinsp", "8201"}, //thin space, U+2009 ISOpub --> | |
416 | {"zwnj", "8204"}, //zero width non-joiner,U+200C NEW RFC 2070 --> | |
417 | {"zwj", "8205"}, //zero width joiner, U+200D NEW RFC 2070 --> | |
418 | {"lrm", "8206"}, //left-to-right mark, U+200E NEW RFC 2070 --> | |
419 | {"rlm", "8207"}, //right-to-left mark, U+200F NEW RFC 2070 --> | |
420 | {"ndash", "8211"}, //en dash, U+2013 ISOpub --> | |
421 | {"mdash", "8212"}, //em dash, U+2014 ISOpub --> | |
422 | {"lsquo", "8216"}, //left single quotation mark,U+2018 ISOnum --> | |
423 | {"rsquo", "8217"}, //right single quotation mark,U+2019 ISOnum --> | |
424 | {"sbquo", "8218"}, //single low-9 quotation mark, U+201A NEW --> | |
425 | {"ldquo", "8220"}, //left double quotation mark,U+201C ISOnum --> | |
426 | {"rdquo", "8221"}, //right double quotation mark,U+201D ISOnum --> | |
427 | {"bdquo", "8222"}, //double low-9 quotation mark, U+201E NEW --> | |
428 | {"dagger", "8224"}, //dagger, U+2020 ISOpub --> | |
429 | {"Dagger", "8225"}, //double dagger, U+2021 ISOpub --> | |
430 | {"permil", "8240"}, //per mille sign, U+2030 ISOtech --> | |
431 | {"lsaquo", "8249"}, //single left-pointing angle quotation mark,U+2039 ISO proposed --> | |
432 | // <!-- lsaquo is proposed but not yet ISO standardized --> | |
433 | {"rsaquo", "8250"}, //single right-pointing angle quotation mark,U+203A ISO proposed --> | |
434 | // <!-- rsaquo is proposed but not yet ISO standardized --> | |
435 | {"euro", "8364"}, // -- euro sign, U+20AC NEW --> | |
436 | }; | |
437 | ||
438 | // Name/value lookup backing this instance; private, yet read directly by the enclosing XMLEntityCodec. | |
439 | 0 | private EntityMap map = new Entities.LookupEntityMap(); |
440 | ||
441 | /** | |
442 | * <p> | |
443 | * Adds entities to this entity. | |
444 | * </p> | |
445 | * | |
446 | * @param entityArray array of entities to be added | |
447 | */ | |
448 | public void addEntities(String[][] entityArray) | |
449 | { | |
450 | 0 | for (int i = 0; i < entityArray.length; ++i) |
451 | { | |
452 | 0 | addEntity(entityArray[i][0], Integer.parseInt(entityArray[i][1])); |
453 | } | |
454 | 0 | } |
455 | ||
456 | /** | |
457 | * <p> | |
458 | * Add an entity to this entity. | |
459 | * </p> | |
460 | * | |
461 | * @param name name of the entity | |
462 | * @param value vale of the entity | |
463 | */ | |
464 | public void addEntity(String name, int value) | |
465 | { | |
466 | 0 | map.add(name, value); |
467 | 0 | } |
468 | ||
469 | /** | |
470 | * <p> | |
471 | * Returns the name of the entity identified by the specified value. | |
472 | * </p> | |
473 | * | |
474 | * @param value the value to locate | |
475 | * @return entity name associated with the specified value | |
476 | */ | |
477 | public String entityName(int value) | |
478 | { | |
479 | 0 | return map.name(value); |
480 | } | |
481 | ||
482 | /** | |
483 | * <p> | |
484 | * Returns the value of the entity identified by the specified name. | |
485 | * </p> | |
486 | * | |
487 | * @param name the name to locate | |
488 | * @return entity value associated with the specified name | |
489 | */ | |
490 | public int entityValue(String name) | |
491 | { | |
492 | 0 | return map.value(name); |
493 | } | |
494 | ||
495 | /** | |
496 | * <p> | |
497 | * Escapes the characters in a <code>String</code>. | |
498 | * </p> | |
499 | * <p> | |
500 | * For example, if you have called addEntity("foo", 0xA1), | |
501 | * escape("\u00A1") will return "&foo;" | |
502 | * </p> | |
503 | * | |
504 | * @param str The <code>String</code> to escape. | |
505 | * @return A new escaped <code>String</code>. | |
506 | */ | |
507 | public String escape(String str) | |
508 | { | |
509 | // TODO: rewrite to use a Writer | |
510 | 0 | StringBuffer buf = new StringBuffer(str.length() * 2); |
511 | 0 | for (int i = 0; i < str.length(); ++i) |
512 | { | |
513 | 0 | char ch = str.charAt(i); |
514 | 0 | String entityName = this.entityName(ch); |
515 | 0 | if (entityName == null) |
516 | { | |
517 | 0 | if (ch > 0x7F) |
518 | { | |
519 | 0 | buf.append('&'); |
520 | 0 | buf.append('#'); |
521 | 0 | buf.append((int) ch); |
522 | 0 | buf.append(';'); |
523 | } | |
524 | else | |
525 | { | |
526 | 0 | buf.append(ch); |
527 | } | |
528 | } | |
529 | else | |
530 | { | |
531 | 0 | buf.append('&'); |
532 | 0 | buf.append(entityName); |
533 | 0 | buf.append(';'); |
534 | } | |
535 | } | |
536 | 0 | return buf.toString(); |
537 | } | |
538 | ||
539 | /** | |
540 | * <p> | |
541 | * Escapes the characters in the <code>String</code> passed and writes the | |
542 | * result to the <code>Writer</code> passed. | |
543 | * </p> | |
544 | * | |
545 | * @param writer The <code>Writer</code> to write the results of the | |
546 | * escaping to. Assumed to be a non-null value. | |
547 | * @param str The <code>String</code> to escape. Assumed to be a non-null | |
548 | * value. | |
549 | * @throws IOException when <code>Writer</code> passed throws the exception | |
550 | * from calls to the {@link Writer#write(int)} methods. | |
551 | * @see #escape(String) | |
552 | * @see Writer | |
553 | */ | |
554 | public void escape(Writer writer, String str) throws IOException | |
555 | { | |
556 | 0 | int len = str.length(); |
557 | 0 | for (int i = 0; i < len; i++) |
558 | { | |
559 | 0 | char c = str.charAt(i); |
560 | 0 | String entityName = this.entityName(c); |
561 | 0 | if (entityName == null) |
562 | { | |
563 | 0 | if (c > 0x7F) |
564 | { | |
565 | 0 | writer.write("&#"); |
566 | 0 | writer.write(Integer.toString(c, 10)); |
567 | 0 | writer.write(';'); |
568 | } | |
569 | else | |
570 | { | |
571 | 0 | writer.write(c); |
572 | } | |
573 | } | |
574 | else | |
575 | { | |
576 | 0 | writer.write('&'); |
577 | 0 | writer.write(entityName); |
578 | 0 | writer.write(';'); |
579 | } | |
580 | } | |
581 | 0 | } |
582 | ||
583 | /** | |
584 | * <p> | |
585 | * Unescapes the entities in a <code>String</code>. | |
586 | * </p> | |
587 | * <p> | |
588 | * For example, if you have called addEntity("foo", 0xA1), | |
589 | * unescape("&foo;") will return "\u00A1" | |
590 | * </p> | |
591 | * | |
592 | * @param str The <code>String</code> to escape. | |
593 | * @return A new escaped <code>String</code> or str itself if no unescaping | |
594 | * was necessary. | |
595 | */ | |
596 | public String unescape(String str) | |
597 | { | |
598 | 0 | int firstAmp = str.indexOf('&'); |
599 | 0 | if (firstAmp < 0) |
600 | { | |
601 | 0 | return str; |
602 | } | |
603 | ||
604 | 0 | StringBuffer buf = new StringBuffer(str.length()); |
605 | 0 | buf.append(str.substring(0, firstAmp)); |
606 | 0 | for (int i = firstAmp; i < str.length(); ++i) |
607 | { | |
608 | 0 | char ch = str.charAt(i); |
609 | 0 | if (ch == '&') |
610 | { | |
611 | 0 | int semi = str.indexOf(';', i + 1); |
612 | 0 | if (semi == -1) |
613 | { | |
614 | 0 | buf.append(ch); |
615 | 0 | continue; |
616 | } | |
617 | 0 | int amph = str.indexOf('&', i + 1); |
618 | 0 | if (amph != -1 && amph < semi) |
619 | { | |
620 | // Then the text looks like &...&...; | |
621 | 0 | buf.append(ch); |
622 | 0 | continue; |
623 | } | |
624 | 0 | String entityName = str.substring(i + 1, semi); |
625 | int entityValue; | |
626 | 0 | if (entityName.length() == 0) |
627 | { | |
628 | 0 | entityValue = -1; |
629 | } | |
630 | 0 | else if (entityName.charAt(0) == '#') |
631 | { | |
632 | 0 | if (entityName.length() == 1) |
633 | { | |
634 | 0 | entityValue = -1; |
635 | } | |
636 | else | |
637 | { | |
638 | 0 | char charAt1 = entityName.charAt(1); |
639 | try | |
640 | { | |
641 | 0 | if (charAt1 == 'x' || charAt1 == 'X') |
642 | { | |
643 | 0 | entityValue = Integer.valueOf(entityName.substring(2), 16).intValue(); |
644 | } | |
645 | else | |
646 | { | |
647 | 0 | entityValue = Integer.parseInt(entityName.substring(1)); |
648 | } | |
649 | 0 | if (entityValue > 0xFFFF) |
650 | { | |
651 | 0 | entityValue = -1; |
652 | } | |
653 | } | |
654 | 0 | catch (NumberFormatException ex) |
655 | { | |
656 | 0 | entityValue = -1; |
657 | 0 | } |
658 | 0 | } |
659 | } | |
660 | else | |
661 | { | |
662 | 0 | entityValue = this.entityValue(entityName); |
663 | } | |
664 | 0 | if (entityValue == -1) |
665 | { | |
666 | 0 | buf.append('&'); |
667 | 0 | buf.append(entityName); |
668 | 0 | buf.append(';'); |
669 | } | |
670 | else | |
671 | { | |
672 | 0 | buf.append((char) (entityValue)); |
673 | } | |
674 | 0 | i = semi; |
675 | 0 | } |
676 | else | |
677 | { | |
678 | 0 | buf.append(ch); |
679 | } | |
680 | } | |
681 | 0 | return buf.toString(); |
682 | } | |
683 | ||
684 | /** | |
685 | * <p> | |
686 | * Unescapes the escaped entities in the <code>String</code> passed and | |
687 | * writes the result to the <code>Writer</code> passed. | |
688 | * </p> | |
689 | * | |
690 | * @param writer The <code>Writer</code> to write the results to; assumed | |
691 | * to be non-null. | |
692 | * @param string The <code>String</code> to write the results to; assumed | |
693 | * to be non-null. | |
694 | * @throws IOException when <code>Writer</code> passed throws the exception | |
695 | * from calls to the {@link Writer#write(int)} methods. | |
696 | * @see #escape(String) | |
697 | * @see Writer | |
698 | */ | |
699 | public void unescape(Writer writer, String string) throws IOException | |
700 | { | |
701 | 0 | int firstAmp = string.indexOf('&'); |
702 | 0 | if (firstAmp < 0) |
703 | { | |
704 | 0 | writer.write(string); |
705 | 0 | return; |
706 | } | |
707 | ||
708 | 0 | writer.write(string, 0, firstAmp); |
709 | 0 | int len = string.length(); |
710 | 0 | for (int i = firstAmp; i < len; i++) |
711 | { | |
712 | 0 | char c = string.charAt(i); |
713 | 0 | if (c == '&') |
714 | { | |
715 | 0 | int nextIdx = i + 1; |
716 | 0 | int semiColonIdx = string.indexOf(';', nextIdx); |
717 | 0 | if (semiColonIdx == -1) |
718 | { | |
719 | 0 | writer.write(c); |
720 | 0 | continue; |
721 | } | |
722 | 0 | int amphersandIdx = string.indexOf('&', i + 1); |
723 | 0 | if (amphersandIdx != -1 && amphersandIdx < semiColonIdx) |
724 | { | |
725 | // Then the text looks like &...&...; | |
726 | 0 | writer.write(c); |
727 | 0 | continue; |
728 | } | |
729 | 0 | String entityContent = string.substring(nextIdx, semiColonIdx); |
730 | 0 | int entityValue = -1; |
731 | 0 | int entityContentLen = entityContent.length(); |
732 | 0 | if (entityContentLen > 0) |
733 | { | |
734 | 0 | if (entityContent.charAt(0) == '#') |
735 | { // escaped value content is an integer (decimal or | |
736 | // hexidecimal) | |
737 | 0 | if (entityContentLen > 1) |
738 | { | |
739 | 0 | char isHexChar = entityContent.charAt(1); |
740 | try | |
741 | { | |
742 | 0 | switch (isHexChar) |
743 | { | |
744 | case 'X' : | |
745 | case 'x' : | |
746 | 0 | entityValue = Integer.parseInt(entityContent.substring(2), 16); |
747 | 0 | break; |
748 | default : | |
749 | 0 | entityValue = Integer.parseInt(entityContent.substring(1), 10); |
750 | } | |
751 | 0 | if (entityValue > 0xFFFF) |
752 | { | |
753 | 0 | entityValue = -1; |
754 | } | |
755 | } | |
756 | 0 | catch (NumberFormatException e) |
757 | { | |
758 | 0 | entityValue = -1; |
759 | 0 | } |
760 | 0 | } |
761 | } | |
762 | else | |
763 | { // escaped value content is an entity name | |
764 | 0 | entityValue = this.entityValue(entityContent); |
765 | } | |
766 | } | |
767 | ||
768 | 0 | if (entityValue == -1) |
769 | { | |
770 | 0 | writer.write('&'); |
771 | 0 | writer.write(entityContent); |
772 | 0 | writer.write(';'); |
773 | } | |
774 | else | |
775 | { | |
776 | 0 | writer.write(entityValue); |
777 | } | |
778 | 0 | i = semiColonIdx; // move index up to the semi-colon |
779 | 0 | } |
780 | else | |
781 | { | |
782 | 0 | writer.write(c); |
783 | } | |
784 | } | |
785 | 0 | } |
786 | ||
/**
 * Two-way association between entity names and their integer values.
 */
private static interface EntityMap
{
    /**
     * <p>
     * Adds an entry to this entity map, making it available to both
     * {@link #name(int)} and {@link #value(String)}.
     * </p>
     *
     * @param name the entity name
     * @param value the entity value
     */
    void add(String name, int value);

    /**
     * <p>
     * Returns the name of the entity identified by the specified value.
     * </p>
     *
     * @param value the value to locate
     * @return entity name associated with the specified value;
     *         <code>PrimitiveEntityMap</code> returns <code>null</code> when
     *         no entity has that value
     */
    String name(int value);

    /**
     * <p>
     * Returns the value of the entity identified by the specified name.
     * </p>
     *
     * @param name the name to locate
     * @return entity value associated with the specified name;
     *         <code>PrimitiveEntityMap</code> returns <code>-1</code> when
     *         the name is unknown
     */
    int value(String name);
}
819 | ||
820 | 0 | private static class PrimitiveEntityMap implements EntityMap |
821 | { | |
822 | 0 | private Map mapNameToValue = new HashMap(); |
823 | 0 | private IntHashMap mapValueToName = new IntHashMap(); |
824 | ||
825 | /** | |
826 | * {@inheritDoc} | |
827 | */ | |
828 | public void add(String name, int value) | |
829 | { | |
830 | 0 | mapNameToValue.put(name, new Integer(value)); |
831 | 0 | mapValueToName.put(value, name); |
832 | 0 | } |
833 | ||
834 | /** | |
835 | * {@inheritDoc} | |
836 | */ | |
837 | public String name(int value) | |
838 | { | |
839 | 0 | return (String) mapValueToName.get(value); |
840 | } | |
841 | ||
842 | /** | |
843 | * {@inheritDoc} | |
844 | */ | |
845 | public int value(String name) | |
846 | { | |
847 | 0 | Object value = mapNameToValue.get(name); |
848 | 0 | if (value == null) |
849 | { | |
850 | 0 | return -1; |
851 | } | |
852 | 0 | return ((Integer) value).intValue(); |
853 | } | |
854 | } | |
855 | ||
856 | 0 | private static class LookupEntityMap extends PrimitiveEntityMap |
857 | { | |
858 | private static final int LOOKUP_TABLE_SIZE = 256; | |
859 | private String[] lookupTable; | |
860 | ||
861 | /** | |
862 | * {@inheritDoc} | |
863 | */ | |
864 | public String name(int value) | |
865 | { | |
866 | 0 | if (value < LOOKUP_TABLE_SIZE) |
867 | { | |
868 | 0 | return lookupTable()[value]; |
869 | } | |
870 | 0 | return super.name(value); |
871 | } | |
872 | ||
873 | /** | |
874 | * <p> | |
875 | * Returns the lookup table for this entity map. The lookup table is | |
876 | * created if it has not been previously. | |
877 | * </p> | |
878 | * | |
879 | * @return the lookup table | |
880 | */ | |
881 | private String[] lookupTable() | |
882 | { | |
883 | 0 | if (lookupTable == null) |
884 | { | |
885 | 0 | createLookupTable(); |
886 | } | |
887 | 0 | return lookupTable; |
888 | } | |
889 | ||
890 | /** | |
891 | * <p> | |
892 | * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, | |
893 | * initialized with entity names. | |
894 | * </p> | |
895 | */ | |
896 | private void createLookupTable() | |
897 | { | |
898 | 0 | lookupTable = new String[LOOKUP_TABLE_SIZE]; |
899 | 0 | for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i) |
900 | { | |
901 | 0 | lookupTable[i] = super.name(i); |
902 | } | |
903 | 0 | } |
904 | } | |
905 | ||
906 | /** | |
907 | * <p> | |
908 | * A hash map that uses primitive ints for the key rather than objects. | |
909 | * </p> | |
910 | * <p> | |
911 | * Note that this class is for internal optimization purposes only, and may | |
912 | * not be supported in future releases of Jakarta Commons Lang. Utilities of | |
913 | * this sort may be included in future releases of Jakarta Commons | |
914 | * Collections. | |
915 | * </p> | |
916 | * | |
917 | * @author Justin Couch | |
918 | * @author Alex Chaffee (alex@apache.org) | |
919 | * @author Stephen Colebourne | |
920 | * @since 2.0 | |
921 | * @version $Revision$ | |
922 | * @see java.util.HashMap | |
923 | */ | |
924 | 0 | private static class IntHashMap |
925 | { | |
926 | ||
927 | /** | |
928 | * The hash table data. | |
929 | */ | |
930 | private transient Entry table[]; | |
931 | ||
932 | /** | |
933 | * The total number of entries in the hash table. | |
934 | */ | |
935 | private transient int count; | |
936 | ||
937 | /** | |
938 | * The table is rehashed when its size exceeds this threshold. (The value | |
939 | * of this field is (int)(capacity * loadFactor).) | |
940 | * | |
941 | * @serial | |
942 | */ | |
943 | private int threshold; | |
944 | ||
945 | /** | |
946 | * The load factor for the hashtable. | |
947 | * | |
948 | * @serial | |
949 | */ | |
950 | private float loadFactor; | |
951 | ||
952 | /** | |
953 | * <p> | |
954 | * Innerclass that acts as a datastructure to create a new entry in the | |
955 | * table. | |
956 | * </p> | |
957 | */ | |
958 | private static class Entry | |
959 | { | |
960 | int hash; | |
961 | int key; | |
962 | Object value; | |
963 | Entry next; | |
964 | ||
965 | /** | |
966 | * <p> | |
967 | * Create a new entry with the given values. | |
968 | * </p> | |
969 | * | |
970 | * @param hash The code used to hash the object with | |
971 | * @param key The key used to enter this in the table | |
972 | * @param value The value for this key | |
973 | * @param next A reference to the next entry in the table | |
974 | */ | |
975 | protected Entry(int hash, int key, Object value, Entry next) | |
976 | 0 | { |
977 | 0 | this.hash = hash; |
978 | 0 | this.key = key; |
979 | 0 | this.value = value; |
980 | 0 | this.next = next; |
981 | 0 | } |
982 | } | |
983 | ||
984 | /** | |
985 | * <p> | |
986 | * Constructs a new, empty hashtable with a default capacity and load | |
987 | * factor, which is <code>20</code> and <code>0.75</code> | |
988 | * respectively. | |
989 | * </p> | |
990 | */ | |
991 | public IntHashMap() | |
992 | { | |
993 | 0 | this(20, 0.75f); |
994 | 0 | } |
995 | ||
996 | /** | |
997 | * <p> | |
998 | * Constructs a new, empty hashtable with the specified initial capacity | |
999 | * and default load factor, which is <code>0.75</code>. | |
1000 | * </p> | |
1001 | * | |
1002 | * @param initialCapacity the initial capacity of the hashtable. | |
1003 | * @throws IllegalArgumentException if the initial capacity is less than | |
1004 | * zero. | |
1005 | */ | |
1006 | public IntHashMap(int initialCapacity) | |
1007 | { | |
1008 | 0 | this(initialCapacity, 0.75f); |
1009 | 0 | } |
1010 | ||
1011 | /** | |
1012 | * <p> | |
1013 | * Constructs a new, empty hashtable with the specified initial capacity | |
1014 | * and the specified load factor. | |
1015 | * </p> | |
1016 | * | |
1017 | * @param initialCapacity the initial capacity of the hashtable. | |
1018 | * @param loadFactor the load factor of the hashtable. | |
1019 | * @throws IllegalArgumentException if the initial capacity is less than | |
1020 | * zero, or if the load factor is nonpositive. | |
1021 | */ | |
1022 | public IntHashMap(int initialCapacity, float loadFactor) | |
1023 | { | |
1024 | 0 | super(); |
1025 | 0 | if (initialCapacity < 0) |
1026 | { | |
1027 | 0 | throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity); |
1028 | } | |
1029 | 0 | if (loadFactor <= 0) |
1030 | { | |
1031 | 0 | throw new IllegalArgumentException("Illegal Load: " + loadFactor); |
1032 | } | |
1033 | 0 | if (initialCapacity == 0) |
1034 | { | |
1035 | 0 | initialCapacity = 1; |
1036 | } | |
1037 | ||
1038 | 0 | this.loadFactor = loadFactor; |
1039 | 0 | table = new Entry[initialCapacity]; |
1040 | 0 | threshold = (int) (initialCapacity * loadFactor); |
1041 | 0 | } |
1042 | ||
1043 | /** | |
1044 | * <p> | |
1045 | * Returns the number of keys in this hashtable. | |
1046 | * </p> | |
1047 | * | |
1048 | * @return the number of keys in this hashtable. | |
1049 | */ | |
1050 | public int size() | |
1051 | { | |
1052 | 0 | return count; |
1053 | } | |
1054 | ||
1055 | /** | |
1056 | * <p> | |
1057 | * Tests if this hashtable maps no keys to values. | |
1058 | * </p> | |
1059 | * | |
1060 | * @return <code>true</code> if this hashtable maps no keys to values; | |
1061 | * <code>false</code> otherwise. | |
1062 | */ | |
1063 | public boolean isEmpty() | |
1064 | { | |
1065 | 0 | return count == 0; |
1066 | } | |
1067 | ||
1068 | /** | |
1069 | * <p> | |
1070 | * Tests if some key maps into the specified value in this hashtable. | |
1071 | * This operation is more expensive than the <code>containsKey</code> | |
1072 | * method. | |
1073 | * </p> | |
1074 | * <p> | |
1075 | * Note that this method is identical in functionality to containsValue, | |
1076 | * (which is part of the Map interface in the collections framework). | |
1077 | * </p> | |
1078 | * | |
1079 | * @param value a value to search for. | |
1080 | * @return <code>true</code> if and only if some key maps to the | |
1081 | * <code>value</code> argument in this hashtable as determined | |
1082 | * by the <tt>equals</tt> method; <code>false</code> | |
1083 | * otherwise. | |
1084 | * @throws NullPointerException if the value is <code>null</code>. | |
1085 | * @see #containsKey(int) | |
1086 | * @see #containsValue(Object) | |
1087 | * @see java.util.Map | |
1088 | */ | |
1089 | public boolean contains(Object value) | |
1090 | { | |
1091 | 0 | if (value == null) |
1092 | { | |
1093 | 0 | throw new IllegalArgumentException("parameter value may not be null"); |
1094 | } | |
1095 | ||
1096 | 0 | Entry tab[] = table; |
1097 | 0 | for (int i = tab.length; i-- > 0;) |
1098 | { | |
1099 | 0 | for (Entry e = tab[i]; e != null; e = e.next) |
1100 | { | |
1101 | 0 | if (e.value.equals(value)) |
1102 | { | |
1103 | 0 | return true; |
1104 | } | |
1105 | } | |
1106 | } | |
1107 | 0 | return false; |
1108 | } | |
1109 | ||
1110 | /** | |
1111 | * <p> | |
1112 | * Returns <code>true</code> if this HashMap maps one or more keys to | |
1113 | * this value. | |
1114 | * </p> | |
1115 | * <p> | |
1116 | * Note that this method is identical in functionality to contains (which | |
1117 | * predates the Map interface). | |
1118 | * </p> | |
1119 | * | |
1120 | * @param value value whose presence in this HashMap is to be tested. | |
1121 | * @return boolean <code>true</code> if the value is contained | |
1122 | * @see java.util.Map | |
1123 | * @since JDK1.2 | |
1124 | */ | |
1125 | public boolean containsValue(Object value) | |
1126 | { | |
1127 | 0 | return contains(value); |
1128 | } | |
1129 | ||
1130 | /** | |
1131 | * <p> | |
1132 | * Tests if the specified object is a key in this hashtable. | |
1133 | * </p> | |
1134 | * | |
1135 | * @param key possible key. | |
1136 | * @return <code>true</code> if and only if the specified object is a | |
1137 | * key in this hashtable, as determined by the <tt>equals</tt> | |
1138 | * method; <code>false</code> otherwise. | |
1139 | * @see #contains(Object) | |
1140 | */ | |
1141 | public boolean containsKey(int key) | |
1142 | { | |
1143 | 0 | Entry tab[] = table; |
1144 | 0 | int hash = key; |
1145 | 0 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1146 | 0 | for (Entry e = tab[index]; e != null; e = e.next) |
1147 | { | |
1148 | 0 | if (e.hash == hash) |
1149 | { | |
1150 | 0 | return true; |
1151 | } | |
1152 | } | |
1153 | 0 | return false; |
1154 | } | |
1155 | ||
1156 | /** | |
1157 | * <p> | |
1158 | * Returns the value to which the specified key is mapped in this map. | |
1159 | * </p> | |
1160 | * | |
1161 | * @param key a key in the hashtable. | |
1162 | * @return the value to which the key is mapped in this hashtable; | |
1163 | * <code>null</code> if the key is not mapped to any value in | |
1164 | * this hashtable. | |
1165 | * @see #put(int, Object) | |
1166 | */ | |
1167 | public Object get(int key) | |
1168 | { | |
1169 | 0 | Entry tab[] = table; |
1170 | 0 | int hash = key; |
1171 | 0 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1172 | 0 | for (Entry e = tab[index]; e != null; e = e.next) |
1173 | { | |
1174 | 0 | if (e.hash == hash) |
1175 | { | |
1176 | 0 | return e.value; |
1177 | } | |
1178 | } | |
1179 | 0 | return null; |
1180 | } | |
1181 | ||
1182 | /** | |
1183 | * <p> | |
1184 | * Increases the capacity of and internally reorganizes this hashtable, | |
1185 | * in order to accommodate and access its entries more efficiently. | |
1186 | * </p> | |
1187 | * <p> | |
1188 | * This method is called automatically when the number of keys in the | |
1189 | * hashtable exceeds this hashtable's capacity and load factor. | |
1190 | * </p> | |
1191 | */ | |
1192 | protected void rehash() | |
1193 | { | |
1194 | 0 | int oldCapacity = table.length; |
1195 | 0 | Entry oldMap[] = table; |
1196 | ||
1197 | 0 | int newCapacity = oldCapacity * 2 + 1; |
1198 | 0 | Entry newMap[] = new Entry[newCapacity]; |
1199 | ||
1200 | 0 | threshold = (int) (newCapacity * loadFactor); |
1201 | 0 | table = newMap; |
1202 | ||
1203 | 0 | for (int i = oldCapacity; i-- > 0;) |
1204 | { | |
1205 | 0 | for (Entry old = oldMap[i]; old != null;) |
1206 | { | |
1207 | 0 | Entry e = old; |
1208 | 0 | old = old.next; |
1209 | ||
1210 | 0 | int index = (e.hash & 0x7FFFFFFF) % newCapacity; |
1211 | 0 | e.next = newMap[index]; |
1212 | 0 | newMap[index] = e; |
1213 | 0 | } |
1214 | } | |
1215 | 0 | } |
1216 | ||
1217 | /** | |
1218 | * <p> | |
1219 | * Maps the specified <code>key</code> to the specified | |
1220 | * <code>value</code> in this hashtable. The key cannot be | |
1221 | * <code>null</code>. | |
1222 | * </p> | |
1223 | * <p> | |
1224 | * The value can be retrieved by calling the <code>get</code> method | |
1225 | * with a key that is equal to the original key. | |
1226 | * </p> | |
1227 | * | |
1228 | * @param key the hashtable key. | |
1229 | * @param value the value. | |
1230 | * @return the previous value of the specified key in this hashtable, or | |
1231 | * <code>null</code> if it did not have one. | |
1232 | * @throws NullPointerException if the key is <code>null</code>. | |
1233 | * @see #get(int) | |
1234 | */ | |
1235 | public Object put(int key, Object value) | |
1236 | { | |
1237 | // Makes sure the key is not already in the hashtable. | |
1238 | 0 | Entry tab[] = table; |
1239 | 0 | int hash = key; |
1240 | 0 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1241 | 0 | for (Entry e = tab[index]; e != null; e = e.next) |
1242 | { | |
1243 | 0 | if (e.hash == hash) |
1244 | { | |
1245 | 0 | Object old = e.value; |
1246 | 0 | e.value = value; |
1247 | 0 | return old; |
1248 | } | |
1249 | } | |
1250 | ||
1251 | 0 | if (count >= threshold) |
1252 | { | |
1253 | // Rehash the table if the threshold is exceeded | |
1254 | 0 | rehash(); |
1255 | ||
1256 | 0 | tab = table; |
1257 | 0 | index = (hash & 0x7FFFFFFF) % tab.length; |
1258 | } | |
1259 | ||
1260 | // Creates the new entry. | |
1261 | 0 | Entry e = new Entry(hash, key, value, tab[index]); |
1262 | 0 | tab[index] = e; |
1263 | 0 | count++; |
1264 | 0 | return null; |
1265 | } | |
1266 | ||
1267 | /** | |
1268 | * <p> | |
1269 | * Removes the key (and its corresponding value) from this hashtable. | |
1270 | * </p> | |
1271 | * <p> | |
1272 | * This method does nothing if the key is not present in the hashtable. | |
1273 | * </p> | |
1274 | * | |
1275 | * @param key the key that needs to be removed. | |
1276 | * @return the value to which the key had been mapped in this hashtable, | |
1277 | * or <code>null</code> if the key did not have a mapping. | |
1278 | */ | |
1279 | public Object remove(int key) | |
1280 | { | |
1281 | 0 | Entry tab[] = table; |
1282 | 0 | int hash = key; |
1283 | 0 | int index = (hash & 0x7FFFFFFF) % tab.length; |
1284 | 0 | for (Entry e = tab[index], prev = null; e != null; prev = e, e = e.next) |
1285 | { | |
1286 | 0 | if (e.hash == hash) |
1287 | { | |
1288 | 0 | if (prev != null) |
1289 | { | |
1290 | 0 | prev.next = e.next; |
1291 | } | |
1292 | else | |
1293 | { | |
1294 | 0 | tab[index] = e.next; |
1295 | } | |
1296 | 0 | count--; |
1297 | 0 | Object oldValue = e.value; |
1298 | 0 | e.value = null; |
1299 | 0 | return oldValue; |
1300 | } | |
1301 | } | |
1302 | 0 | return null; |
1303 | } | |
1304 | ||
1305 | /** | |
1306 | * <p>Clears this hashtable so that it contains no keys.</p> | |
1307 | */ | |
1308 | public synchronized void clear() | |
1309 | { | |
1310 | 0 | Entry tab[] = table; |
1311 | 0 | for (int index = tab.length; --index >= 0;) |
1312 | { | |
1313 | 0 | tab[index] = null; |
1314 | } | |
1315 | 0 | count = 0; |
1316 | 0 | } |
1317 | ||
1318 | } | |
1319 | ||
1320 | } | |
1321 | } |