View Javadoc

1   /*
2    * $Id: XMLEntityCodec.java 19191 2010-08-25 21:05:23Z tcarlson $
3    * --------------------------------------------------------------------------------------
4    * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
5    *
6    * The software in this package is published under the terms of the CPAL v1.0
7    * license, a copy of which has been included with this distribution in the
8    * LICENSE.txt file.
9    */
10  
11  package org.mule.util;
12  
13  import java.io.IOException;
14  import java.io.Writer;
15  import java.util.HashMap;
16  import java.util.Map;
17  
18  /**
19   * This encoder contains methods that convert characters to Character entities as
20   * defined by http://www.w3.org/TR/REC-html40/sgml/entities.html. More precisely it
21   * combines the functionality of {@link org.apache.commons.lang.StringEscapeUtils#escapeXml(String)} and
22   * {@link org.apache.commons.lang.StringEscapeUtils#escapeHtml(String)} into a single pass.
23   */
24  // @ThreadSafe
25  public final class XMLEntityCodec
26  {
/**
 * Entity table shared by all static methods of this class. The static
 * initializer below fills it with the apostrophe, basic, ISO-8859-1 and
 * HTML 4.0 entity arrays declared in {@link Entities}.
 */
private static final Entities MuleEntities = new Entities();

static
{
    // Registration order is kept as: apos, basic, ISO-8859-1, HTML 4.0.
    final String[][][] entityTables =
    {
        Entities.APOS_ARRAY,
        Entities.BASIC_ARRAY,
        Entities.ISO8859_1_ARRAY,
        Entities.HTML40_ARRAY,
    };
    for (int t = 0; t < entityTables.length; t++)
    {
        MuleEntities.addEntities(entityTables[t]);
    }
}
36  
/**
 * Performs no initialization of its own; the methods visible in this class
 * are all static.
 */
protected XMLEntityCodec()
{
    super();
}
41  
42      public static String encodeString(String str)
43      {
44          if (StringUtils.isEmpty(str))
45          {
46              return str;
47          }
48  
49          return MuleEntities.escape(str);
50      }
51  
52      public static String decodeString(String str)
53      {
54          if (StringUtils.isEmpty(str))
55          {
56              return str;
57          }
58  
59          return MuleEntities.unescape(str);
60      }
61  
62      /**
63       * <p>
64       * Returns the name of the entity identified by the specified value.
65       * </p>
66       * 
67       * @param value the value to locate
68       * @return entity name associated with the specified value
69       */
70      public static String entityName(int value)
71      {
72          return MuleEntities.map.name(value);
73      }
74  
75      /**
76       * <p>
77       * Returns the value of the entity identified by the specified name.
78       * </p>
79       * 
80       * @param name the name to locate
81       * @return entity value associated with the specified name
82       */
83      public static int entityValue(String name)
84      {
85          return MuleEntities.map.value(name);
86      }
87  
88  
89      //
90      // everything from here on is copied from commons-lang 2.2 + svn since it is not
91      // extensible and referencing the package-private class can lead to classloader
92      // problems :-(
93      //
94  
95      /*
96       * Licensed to the Apache Software Foundation (ASF) under one or more
97       * contributor license agreements.  See the NOTICE file distributed with
98       * this work for additional information regarding copyright ownership.
99       * The ASF licenses this file to You under the Apache License, Version 2.0
100      * (the "License"); you may not use this file except in compliance with
101      * the License.  You may obtain a copy of the License at
102      * 
103      *      http://www.apache.org/licenses/LICENSE-2.0
104      * 
105      * Unless required by applicable law or agreed to in writing, software
106      * distributed under the License is distributed on an "AS IS" BASIS,
107      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
108      * See the License for the specific language governing permissions and
109      * limitations under the License.
110      */
111 
112     /**
113      * <p>Provides HTML and XML entity utilities.</p>
114      *
115      * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
116      * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
117      * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
118      * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
119      * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
120      *
121      * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
122      * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
123      * @since 2.0
124      * @version $Id: XMLEntityCodec.java 19191 2010-08-25 21:05:23Z tcarlson $
125      */
126     private static class Entities
127     {
128 
129         private static final String[][] BASIC_ARRAY =
130         {
131             {"quot", "34"}, // " - double-quote
132             {"amp", "38"}, // & - ampersand
133             {"lt", "60"}, // < - less-than
134             {"gt", "62"}, // > - greater-than
135         };
136 
137         private static final String[][] APOS_ARRAY =
138         {
139             {"apos", "39"}, // XML apostrophe
140         };
141 
142         // package scoped for testing
143         static final String[][] ISO8859_1_ARRAY =
144         {
145             {"nbsp", "160"}, // non-breaking space
146             {"iexcl", "161"}, //inverted exclamation mark
147             {"cent", "162"}, //cent sign
148             {"pound", "163"}, //pound sign
149             {"curren", "164"}, //currency sign
150             {"yen", "165"}, //yen sign = yuan sign
151             {"brvbar", "166"}, //broken bar = broken vertical bar
152             {"sect", "167"}, //section sign
153             {"uml", "168"}, //diaeresis = spacing diaeresis
154             {"copy", "169"}, // © - copyright sign
155             {"ordf", "170"}, //feminine ordinal indicator
156             {"laquo", "171"}, //left-pointing double angle quotation mark = left pointing guillemet
157             {"not", "172"}, //not sign
158             {"shy", "173"}, //soft hyphen = discretionary hyphen
159             {"reg", "174"}, // ® - registered trademark sign
160             {"macr", "175"}, //macron = spacing macron = overline = APL overbar
161             {"deg", "176"}, //degree sign
162             {"plusmn", "177"}, //plus-minus sign = plus-or-minus sign
163             {"sup2", "178"}, //superscript two = superscript digit two = squared
164             {"sup3", "179"}, //superscript three = superscript digit three = cubed
165             {"acute", "180"}, //acute accent = spacing acute
166             {"micro", "181"}, //micro sign
167             {"para", "182"}, //pilcrow sign = paragraph sign
168             {"middot", "183"}, //middle dot = Georgian comma = Greek middle dot
169             {"cedil", "184"}, //cedilla = spacing cedilla
170             {"sup1", "185"}, //superscript one = superscript digit one
171             {"ordm", "186"}, //masculine ordinal indicator
172             {"raquo", "187"}, //right-pointing double angle quotation mark = right pointing guillemet
173             {"frac14", "188"}, //vulgar fraction one quarter = fraction one quarter
174             {"frac12", "189"}, //vulgar fraction one half = fraction one half
175             {"frac34", "190"}, //vulgar fraction three quarters = fraction three quarters
176             {"iquest", "191"}, //inverted question mark = turned question mark
177             {"Agrave", "192"}, // À - uppercase A, grave accent
178             {"Aacute", "193"}, // Á - uppercase A, acute accent
179             {"Acirc", "194"}, // Â - uppercase A, circumflex accent
180             {"Atilde", "195"}, // Ã - uppercase A, tilde
181             {"Auml", "196"}, // Ä - uppercase A, umlaut
182             {"Aring", "197"}, // Å - uppercase A, ring
183             {"AElig", "198"}, // Æ - uppercase AE
184             {"Ccedil", "199"}, // Ç - uppercase C, cedilla
185             {"Egrave", "200"}, // È - uppercase E, grave accent
186             {"Eacute", "201"}, // É - uppercase E, acute accent
187             {"Ecirc", "202"}, // Ê - uppercase E, circumflex accent
188             {"Euml", "203"}, // Ë - uppercase E, umlaut
189             {"Igrave", "204"}, // Ì - uppercase I, grave accent
190             {"Iacute", "205"}, // Í - uppercase I, acute accent
191             {"Icirc", "206"}, // Î - uppercase I, circumflex accent
192             {"Iuml", "207"}, // Ï - uppercase I, umlaut
193             {"ETH", "208"}, // Ð - uppercase Eth, Icelandic
194             {"Ntilde", "209"}, // Ñ - uppercase N, tilde
195             {"Ograve", "210"}, // Ò - uppercase O, grave accent
196             {"Oacute", "211"}, // Ó - uppercase O, acute accent
197             {"Ocirc", "212"}, // Ô - uppercase O, circumflex accent
198             {"Otilde", "213"}, // Õ - uppercase O, tilde
199             {"Ouml", "214"}, // Ö - uppercase O, umlaut
200             {"times", "215"}, //multiplication sign
201             {"Oslash", "216"}, // Ø - uppercase O, slash
202             {"Ugrave", "217"}, // Ù - uppercase U, grave accent
203             {"Uacute", "218"}, // Ú - uppercase U, acute accent
204             {"Ucirc", "219"}, // Û - uppercase U, circumflex accent
205             {"Uuml", "220"}, // Ü - uppercase U, umlaut
206             {"Yacute", "221"}, // Ý - uppercase Y, acute accent
207             {"THORN", "222"}, // Þ - uppercase THORN, Icelandic
208             {"szlig", "223"}, // ß - lowercase sharps, German
209             {"agrave", "224"}, // à - lowercase a, grave accent
210             {"aacute", "225"}, // á - lowercase a, acute accent
211             {"acirc", "226"}, // â - lowercase a, circumflex accent
212             {"atilde", "227"}, // ã - lowercase a, tilde
213             {"auml", "228"}, // ä - lowercase a, umlaut
214             {"aring", "229"}, // å - lowercase a, ring
215             {"aelig", "230"}, // æ - lowercase ae
216             {"ccedil", "231"}, // ç - lowercase c, cedilla
217             {"egrave", "232"}, // è - lowercase e, grave accent
218             {"eacute", "233"}, // é - lowercase e, acute accent
219             {"ecirc", "234"}, // ê - lowercase e, circumflex accent
220             {"euml", "235"}, // ë - lowercase e, umlaut
221             {"igrave", "236"}, // ì - lowercase i, grave accent
222             {"iacute", "237"}, // í - lowercase i, acute accent
223             {"icirc", "238"}, // î - lowercase i, circumflex accent
224             {"iuml", "239"}, // ï - lowercase i, umlaut
225             {"eth", "240"}, // ð - lowercase eth, Icelandic
226             {"ntilde", "241"}, // ñ - lowercase n, tilde
227             {"ograve", "242"}, // ò - lowercase o, grave accent
228             {"oacute", "243"}, // ó - lowercase o, acute accent
229             {"ocirc", "244"}, // ô - lowercase o, circumflex accent
230             {"otilde", "245"}, // õ - lowercase o, tilde
231             {"ouml", "246"}, // ö - lowercase o, umlaut
232             {"divide", "247"}, // division sign
233             {"oslash", "248"}, // ø - lowercase o, slash
234             {"ugrave", "249"}, // ù - lowercase u, grave accent
235             {"uacute", "250"}, // ú - lowercase u, acute accent
236             {"ucirc", "251"}, // û - lowercase u, circumflex accent
237             {"uuml", "252"}, // ü - lowercase u, umlaut
238             {"yacute", "253"}, // ý - lowercase y, acute accent
239             {"thorn", "254"}, // þ - lowercase thorn, Icelandic
240             {"yuml", "255"}, // ÿ - lowercase y, umlaut
241         };
242 
243         // http://www.w3.org/TR/REC-html40/sgml/entities.html
244         // package scoped for testing
245         static final String[][] HTML40_ARRAY =
246         {
247 //     <!-- Latin Extended-B -->
248             {"fnof", "402"}, //latin small f with hook = function= florin, U+0192 ISOtech -->
249 //     <!-- Greek -->
250             {"Alpha", "913"}, //greek capital letter alpha, U+0391 -->
251             {"Beta", "914"}, //greek capital letter beta, U+0392 -->
252             {"Gamma", "915"}, //greek capital letter gamma,U+0393 ISOgrk3 -->
253             {"Delta", "916"}, //greek capital letter delta,U+0394 ISOgrk3 -->
254             {"Epsilon", "917"}, //greek capital letter epsilon, U+0395 -->
255             {"Zeta", "918"}, //greek capital letter zeta, U+0396 -->
256             {"Eta", "919"}, //greek capital letter eta, U+0397 -->
257             {"Theta", "920"}, //greek capital letter theta,U+0398 ISOgrk3 -->
258             {"Iota", "921"}, //greek capital letter iota, U+0399 -->
259             {"Kappa", "922"}, //greek capital letter kappa, U+039A -->
260             {"Lambda", "923"}, //greek capital letter lambda,U+039B ISOgrk3 -->
261             {"Mu", "924"}, //greek capital letter mu, U+039C -->
262             {"Nu", "925"}, //greek capital letter nu, U+039D -->
263             {"Xi", "926"}, //greek capital letter xi, U+039E ISOgrk3 -->
264             {"Omicron", "927"}, //greek capital letter omicron, U+039F -->
265             {"Pi", "928"}, //greek capital letter pi, U+03A0 ISOgrk3 -->
266             {"Rho", "929"}, //greek capital letter rho, U+03A1 -->
267 //     <!-- there is no Sigmaf, and no U+03A2 character either -->
268             {"Sigma", "931"}, //greek capital letter sigma,U+03A3 ISOgrk3 -->
269             {"Tau", "932"}, //greek capital letter tau, U+03A4 -->
270             {"Upsilon", "933"}, //greek capital letter upsilon,U+03A5 ISOgrk3 -->
271             {"Phi", "934"}, //greek capital letter phi,U+03A6 ISOgrk3 -->
272             {"Chi", "935"}, //greek capital letter chi, U+03A7 -->
273             {"Psi", "936"}, //greek capital letter psi,U+03A8 ISOgrk3 -->
274             {"Omega", "937"}, //greek capital letter omega,U+03A9 ISOgrk3 -->
275             {"alpha", "945"}, //greek small letter alpha,U+03B1 ISOgrk3 -->
276             {"beta", "946"}, //greek small letter beta, U+03B2 ISOgrk3 -->
277             {"gamma", "947"}, //greek small letter gamma,U+03B3 ISOgrk3 -->
278             {"delta", "948"}, //greek small letter delta,U+03B4 ISOgrk3 -->
279             {"epsilon", "949"}, //greek small letter epsilon,U+03B5 ISOgrk3 -->
280             {"zeta", "950"}, //greek small letter zeta, U+03B6 ISOgrk3 -->
281             {"eta", "951"}, //greek small letter eta, U+03B7 ISOgrk3 -->
282             {"theta", "952"}, //greek small letter theta,U+03B8 ISOgrk3 -->
283             {"iota", "953"}, //greek small letter iota, U+03B9 ISOgrk3 -->
284             {"kappa", "954"}, //greek small letter kappa,U+03BA ISOgrk3 -->
285             {"lambda", "955"}, //greek small letter lambda,U+03BB ISOgrk3 -->
286             {"mu", "956"}, //greek small letter mu, U+03BC ISOgrk3 -->
287             {"nu", "957"}, //greek small letter nu, U+03BD ISOgrk3 -->
288             {"xi", "958"}, //greek small letter xi, U+03BE ISOgrk3 -->
289             {"omicron", "959"}, //greek small letter omicron, U+03BF NEW -->
290             {"pi", "960"}, //greek small letter pi, U+03C0 ISOgrk3 -->
291             {"rho", "961"}, //greek small letter rho, U+03C1 ISOgrk3 -->
292             {"sigmaf", "962"}, //greek small letter final sigma,U+03C2 ISOgrk3 -->
293             {"sigma", "963"}, //greek small letter sigma,U+03C3 ISOgrk3 -->
294             {"tau", "964"}, //greek small letter tau, U+03C4 ISOgrk3 -->
295             {"upsilon", "965"}, //greek small letter upsilon,U+03C5 ISOgrk3 -->
296             {"phi", "966"}, //greek small letter phi, U+03C6 ISOgrk3 -->
297             {"chi", "967"}, //greek small letter chi, U+03C7 ISOgrk3 -->
298             {"psi", "968"}, //greek small letter psi, U+03C8 ISOgrk3 -->
299             {"omega", "969"}, //greek small letter omega,U+03C9 ISOgrk3 -->
300             {"thetasym", "977"}, //greek small letter theta symbol,U+03D1 NEW -->
301             {"upsih", "978"}, //greek upsilon with hook symbol,U+03D2 NEW -->
302             {"piv", "982"}, //greek pi symbol, U+03D6 ISOgrk3 -->
303 //     <!-- General Punctuation -->
304             {"bull", "8226"}, //bullet = black small circle,U+2022 ISOpub  -->
305 //     <!-- bullet is NOT the same as bullet operator, U+2219 -->
306             {"hellip", "8230"}, //horizontal ellipsis = three dot leader,U+2026 ISOpub  -->
307             {"prime", "8242"}, //prime = minutes = feet, U+2032 ISOtech -->
308             {"Prime", "8243"}, //double prime = seconds = inches,U+2033 ISOtech -->
309             {"oline", "8254"}, //overline = spacing overscore,U+203E NEW -->
310             {"frasl", "8260"}, //fraction slash, U+2044 NEW -->
311 //     <!-- Letterlike Symbols -->
312             {"weierp", "8472"}, //script capital P = power set= Weierstrass p, U+2118 ISOamso -->
313             {"image", "8465"}, //blackletter capital I = imaginary part,U+2111 ISOamso -->
314             {"real", "8476"}, //blackletter capital R = real part symbol,U+211C ISOamso -->
315             {"trade", "8482"}, //trade mark sign, U+2122 ISOnum -->
316             {"alefsym", "8501"}, //alef symbol = first transfinite cardinal,U+2135 NEW -->
317 //     <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the 
318 //          same glyph could be used to depict both characters -->
319 //     <!-- Arrows -->
320             {"larr", "8592"}, //leftwards arrow, U+2190 ISOnum -->
321             {"uarr", "8593"}, //upwards arrow, U+2191 ISOnum-->
322             {"rarr", "8594"}, //rightwards arrow, U+2192 ISOnum -->
323             {"darr", "8595"}, //downwards arrow, U+2193 ISOnum -->
324             {"harr", "8596"}, //left right arrow, U+2194 ISOamsa -->
325             {"crarr", "8629"}, //downwards arrow with corner leftwards= carriage return, U+21B5 NEW -->
326             {"lArr", "8656"}, //leftwards double arrow, U+21D0 ISOtech -->
327 //     <!-- ISO 10646 does not say that lArr is the same as the 'is implied by' 
328 //          arrow but also does not have any other character for that function. 
329 //          So ? lArr canbe used for 'is implied by' as ISOtech suggests -->
330             {"uArr", "8657"}, //upwards double arrow, U+21D1 ISOamsa -->
331             {"rArr", "8658"}, //rightwards double arrow,U+21D2 ISOtech -->
332 //     <!-- ISO 10646 does not say this is the 'implies' character but does not
333 //          have another character with this function so ?rArr can be used for
334 //          'implies' as ISOtech suggests -->
335             {"dArr", "8659"}, //downwards double arrow, U+21D3 ISOamsa -->
336             {"hArr", "8660"}, //left right double arrow,U+21D4 ISOamsa -->
337 //     <!-- Mathematical Operators -->
338             {"forall", "8704"}, //for all, U+2200 ISOtech -->
339             {"part", "8706"}, //partial differential, U+2202 ISOtech  -->
340             {"exist", "8707"}, //there exists, U+2203 ISOtech -->
341             {"empty", "8709"}, //empty set = null set = diameter,U+2205 ISOamso -->
342             {"nabla", "8711"}, //nabla = backward difference,U+2207 ISOtech -->
343             {"isin", "8712"}, //element of, U+2208 ISOtech -->
344             {"notin", "8713"}, //not an element of, U+2209 ISOtech -->
345             {"ni", "8715"}, //contains as member, U+220B ISOtech -->
346 //     <!-- should there be a more memorable name than 'ni'? -->
347             {"prod", "8719"}, //n-ary product = product sign,U+220F ISOamsb -->
348 //     <!-- prod is NOT the same character as U+03A0 'greek capital letter pi' 
349 //          though the same glyph might be used for both -->
350             {"sum", "8721"}, //n-ary summation, U+2211 ISOamsb -->
351 //     <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
352 //          though the same glyph might be used for both -->
353             {"minus", "8722"}, //minus sign, U+2212 ISOtech -->
354             {"lowast", "8727"}, //asterisk operator, U+2217 ISOtech -->
355             {"radic", "8730"}, //square root = radical sign,U+221A ISOtech -->
356             {"prop", "8733"}, //proportional to, U+221D ISOtech -->
357             {"infin", "8734"}, //infinity, U+221E ISOtech -->
358             {"ang", "8736"}, //angle, U+2220 ISOamso -->
359             {"and", "8743"}, //logical and = wedge, U+2227 ISOtech -->
360             {"or", "8744"}, //logical or = vee, U+2228 ISOtech -->
361             {"cap", "8745"}, //intersection = cap, U+2229 ISOtech -->
362             {"cup", "8746"}, //union = cup, U+222A ISOtech -->
363             {"int", "8747"}, //integral, U+222B ISOtech -->
364             {"there4", "8756"}, //therefore, U+2234 ISOtech -->
365             {"sim", "8764"}, //tilde operator = varies with = similar to,U+223C ISOtech -->
366 //     <!-- tilde operator is NOT the same character as the tilde, U+007E,although
367 //          the same glyph might be used to represent both  -->
368             {"cong", "8773"}, //approximately equal to, U+2245 ISOtech -->
369             {"asymp", "8776"}, //almost equal to = asymptotic to,U+2248 ISOamsr -->
370             {"ne", "8800"}, //not equal to, U+2260 ISOtech -->
371             {"equiv", "8801"}, //identical to, U+2261 ISOtech -->
372             {"le", "8804"}, //less-than or equal to, U+2264 ISOtech -->
373             {"ge", "8805"}, //greater-than or equal to,U+2265 ISOtech -->
374             {"sub", "8834"}, //subset of, U+2282 ISOtech -->
375             {"sup", "8835"}, //superset of, U+2283 ISOtech -->
376 //     <!-- note that nsup, 'not a superset of, U+2283' is not covered by the
377 //          Symbol font encoding and is not included. Should it be, for symmetry?
378 //          It is in ISOamsn  --> <!ENTITY nsub", "8836"},  
379 //          not a subset of, U+2284 ISOamsn -->
380             {"sube", "8838"}, //subset of or equal to, U+2286 ISOtech -->
381             {"supe", "8839"}, //superset of or equal to,U+2287 ISOtech -->
382             {"oplus", "8853"}, //circled plus = direct sum,U+2295 ISOamsb -->
383             {"otimes", "8855"}, //circled times = vector product,U+2297 ISOamsb -->
384             {"perp", "8869"}, //up tack = orthogonal to = perpendicular,U+22A5 ISOtech -->
385             {"sdot", "8901"}, //dot operator, U+22C5 ISOamsb -->
386 //     <!-- dot operator is NOT the same character as U+00B7 middle dot -->
387 //     <!-- Miscellaneous Technical -->
388             {"lceil", "8968"}, //left ceiling = apl upstile,U+2308 ISOamsc  -->
389             {"rceil", "8969"}, //right ceiling, U+2309 ISOamsc  -->
390             {"lfloor", "8970"}, //left floor = apl downstile,U+230A ISOamsc  -->
391             {"rfloor", "8971"}, //right floor, U+230B ISOamsc  -->
392             {"lang", "9001"}, //left-pointing angle bracket = bra,U+2329 ISOtech -->
393 //     <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' -->
394             {"rang", "9002"}, //right-pointing angle bracket = ket,U+232A ISOtech -->
395 //     <!-- rang is NOT the same character as U+003E 'greater than' or U+203A 
396 //          'single right-pointing angle quotation mark' -->
397 //     <!-- Geometric Shapes -->
398             {"loz", "9674"}, //lozenge, U+25CA ISOpub -->
399 //     <!-- Miscellaneous Symbols -->
400             {"spades", "9824"}, //black spade suit, U+2660 ISOpub -->
401 //     <!-- black here seems to mean filled as opposed to hollow -->
402             {"clubs", "9827"}, //black club suit = shamrock,U+2663 ISOpub -->
403             {"hearts", "9829"}, //black heart suit = valentine,U+2665 ISOpub -->
404             {"diams", "9830"}, //black diamond suit, U+2666 ISOpub -->
405 
406 //     <!-- Latin Extended-A -->
407             {"OElig", "338"}, //  -- latin capital ligature OE,U+0152 ISOlat2 -->
408             {"oelig", "339"}, //  -- latin small ligature oe, U+0153 ISOlat2 -->
409 //     <!-- ligature is a misnomer, this is a separate character in some languages -->
410             {"Scaron", "352"}, //  -- latin capital letter S with caron,U+0160 ISOlat2 -->
411             {"scaron", "353"}, //  -- latin small letter s with caron,U+0161 ISOlat2 -->
412             {"Yuml", "376"}, //  -- latin capital letter Y with diaeresis,U+0178 ISOlat2 -->
413 //     <!-- Spacing Modifier Letters -->
414             {"circ", "710"}, //  -- modifier letter circumflex accent,U+02C6 ISOpub -->
415             {"tilde", "732"}, //small tilde, U+02DC ISOdia -->
416 //     <!-- General Punctuation -->
417             {"ensp", "8194"}, //en space, U+2002 ISOpub -->
418             {"emsp", "8195"}, //em space, U+2003 ISOpub -->
419             {"thinsp", "8201"}, //thin space, U+2009 ISOpub -->
420             {"zwnj", "8204"}, //zero width non-joiner,U+200C NEW RFC 2070 -->
421             {"zwj", "8205"}, //zero width joiner, U+200D NEW RFC 2070 -->
422             {"lrm", "8206"}, //left-to-right mark, U+200E NEW RFC 2070 -->
423             {"rlm", "8207"}, //right-to-left mark, U+200F NEW RFC 2070 -->
424             {"ndash", "8211"}, //en dash, U+2013 ISOpub -->
425             {"mdash", "8212"}, //em dash, U+2014 ISOpub -->
426             {"lsquo", "8216"}, //left single quotation mark,U+2018 ISOnum -->
427             {"rsquo", "8217"}, //right single quotation mark,U+2019 ISOnum -->
428             {"sbquo", "8218"}, //single low-9 quotation mark, U+201A NEW -->
429             {"ldquo", "8220"}, //left double quotation mark,U+201C ISOnum -->
430             {"rdquo", "8221"}, //right double quotation mark,U+201D ISOnum -->
431             {"bdquo", "8222"}, //double low-9 quotation mark, U+201E NEW -->
432             {"dagger", "8224"}, //dagger, U+2020 ISOpub -->
433             {"Dagger", "8225"}, //double dagger, U+2021 ISOpub -->
434             {"permil", "8240"}, //per mille sign, U+2030 ISOtech -->
435             {"lsaquo", "8249"}, //single left-pointing angle quotation mark,U+2039 ISO proposed -->
436 //     <!-- lsaquo is proposed but not yet ISO standardized -->
437             {"rsaquo", "8250"}, //single right-pointing angle quotation mark,U+203A ISO proposed -->
438 //     <!-- rsaquo is proposed but not yet ISO standardized -->
439             {"euro", "8364"}, //  -- euro sign, U+20AC NEW -->
440         };
441 
442         // package scoped for testing
443         private EntityMap map = new Entities.LookupEntityMap();
444 
445         /**
446          * <p>
447          * Adds entities to this entity.
448          * </p>
449          * 
450          * @param entityArray array of entities to be added
451          */
452         public void addEntities(String[][] entityArray)
453         {
454             for (int i = 0; i < entityArray.length; ++i)
455             {
456                 addEntity(entityArray[i][0], Integer.parseInt(entityArray[i][1]));
457             }
458         }
459 
460         /**
461          * <p>
462          * Add an entity to this entity.
463          * </p>
464          * 
465          * @param name name of the entity
466          * @param value vale of the entity
467          */
468         public void addEntity(String name, int value)
469         {
470             map.add(name, value);
471         }
472 
473         /**
474          * <p>
475          * Returns the name of the entity identified by the specified value.
476          * </p>
477          * 
478          * @param value the value to locate
479          * @return entity name associated with the specified value
480          */
481         public String entityName(int value)
482         {
483             return map.name(value);
484         }
485 
486         /**
487          * <p>
488          * Returns the value of the entity identified by the specified name.
489          * </p>
490          * 
491          * @param name the name to locate
492          * @return entity value associated with the specified name
493          */
494         public int entityValue(String name)
495         {
496             return map.value(name);
497         }
498 
499         /**
500          * <p>
501          * Escapes the characters in a <code>String</code>.
502          * </p>
503          * <p>
504          * For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
505          * escape(&quot;\u00A1&quot;) will return &quot;&amp;foo;&quot;
506          * </p>
507          * 
508          * @param str The <code>String</code> to escape.
509          * @return A new escaped <code>String</code>.
510          */
511         public String escape(String str)
512         {
513             // TODO: rewrite to use a Writer
514             StringBuffer buf = new StringBuffer(str.length() * 2);
515             for (int i = 0; i < str.length(); ++i)
516             {
517                 char ch = str.charAt(i);
518                 String entityName = this.entityName(ch);
519                 if (entityName == null)
520                 {
521                     if (ch > 0x7F)
522                     {
523                         buf.append('&');
524                         buf.append('#');
525                         buf.append((int) ch);
526                         buf.append(';');
527                     }
528                     else
529                     {
530                         buf.append(ch);
531                     }
532                 }
533                 else
534                 {
535                     buf.append('&');
536                     buf.append(entityName);
537                     buf.append(';');
538                 }
539             }
540             return buf.toString();
541         }
542 
543         /**
544          * <p>
545          * Escapes the characters in the <code>String</code> passed and writes the
546          * result to the <code>Writer</code> passed.
547          * </p>
548          * 
549          * @param writer The <code>Writer</code> to write the results of the
550          *            escaping to. Assumed to be a non-null value.
551          * @param str The <code>String</code> to escape. Assumed to be a non-null
552          *            value.
553          * @throws IOException when <code>Writer</code> passed throws the exception
554          *             from calls to the {@link Writer#write(int)} methods.
555          * @see #escape(String)
556          * @see Writer
557          */
558         public void escape(Writer writer, String str) throws IOException
559         {
560             int len = str.length();
561             for (int i = 0; i < len; i++)
562             {
563                 char c = str.charAt(i);
564                 String entityName = this.entityName(c);
565                 if (entityName == null)
566                 {
567                     if (c > 0x7F)
568                     {
569                         writer.write("&#");
570                         writer.write(Integer.toString(c, 10));
571                         writer.write(';');
572                     }
573                     else
574                     {
575                         writer.write(c);
576                     }
577                 }
578                 else
579                 {
580                     writer.write('&');
581                     writer.write(entityName);
582                     writer.write(';');
583                 }
584             }
585         }
586 
587         /**
588          * <p>
589          * Unescapes the entities in a <code>String</code>.
590          * </p>
591          * <p>
592          * For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
593          * unescape(&quot;&amp;foo;&quot;) will return &quot;\u00A1&quot;
594          * </p>
595          * 
596          * @param str The <code>String</code> to escape.
597          * @return A new escaped <code>String</code> or str itself if no unescaping
598          *         was necessary.
599          */
600         public String unescape(String str)
601         {
602             int firstAmp = str.indexOf('&');
603             if (firstAmp < 0)
604             {
605                 return str;
606             }
607 
608             StringBuffer buf = new StringBuffer(str.length());
609             buf.append(str.substring(0, firstAmp));
610             for (int i = firstAmp; i < str.length(); ++i)
611             {
612                 char ch = str.charAt(i);
613                 if (ch == '&')
614                 {
615                     int semi = str.indexOf(';', i + 1);
616                     if (semi == -1)
617                     {
618                         buf.append(ch);
619                         continue;
620                     }
621                     int amph = str.indexOf('&', i + 1);
622                     if (amph != -1 && amph < semi)
623                     {
624                         // Then the text looks like &...&...;
625                         buf.append(ch);
626                         continue;
627                     }
628                     String entityName = str.substring(i + 1, semi);
629                     int entityValue;
630                     if (entityName.length() == 0)
631                     {
632                         entityValue = -1;
633                     }
634                     else if (entityName.charAt(0) == '#')
635                     {
636                         if (entityName.length() == 1)
637                         {
638                             entityValue = -1;
639                         }
640                         else
641                         {
642                             char charAt1 = entityName.charAt(1);
643                             try
644                             {
645                                 if (charAt1 == 'x' || charAt1 == 'X')
646                                 {
647                                     entityValue = Integer.valueOf(entityName.substring(2), 16).intValue();
648                                 }
649                                 else
650                                 {
651                                     entityValue = Integer.parseInt(entityName.substring(1));
652                                 }
653                                 if (entityValue > 0xFFFF)
654                                 {
655                                     entityValue = -1;
656                                 }
657                             }
658                             catch (NumberFormatException ex)
659                             {
660                                 entityValue = -1;
661                             }
662                         }
663                     }
664                     else
665                     {
666                         entityValue = this.entityValue(entityName);
667                     }
668                     if (entityValue == -1)
669                     {
670                         buf.append('&');
671                         buf.append(entityName);
672                         buf.append(';');
673                     }
674                     else
675                     {
676                         buf.append((char) (entityValue));
677                     }
678                     i = semi;
679                 }
680                 else
681                 {
682                     buf.append(ch);
683                 }
684             }
685             return buf.toString();
686         }
687 
688         /**
689          * <p>
690          * Unescapes the escaped entities in the <code>String</code> passed and
691          * writes the result to the <code>Writer</code> passed.
692          * </p>
693          * 
694          * @param writer The <code>Writer</code> to write the results to; assumed
695          *            to be non-null.
696          * @param string The <code>String</code> to write the results to; assumed
697          *            to be non-null.
698          * @throws IOException when <code>Writer</code> passed throws the exception
699          *             from calls to the {@link Writer#write(int)} methods.
700          * @see #escape(String)
701          * @see Writer
702          */
703         public void unescape(Writer writer, String string) throws IOException
704         {
705             int firstAmp = string.indexOf('&');
706             if (firstAmp < 0)
707             {
708                 writer.write(string);
709                 return;
710             }
711 
712             writer.write(string, 0, firstAmp);
713             int len = string.length();
714             for (int i = firstAmp; i < len; i++)
715             {
716                 char c = string.charAt(i);
717                 if (c == '&')
718                 {
719                     int nextIdx = i + 1;
720                     int semiColonIdx = string.indexOf(';', nextIdx);
721                     if (semiColonIdx == -1)
722                     {
723                         writer.write(c);
724                         continue;
725                     }
726                     int amphersandIdx = string.indexOf('&', i + 1);
727                     if (amphersandIdx != -1 && amphersandIdx < semiColonIdx)
728                     {
729                         // Then the text looks like &...&...;
730                         writer.write(c);
731                         continue;
732                     }
733                     String entityContent = string.substring(nextIdx, semiColonIdx);
734                     int entityValue = -1;
735                     int entityContentLen = entityContent.length();
736                     if (entityContentLen > 0)
737                     {
738                         if (entityContent.charAt(0) == '#')
739                         { // escaped value content is an integer (decimal or
740                             // hexidecimal)
741                             if (entityContentLen > 1)
742                             {
743                                 char isHexChar = entityContent.charAt(1);
744                                 try
745                                 {
746                                     switch (isHexChar)
747                                     {
748                                         case 'X' :
749                                         case 'x' :
750                                             entityValue = Integer.parseInt(entityContent.substring(2), 16);
751                                             break;
752                                         default :
753                                             entityValue = Integer.parseInt(entityContent.substring(1), 10);
754                                     }
755                                     if (entityValue > 0xFFFF)
756                                     {
757                                         entityValue = -1;
758                                     }
759                                 }
760                                 catch (NumberFormatException e)
761                                 {
762                                     entityValue = -1;
763                                 }
764                             }
765                         }
766                         else
767                         { // escaped value content is an entity name
768                             entityValue = this.entityValue(entityContent);
769                         }
770                     }
771 
772                     if (entityValue == -1)
773                     {
774                         writer.write('&');
775                         writer.write(entityContent);
776                         writer.write(';');
777                     }
778                     else
779                     {
780                         writer.write(entityValue);
781                     }
782                     i = semiColonIdx; // move index up to the semi-colon
783                 }
784                 else
785                 {
786                     writer.write(c);
787                 }
788             }
789         }
790         
791         private static interface EntityMap
792         {
793             /**
794              * <p>
795              * Add an entry to this entity map.
796              * </p>
797              * 
798              * @param name the entity name
799              * @param value the entity value
800              */
801             void add(String name, int value);
802 
803             /**
804              * <p>
805              * Returns the name of the entity identified by the specified value.
806              * </p>
807              * 
808              * @param value the value to locate
809              * @return entity name associated with the specified value
810              */
811             String name(int value);
812 
813             /**
814              * <p>
815              * Returns the value of the entity identified by the specified name.
816              * </p>
817              * 
818              * @param name the name to locate
819              * @return entity value associated with the specified name
820              */
821             int value(String name);
822         }
823 
824         private static class PrimitiveEntityMap implements EntityMap
825         {
826             private Map mapNameToValue = new HashMap();
827             private IntHashMap mapValueToName = new IntHashMap();
828 
829             /**
830              * {@inheritDoc}
831              */
832             public void add(String name, int value)
833             {
834                 mapNameToValue.put(name, new Integer(value));
835                 mapValueToName.put(value, name);
836             }
837 
838             /**
839              * {@inheritDoc}
840              */
841             public String name(int value)
842             {
843                 return (String) mapValueToName.get(value);
844             }
845 
846             /**
847              * {@inheritDoc}
848              */
849             public int value(String name)
850             {
851                 Object value = mapNameToValue.get(name);
852                 if (value == null)
853                 {
854                     return -1;
855                 }
856                 return ((Integer) value).intValue();
857             }
858         }
859 
860         private static class LookupEntityMap extends PrimitiveEntityMap
861         {
862             private static final int LOOKUP_TABLE_SIZE = 256;
863             private String[] lookupTable;
864 
865             /**
866              * {@inheritDoc}
867              */
868             public String name(int value)
869             {
870                 if (value < LOOKUP_TABLE_SIZE)
871                 {
872                     return lookupTable()[value];
873                 }
874                 return super.name(value);
875             }
876 
877             /**
878              * <p>
879              * Returns the lookup table for this entity map. The lookup table is
880              * created if it has not been previously.
881              * </p>
882              * 
883              * @return the lookup table
884              */
885             private String[] lookupTable()
886             {
887                 if (lookupTable == null)
888                 {
889                     createLookupTable();
890                 }
891                 return lookupTable;
892             }
893 
894             /**
895              * <p>
896              * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements,
897              * initialized with entity names.
898              * </p>
899              */
900             private void createLookupTable()
901             {
902                 lookupTable = new String[LOOKUP_TABLE_SIZE];
903                 for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i)
904                 {
905                     lookupTable[i] = super.name(i);
906                 }
907             }
908         }
909 
910         /**
911          * <p>
912          * A hash map that uses primitive ints for the key rather than objects.
913          * </p>
914          * <p>
915          * Note that this class is for internal optimization purposes only, and may
916          * not be supported in future releases of Jakarta Commons Lang. Utilities of
917          * this sort may be included in future releases of Jakarta Commons
918          * Collections.
919          * </p>
920          * 
921          * @author Justin Couch
922          * @author Alex Chaffee (alex@apache.org)
923          * @author Stephen Colebourne
924          * @since 2.0
925          * @version $Revision: 19191 $
926          * @see java.util.HashMap
927          */
928         private static class IntHashMap
929         {
930 
931             /**
932              * The hash table data.
933              */
934             private transient Entry table[];
935 
936             /**
937              * The total number of entries in the hash table.
938              */
939             private transient int count;
940 
941             /**
942              * The table is rehashed when its size exceeds this threshold. (The value
943              * of this field is (int)(capacity * loadFactor).)
944              * 
945              * @serial
946              */
947             private int threshold;
948 
949             /**
950              * The load factor for the hashtable.
951              * 
952              * @serial
953              */
954             private float loadFactor;
955 
956             /**
957              * <p>
958              * Innerclass that acts as a datastructure to create a new entry in the
959              * table.
960              * </p>
961              */
962             private static class Entry
963             {
964                 int hash;
965                 int key;
966                 Object value;
967                 Entry next;
968 
969                 /**
970                  * <p>
971                  * Create a new entry with the given values.
972                  * </p>
973                  * 
974                  * @param hash The code used to hash the object with
975                  * @param key The key used to enter this in the table
976                  * @param value The value for this key
977                  * @param next A reference to the next entry in the table
978                  */
979                 protected Entry(int hash, int key, Object value, Entry next)
980                 {
981                     this.hash = hash;
982                     this.key = key;
983                     this.value = value;
984                     this.next = next;
985                 }
986             }
987 
988             /**
989              * <p>
990              * Constructs a new, empty hashtable with a default capacity and load
991              * factor, which is <code>20</code> and <code>0.75</code>
992              * respectively.
993              * </p>
994              */
995             public IntHashMap()
996             {
997                 this(20, 0.75f);
998             }
999 
1000             /**
1001              * <p>
1002              * Constructs a new, empty hashtable with the specified initial capacity
1003              * and default load factor, which is <code>0.75</code>.
1004              * </p>
1005              * 
1006              * @param initialCapacity the initial capacity of the hashtable.
1007              * @throws IllegalArgumentException if the initial capacity is less than
1008              *             zero.
1009              */
1010             public IntHashMap(int initialCapacity)
1011             {
1012                 this(initialCapacity, 0.75f);
1013             }
1014 
1015             /**
1016              * <p>
1017              * Constructs a new, empty hashtable with the specified initial capacity
1018              * and the specified load factor.
1019              * </p>
1020              * 
1021              * @param initialCapacity the initial capacity of the hashtable.
1022              * @param loadFactor the load factor of the hashtable.
1023              * @throws IllegalArgumentException if the initial capacity is less than
1024              *             zero, or if the load factor is nonpositive.
1025              */
1026             public IntHashMap(int initialCapacity, float loadFactor)
1027             {
1028                 super();
1029                 if (initialCapacity < 0)
1030                 {
1031                     throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity);
1032                 }
1033                 if (loadFactor <= 0)
1034                 {
1035                     throw new IllegalArgumentException("Illegal Load: " + loadFactor);
1036                 }
1037                 if (initialCapacity == 0)
1038                 {
1039                     initialCapacity = 1;
1040                 }
1041 
1042                 this.loadFactor = loadFactor;
1043                 table = new Entry[initialCapacity];
1044                 threshold = (int) (initialCapacity * loadFactor);
1045             }
1046 
1047             /**
1048              * <p>
1049              * Returns the number of keys in this hashtable.
1050              * </p>
1051              * 
1052              * @return the number of keys in this hashtable.
1053              */
1054             public int size()
1055             {
1056                 return count;
1057             }
1058 
1059             /**
1060              * <p>
1061              * Tests if this hashtable maps no keys to values.
1062              * </p>
1063              * 
1064              * @return <code>true</code> if this hashtable maps no keys to values;
1065              *         <code>false</code> otherwise.
1066              */
1067             public boolean isEmpty()
1068             {
1069                 return count == 0;
1070             }
1071 
1072             /**
1073              * <p>
1074              * Tests if some key maps into the specified value in this hashtable.
1075              * This operation is more expensive than the <code>containsKey</code>
1076              * method.
1077              * </p>
1078              * <p>
1079              * Note that this method is identical in functionality to containsValue,
1080              * (which is part of the Map interface in the collections framework).
1081              * </p>
1082              * 
1083              * @param value a value to search for.
1084              * @return <code>true</code> if and only if some key maps to the
1085              *         <code>value</code> argument in this hashtable as determined
1086              *         by the <tt>equals</tt> method; <code>false</code>
1087              *         otherwise.
1088              * @throws NullPointerException if the value is <code>null</code>.
1089              * @see #containsKey(int)
1090              * @see #containsValue(Object)
1091              * @see java.util.Map
1092              */
1093             public boolean contains(Object value)
1094             {
1095                 if (value == null)
1096                 {
1097                     throw new IllegalArgumentException("parameter value may not be null");
1098                 }
1099 
1100                 Entry tab[] = table;
1101                 for (int i = tab.length; i-- > 0;)
1102                 {
1103                     for (Entry e = tab[i]; e != null; e = e.next)
1104                     {
1105                         if (e.value.equals(value))
1106                         {
1107                             return true;
1108                         }
1109                     }
1110                 }
1111                 return false;
1112             }
1113 
1114             /**
1115              * <p>
1116              * Returns <code>true</code> if this HashMap maps one or more keys to
1117              * this value.
1118              * </p>
1119              * <p>
1120              * Note that this method is identical in functionality to contains (which
1121              * predates the Map interface).
1122              * </p>
1123              * 
1124              * @param value value whose presence in this HashMap is to be tested.
1125              * @return boolean <code>true</code> if the value is contained
1126              * @see java.util.Map
1127              * @since JDK1.2
1128              */
1129             public boolean containsValue(Object value)
1130             {
1131                 return contains(value);
1132             }
1133 
1134             /**
1135              * <p>
1136              * Tests if the specified object is a key in this hashtable.
1137              * </p>
1138              * 
1139              * @param key possible key.
1140              * @return <code>true</code> if and only if the specified object is a
1141              *         key in this hashtable, as determined by the <tt>equals</tt>
1142              *         method; <code>false</code> otherwise.
1143              * @see #contains(Object)
1144              */
1145             public boolean containsKey(int key)
1146             {
1147                 Entry tab[] = table;
1148                 int hash = key;
1149                 int index = (hash & 0x7FFFFFFF) % tab.length;
1150                 for (Entry e = tab[index]; e != null; e = e.next)
1151                 {
1152                     if (e.hash == hash)
1153                     {
1154                         return true;
1155                     }
1156                 }
1157                 return false;
1158             }
1159 
1160             /**
1161              * <p>
1162              * Returns the value to which the specified key is mapped in this map.
1163              * </p>
1164              * 
1165              * @param key a key in the hashtable.
1166              * @return the value to which the key is mapped in this hashtable;
1167              *         <code>null</code> if the key is not mapped to any value in
1168              *         this hashtable.
1169              * @see #put(int, Object)
1170              */
1171             public Object get(int key)
1172             {
1173                 Entry tab[] = table;
1174                 int hash = key;
1175                 int index = (hash & 0x7FFFFFFF) % tab.length;
1176                 for (Entry e = tab[index]; e != null; e = e.next)
1177                 {
1178                     if (e.hash == hash)
1179                     {
1180                         return e.value;
1181                     }
1182                 }
1183                 return null;
1184             }
1185 
1186             /**
1187              * <p>
1188              * Increases the capacity of and internally reorganizes this hashtable,
1189              * in order to accommodate and access its entries more efficiently.
1190              * </p>
1191              * <p>
1192              * This method is called automatically when the number of keys in the
1193              * hashtable exceeds this hashtable's capacity and load factor.
1194              * </p>
1195              */
1196             protected void rehash()
1197             {
1198                 int oldCapacity = table.length;
1199                 Entry oldMap[] = table;
1200 
1201                 int newCapacity = oldCapacity * 2 + 1;
1202                 Entry newMap[] = new Entry[newCapacity];
1203 
1204                 threshold = (int) (newCapacity * loadFactor);
1205                 table = newMap;
1206 
1207                 for (int i = oldCapacity; i-- > 0;)
1208                 {
1209                     for (Entry old = oldMap[i]; old != null;)
1210                     {
1211                         Entry e = old;
1212                         old = old.next;
1213 
1214                         int index = (e.hash & 0x7FFFFFFF) % newCapacity;
1215                         e.next = newMap[index];
1216                         newMap[index] = e;
1217                     }
1218                 }
1219             }
1220 
1221             /**
1222              * <p>
1223              * Maps the specified <code>key</code> to the specified
1224              * <code>value</code> in this hashtable. The key cannot be
1225              * <code>null</code>.
1226              * </p>
1227              * <p>
1228              * The value can be retrieved by calling the <code>get</code> method
1229              * with a key that is equal to the original key.
1230              * </p>
1231              * 
1232              * @param key the hashtable key.
1233              * @param value the value.
1234              * @return the previous value of the specified key in this hashtable, or
1235              *         <code>null</code> if it did not have one.
1236              * @throws NullPointerException if the key is <code>null</code>.
1237              * @see #get(int)
1238              */
1239             public Object put(int key, Object value)
1240             {
1241                 // Makes sure the key is not already in the hashtable.
1242                 Entry tab[] = table;
1243                 int hash = key;
1244                 int index = (hash & 0x7FFFFFFF) % tab.length;
1245                 for (Entry e = tab[index]; e != null; e = e.next)
1246                 {
1247                     if (e.hash == hash)
1248                     {
1249                         Object old = e.value;
1250                         e.value = value;
1251                         return old;
1252                     }
1253                 }
1254 
1255                 if (count >= threshold)
1256                 {
1257                     // Rehash the table if the threshold is exceeded
1258                     rehash();
1259 
1260                     tab = table;
1261                     index = (hash & 0x7FFFFFFF) % tab.length;
1262                 }
1263 
1264                 // Creates the new entry.
1265                 Entry e = new Entry(hash, key, value, tab[index]);
1266                 tab[index] = e;
1267                 count++;
1268                 return null;
1269             }
1270 
1271             /**
1272              * <p>
1273              * Removes the key (and its corresponding value) from this hashtable.
1274              * </p>
1275              * <p>
1276              * This method does nothing if the key is not present in the hashtable.
1277              * </p>
1278              * 
1279              * @param key the key that needs to be removed.
1280              * @return the value to which the key had been mapped in this hashtable,
1281              *         or <code>null</code> if the key did not have a mapping.
1282              */
1283             public Object remove(int key)
1284             {
1285                 Entry tab[] = table;
1286                 int hash = key;
1287                 int index = (hash & 0x7FFFFFFF) % tab.length;
1288                 for (Entry e = tab[index], prev = null; e != null; prev = e, e = e.next)
1289                 {
1290                     if (e.hash == hash)
1291                     {
1292                         if (prev != null)
1293                         {
1294                             prev.next = e.next;
1295                         }
1296                         else
1297                         {
1298                             tab[index] = e.next;
1299                         }
1300                         count--;
1301                         Object oldValue = e.value;
1302                         e.value = null;
1303                         return oldValue;
1304                     }
1305                 }
1306                 return null;
1307             }
1308 
1309             /**
1310              * <p>Clears this hashtable so that it contains no keys.</p>
1311              */
1312             public synchronized void clear()
1313             {
1314                 Entry tab[] = table;
1315                 for (int index = tab.length; --index >= 0;)
1316                 {
1317                     tab[index] = null;
1318                 }
1319                 count = 0;
1320             }
1321 
1322         }
1323 
1324     }
1325 }