// punycode.js (14 KB)
  1. /*! http://mths.be/punycode v1.2.0 by @mathias */
  2. ;(function(root) {
  /**
   * The `punycode` object. It is declared here and populated with the public
   * API further down in the file.
   * @name punycode
   * @type Object
   */
  var punycode;

  /**
   * Detect the free variables `define`, `exports` and `module`. Each variable
   * holds the detected value, or a falsy value when the environment does not
   * provide it. `typeof` is used so that undeclared names do not throw.
   */
  var freeDefine = typeof define == 'function' && typeof define.amd == 'object' &&
    define.amd && define;
  var freeExports = typeof exports == 'object' && exports;
  var freeModule = typeof module == 'object' && module;

  /** Highest positive signed 32-bit integer value */
  var maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

  /** Bootstring parameters */
  var base = 36;
  var tMin = 1;
  var tMax = 26;
  var skew = 38;
  var damp = 700;
  var initialBias = 72;
  var initialN = 128; // 0x80
  var delimiter = '-'; // '\x2D'

  /** Regular expressions */
  var regexPunycode = /^xn--/;
  var regexNonASCII = /[^ -~]/; // unprintable ASCII chars + non-ASCII chars
  var regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g; // RFC 3490 separators

  /** Error messages, keyed by the error type passed to `error()` */
  var errors = {
    'overflow': 'Overflow: input needs wider integers to process',
    'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
    'invalid-input': 'Invalid input'
  };

  /** Convenience shortcuts */
  var baseMinusTMin = base - tMin;
  var floor = Math.floor;
  var stringFromCharCode = String.fromCharCode;

  /** Temporary variable, used as the loop key when exporting the API */
  var key;
  42. /*--------------------------------------------------------------------------*/
  /**
   * Raises the error registered under `type` in the `errors` table.
   * This function never returns normally.
   * @private
   * @param {String} type The error type, i.e. a key of `errors`.
   * @throws {RangeError} Always; the message is `errors[type]`.
   */
  function error(type) {
    var message = errors[type];
    throw new RangeError(message);
  }
  /**
   * A generic `Array#map` utility function. The input array is left
   * untouched; the results are collected in a fresh array.
   * @private
   * @param {Array} array The array to iterate over.
   * @param {Function} fn The function that gets called for every array item.
   * It receives the item as its only argument.
   * @returns {Array} A new array of values returned by the callback function.
   */
  function map(array, fn) {
    var length = array.length,
        result = [];
    // Items are visited from last to first, so assigning the highest index
    // first also gives `result` its final length right away.
    while (length--) {
      result[length] = fn(array[length]);
    }
    return result;
  }
  /**
   * Applies a callback to every label of a domain name string. The string is
   * split on the separators matched by `regexSeparators`, each label is passed
   * through `fn` (via `map`), and the results are joined again with `.`.
   * @private
   * @param {String} string The domain name.
   * @param {Function} fn The function that gets called for every label.
   * @returns {String} A new string made of the labels returned by the
   * callback function, joined with `.`.
   */
  function mapDomain(string, fn) {
    var labels = string.split(regexSeparators);
    var converted = map(labels, fn);
    return converted.join('.');
  }
  /**
   * Creates an array containing the decimal code points of each Unicode
   * character in the string. While JavaScript exposes strings as sequences of
   * 16-bit code units, this function converts a pair of surrogate halves
   * (a high surrogate followed by a low surrogate) into a single code point,
   * matching UTF-16. Unpaired surrogates are passed through unchanged as
   * individual values.
   * @see `punycode.ucs2.encode`
   * @see <http://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode.ucs2
   * @name decode
   * @param {String} string The Unicode input string (UCS-2).
   * @returns {Array} The new array of code points.
   */
  function ucs2decode(string) {
    var output = [],
        counter = 0,
        length = string.length,
        value,
        extra;
    while (counter < length) {
      value = string.charCodeAt(counter++);
      // Only a *high* surrogate (0xD800..0xDBFF) may start a pair; a leading
      // low surrogate must never be combined with whatever follows it.
      if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
        // high surrogate, and there is a next character
        extra = string.charCodeAt(counter++);
        if ((extra & 0xFC00) == 0xDC00) { // low surrogate
          output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
        } else {
          // Unmatched high surrogate: emit it alone and step back, because
          // the next code unit may itself be the start of a valid pair.
          output.push(value);
          counter--;
        }
      } else {
        output.push(value);
      }
    }
    return output;
  }
  /**
   * Creates a string based on an array of decimal code points. Code points
   * above 0xFFFF are written as a surrogate pair. The input array is not
   * modified.
   * @see `punycode.ucs2.decode`
   * @memberOf punycode.ucs2
   * @name encode
   * @param {Array} array The array of decimal code points.
   * @returns {String} The new Unicode string (UCS-2).
   */
  function ucs2encode(array) {
    var output = '',
        length = array.length,
        index,
        value;
    for (index = 0; index < length; ++index) {
      // Work on a local copy so the caller's array keeps its code points.
      value = array[index];
      if (value > 0xFFFF) {
        // Split an astral code point into high and low surrogate halves.
        value -= 0x10000;
        output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
        value = 0xDC00 | value & 0x3FF;
      }
      output += stringFromCharCode(value);
    }
    return output;
  }
  /**
   * Converts a basic code point into a digit/integer.
   * @see `digitToBasic()`
   * @private
   * @param {Number} codePoint The basic (decimal) code point.
   * @returns {Number} The numeric value of a basic code point (for use in
   * representing integers) in the range `0` to `base - 1`, or `base` if
   * the code point does not represent a value.
   */
  function basicToDigit(codePoint) {
    // Both ends of every range are checked explicitly: a subtraction-only
    // test such as `codePoint - 48 < 10` is only valid with unsigned
    // arithmetic and would accept every code point below the range here.
    if (codePoint >= 48 && codePoint <= 57) {
      // '0'..'9' map to 26..35
      return codePoint - 22;
    }
    if (codePoint >= 65 && codePoint <= 90) {
      // 'A'..'Z' map to 0..25
      return codePoint - 65;
    }
    if (codePoint >= 97 && codePoint <= 122) {
      // 'a'..'z' map to 0..25
      return codePoint - 97;
    }
    return base;
  }
  /**
   * Converts a digit/integer into a basic code point.
   * @see `basicToDigit()`
   * @private
   * @param {Number} digit The numeric value of a basic code point.
   * @param {Number} flag Selects the case of the result; it is compared
   * loosely against `0`.
   * @returns {Number} The basic code point whose value (when used for
   * representing integers) is `digit`, which needs to be in the range
   * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
   * used; else, the lowercase form is used. The behavior is undefined
   * if flag is non-zero and `digit` has no uppercase form.
   */
  function digitToBasic(digit, flag) {
    var codePoint;
    if (digit < 26) {
      // 0..25 map to ASCII a..z (97 is the code point of 'a')
      codePoint = digit + 97;
    } else {
      // 26..35 map to ASCII 0..9 (26 + 22 is the code point of '0')
      codePoint = digit + 22;
    }
    if (flag != 0) {
      // upper and lower case ASCII letters are 32 code points apart
      codePoint -= 32;
    }
    return codePoint;
  }
  /**
   * Bias adaptation function as per section 3.4 of RFC 3492.
   * http://tools.ietf.org/html/rfc3492#section-3.4
   * @private
   * @param {Number} delta The delta that was just encoded or decoded.
   * @param {Number} numPoints The divisor used to scale the delta.
   * @param {Boolean} firstTime Truthy to divide the delta by `damp`, falsy to
   * halve it instead.
   * @returns {Number} The new bias.
   */
  function adapt(delta, numPoints, firstTime) {
    var threshold = baseMinusTMin * tMax >> 1,
        k = 0;
    if (firstTime) {
      delta = floor(delta / damp);
    } else {
      delta = delta >> 1;
    }
    delta += floor(delta / numPoints);
    // Repeatedly scale the delta down until it fits below the threshold,
    // counting one `base` step per division.
    while (delta > threshold) {
      delta = floor(delta / baseMinusTMin);
      k += base;
    }
    return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
  }
  /**
   * Converts a basic code point to lowercase if `flag` is falsy, or to
   * uppercase if `flag` is truthy. The code point is unchanged if it's
   * caseless. The behavior is undefined if `codePoint` is not a basic code
   * point.
   * @private
   * @param {Number} codePoint The numeric value of a basic code point.
   * @param {*} flag Truthy to request uppercase, falsy to request lowercase.
   * @returns {Number} The resulting basic code point.
   */
  function encodeBasic(codePoint, flag) {
    // Explicit range checks are used instead of subtraction and shift
    // arithmetic: `a + b << 5` parses as `(a + b) << 5`, and a signed
    // `codePoint - 97 < 26` is true for every code point below 'a'.
    if (flag) {
      // 'a'..'z' -> 'A'..'Z'
      return codePoint >= 97 && codePoint <= 122 ? codePoint - 32 : codePoint;
    }
    // 'A'..'Z' -> 'a'..'z'
    return codePoint >= 65 && codePoint <= 90 ? codePoint + 32 : codePoint;
  }
  /**
   * Converts a Punycode string of ASCII code points to a string of Unicode
   * code points.
   * @memberOf punycode
   * @param {String} input The Punycode string of ASCII code points.
   * @returns {String} The resulting string of Unicode code points.
   * @throws {RangeError} With the 'not-basic' message if a character before
   * the last delimiter is >= 0x80, with the 'invalid-input' message if the
   * input ends in the middle of a variable-length integer or contains a
   * character that is not a valid digit, and with the 'overflow' message if
   * a value would exceed `maxInt`.
   */
  function decode(input) {
    // Don't use UCS-2: `output` collects code points, not code units.
    var output = [],
        inputLength = input.length,
        out,
        i = 0,
        n = initialN,
        bias = initialBias,
        basic,
        j,
        index,
        oldi,
        w,
        k,
        digit,
        t,
        /** Cached calculation results */
        baseMinusT;

    // Handle the basic code points: let `basic` be the number of input code
    // points before the last delimiter, or `0` if there is none, then copy
    // the first basic code points to the output.
    basic = input.lastIndexOf(delimiter);
    if (basic < 0) {
      basic = 0;
    }

    for (j = 0; j < basic; ++j) {
      // if it's not a basic code point
      if (input.charCodeAt(j) >= 0x80) {
        error('not-basic');
      }
      output.push(input.charCodeAt(j));
    }

    // Main decoding loop: start just after the last delimiter if any basic code
    // points were copied; start at the beginning otherwise.
    for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

      // `index` is the index of the next character to be consumed.
      // Decode a generalized variable-length integer into `delta`,
      // which gets added to `i`. The overflow checking is easier
      // if we increase `i` as we go, then subtract off its starting
      // value at the end to obtain `delta`.
      for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

        if (index >= inputLength) {
          error('invalid-input');
        }

        digit = basicToDigit(input.charCodeAt(index++));

        // A character that is not a digit is malformed input, not an
        // arithmetic overflow, so it is reported as such.
        if (digit >= base) {
          error('invalid-input');
        }

        if (digit > floor((maxInt - i) / w)) {
          error('overflow');
        }

        i += digit * w;
        t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

        // A digit below the threshold terminates the integer.
        if (digit < t) {
          break;
        }

        baseMinusT = base - t;
        if (w > floor(maxInt / baseMinusT)) {
          error('overflow');
        }

        w *= baseMinusT;

      }

      out = output.length + 1;
      bias = adapt(i - oldi, out, oldi == 0);

      // `i` was supposed to wrap around from `out` to `0`,
      // incrementing `n` each time, so we'll fix that now:
      if (floor(i / out) > maxInt - n) {
        error('overflow');
      }

      n += floor(i / out);
      i %= out;

      // Insert `n` at position `i` of the output
      output.splice(i++, 0, n);

    }

    return ucs2encode(output);
  }
  /**
   * Converts a string of Unicode code points to a Punycode string of ASCII
   * code points.
   * @memberOf punycode
   * @param {String} input The string of Unicode code points.
   * @returns {String} The resulting Punycode string of ASCII code points.
   * @throws {RangeError} With the 'overflow' message if a delta would exceed
   * `maxInt`.
   */
  function encode(input) {
    // Convert the input in UCS-2 to an array of Unicode code points.
    var codePoints = ucs2decode(input),
        /** The number of code points in the input */
        total = codePoints.length,
        pieces = [],
        // Initial state
        n = initialN,
        delta = 0,
        bias = initialBias,
        /** The number of code points that have been handled so far */
        handled,
        /** The number of basic code points in the input */
        basicCount,
        /** Cached calculation results */
        handledPlusOne,
        span,
        index,
        current,
        next,
        q,
        k,
        t;

    // Basic code points are copied to the output as-is, in input order.
    for (index = 0; index < total; ++index) {
      current = codePoints[index];
      if (current < 0x80) {
        pieces.push(stringFromCharCode(current));
      }
    }

    basicCount = pieces.length;
    handled = basicCount;

    // Finish the basic string - if it is not empty - with a delimiter
    if (basicCount) {
      pieces.push(delimiter);
    }

    // Main encoding loop: one pass per distinct non-basic code point value.
    while (handled < total) {

      // All non-basic code points < n have been handled already. Find the
      // smallest code point that is >= n.
      next = maxInt;
      for (index = 0; index < total; ++index) {
        current = codePoints[index];
        if (current >= n && current < next) {
          next = current;
        }
      }

      // Increase `delta` enough to advance the decoder's <n,i> state to
      // <next,0>, but guard against overflow
      handledPlusOne = handled + 1;
      if (next - n > floor((maxInt - delta) / handledPlusOne)) {
        error('overflow');
      }
      delta += (next - n) * handledPlusOne;
      n = next;

      for (index = 0; index < total; ++index) {
        current = codePoints[index];

        if (current < n && ++delta > maxInt) {
          error('overflow');
        }

        if (current != n) {
          continue;
        }

        // Represent delta as a generalized variable-length integer
        q = delta;
        k = base;
        while (true) {
          if (k <= bias) {
            t = tMin;
          } else if (k >= bias + tMax) {
            t = tMax;
          } else {
            t = k - bias;
          }
          if (q < t) {
            break;
          }
          span = base - t;
          pieces.push(
            stringFromCharCode(digitToBasic(t + (q - t) % span, 0))
          );
          q = floor((q - t) / span);
          k += base;
        }

        // The last digit, which is below the threshold `t`
        pieces.push(stringFromCharCode(digitToBasic(q, 0)));
        bias = adapt(delta, handledPlusOne, handled == basicCount);
        delta = 0;
        ++handled;
      }

      ++delta;
      ++n;

    }
    return pieces.join('');
  }
  /**
   * Converts a Punycode string representing a domain name to Unicode. Only the
   * Punycoded parts of the domain name will be converted, i.e. it doesn't
   * matter if you call it on a string that has already been converted to
   * Unicode.
   * @memberOf punycode
   * @param {String} domain The Punycode domain name to convert to Unicode.
   * @returns {String} The Unicode representation of the given Punycode
   * string.
   */
  function toUnicode(domain) {
    return mapDomain(domain, function(label) {
      if (regexPunycode.test(label)) {
        // Drop the 4-character `xn--` prefix and lowercase the rest before
        // decoding it.
        return decode(label.slice(4).toLowerCase());
      }
      // Labels without the prefix are passed through untouched.
      return label;
    });
  }
  /**
   * Converts a Unicode string representing a domain name to Punycode. Only the
   * non-ASCII parts of the domain name will be converted, i.e. it doesn't
   * matter if you call it with a domain that's already in ASCII.
   * @memberOf punycode
   * @param {String} domain The domain name to convert, as a Unicode string.
   * @returns {String} The Punycode representation of the given domain name.
   */
  function toASCII(domain) {
    return mapDomain(domain, function(label) {
      if (regexNonASCII.test(label)) {
        // Encode the label and mark it with the `xn--` prefix.
        return 'xn--' + encode(label);
      }
      // Labels that `regexNonASCII` does not match are passed through.
      return label;
    });
  }
  404. /*--------------------------------------------------------------------------*/
  /** Assemble the public API */
  punycode = {
    /**
     * A string representing the current Punycode.js version number.
     * @memberOf punycode
     * @type String
     */
    'version': '1.2.0',
    /**
     * An object of methods to convert from JavaScript's internal character
     * representation (UCS-2) to decimal Unicode code points, and back.
     * @see <http://mathiasbynens.be/notes/javascript-encoding>
     * @memberOf punycode
     * @type Object
     */
    'ucs2': {
      'decode': ucs2decode,
      'encode': ucs2encode
    },
    'decode': decode,
    'encode': encode,
    'toASCII': toASCII,
    'toUnicode': toUnicode
  };

  /**
   * Expose `punycode` through whichever module mechanism was detected:
   * an `exports` object first, then an AMD `define`, and finally a property
   * on `root`.
   */
  if (!freeExports) {
    if (freeDefine) {
      // via curl.js or RequireJS
      define('punycode', punycode);
    } else {
      // in a browser or Rhino
      root.punycode = punycode;
    }
  } else if (freeModule && freeModule.exports == freeExports) {
    // in Node.js or Ringo 0.8+
    freeModule.exports = punycode;
  } else {
    // in Narwhal or Ringo 0.7-: copy the own properties onto `exports`
    for (key in punycode) {
      if (punycode.hasOwnProperty(key)) {
        freeExports[key] = punycode[key];
      }
    }
  }
  447. }(this));