Files

1612 lines
73 KiB
JavaScript
Raw Permalink Normal View History

2025-05-19 20:36:21 -05:00
// created 2023-09-25T01:01:55.148Z
// compressed base64-encoded blob for include-ens data
// source: https://github.com/adraffy/ens-normalize.js/blob/main/src/make.js
// see: https://github.com/adraffy/ens-normalize.js#security
// SHA-256: 0565ed049b9cf1614bb9e11ba7d8ac6a6fb96c893253d890f7e2b2884b9ded32
var COMPRESSED$1 = 'AEEUdwmgDS8BxQKKAP4BOgDjATAAngDUAIMAoABoAOAAagCOAEQAhABMAHIAOwA9ACsANgAmAGIAHgAuACgAJwAXAC0AGgAjAB8ALwAUACkAEgAeAAkAGwARABkAFgA5ACgALQArADcAFQApABAAHgAiABAAGgAeABMAGAUhBe8BFxREN8sF2wC5AK5HAW8ArQkDzQCuhzc3NzcBP68NEfMABQdHBuw5BV8FYAA9MzkI9r4ZBg7QyQAWA9CeOwLNCjcCjqkChuA/lm+RAsXTAoP6ASfnEQDytQFJAjWVCkeXAOsA6godAB/cwdAUE0WlBCN/AQUCQRjFD/MRBjHxDQSJbw0jBzUAswBxme+tnIcAYwabAysG8QAjAEMMmxcDqgPKQyDXCMMxA7kUQwD3NXOrAKmFIAAfBC0D3x4BJQDBGdUFAhEgVD8JnwmQJiNWYUzrg0oAGwAUAB0AFnNcACkAFgBP9h3gPfsDOWDKneY2ChglX1UDYD30ABsAFAAdABZzIGRAnwDD8wAjAEEMzRbDqgMB2sAFYwXqAtCnAsS4AwpUJKRtFHsadUz9AMMVbwLpABM1NJEX0ZkCgYMBEyMAxRVvAukAEzUBUFAtmUwSAy4DBTER33EftQHfSwB5MxJ/AjkWKQLzL8E/cwBB6QH9LQDPDtO9ASNriQC5DQANAwCK21EFI91zHwCoL9kBqQcHBwcHKzUDowBvAQohPvU3fAQgHwCyAc8CKQMA5zMSezr7ULgFmDp/LzVQBgEGAi8FYQVgt8AFcTtlQhpCWEmfe5tmZ6IAExsDzQ8t+X8rBKtTAltbAn0jsy8Bl6utPWMDTR8Ei2kRANkDBrNHNysDBzECQWUAcwFpJ3kAiyUhAJ0BUb8AL3EfAbfNAz81KUsFWwF3YQZtAm0A+VEfAzEJDQBRSQCzAQBlAHsAM70GD/v3IZWHBwARKQAxALsjTwHZAeMPEzmXgIHwABIAGQA8AEUAQDt3gdvIEGcQZAkGTRFMdEIVEwK0D64L7REdDNkq09PgADSxB/MDWwfzA1sDWwfzB/MDWwfzA1sDWwNbA1scEvAi28gQZw9QBHUFlgWTBN4IiyZREYkHMAjaVBV0JhxPA00BBCMtSSQ7mzMTJUpMFE0LCAQ2SmyvfUADTzGzVP2QqgPTMlc5dAkGHnkSqAAyD3skNb1OhnpPcagKU0+2tYdJak5vAsY6sEAACikJm2/Dd1YGRRAfJ6kQ+ww3AbkBPw3xS9wE9QY/BM0fgRkdD9GVoAipLeEM8SbnLqWAXiP5KocF8Uv4POELUVFsD10LaQnnOmeBUgMlAREijwrhDT0IcRD3Cs1vDekRSQc9A9lJngCpBwULFR05FbkmFGKwCw05ewb/GvoLkyazEy17AAXXGiUGUQEtGwMA0y7rhbRaNVwgT2MGBwspI8sUrFAkDSlAu3hMGh8HGSWtApVDdEqLUToelyH6PEENai4XUYAH+TwJGVMLhTyiRq9FEhHWPpE9TCJNTDAEOYMsMyePCdMPiQy9fHYBXQklCbUMdRM1ERs3yQg9Bx0xlygnGQglRplgngT7owP3E9UDDwVDCUUHFwO5HDETMhUtBRGBKNsC9zbZLrcCk1aEARsFzw8pH+MQVEfkDu0InwJpA4cl7wAxFSUAGyKfCEdnAGOP3FMJLs8Iy2pwI3gDaxTrZRF3B5UOWwerHDcVwxzlcMxeD4YMKKezCV8BeQmdAWME5wgNNV+MpCBFZ1eLXBifIGVBQ14AAjUMaRWjRMGHfAKPD28SHwE5AXcHPQ0FAnsR8RFvEJkI74YINbkz/DopBFMhhyAVCisDU2zSCysm/Qz8bQGnEmYDEDRBd/Jnr2C6KBgBBx0yyUFkIfULlk/RDKAaxRhGVDIZ6AfDA/ca9yfuQVsGAwOnBxc6UTPyBMELbQiPCUMATQ6nGwfbGG4KdYzUATWPAbudA1uVhwJzkwY7Bw8Aaw+LBX3p
ACECqwinAAkA0wNbAD0CsQehAB0AiUUBQQMrMwEl6QKTA5cINc8BmTMB9y0EH8cMGQD7O25OAsO1AoBuZqYF4VwCkgJNOQFRKQQJUktVA7N15QDfAE8GF+NLARmvTs8e50cB43MvAMsA/wAJOQcJRQHRAfdxALsBYws1Caa3uQFR7S0AhwAZbwHbAo0A4QA5AIP1AVcAUQVd/QXXAlNNARU1HC9bZQG/AyMBNwERAH0Gz5GpzQsjBHEH1wIQHxXlAu8yB7kFAyLjE9FCyQK94lkAMhoKPAqrCqpgX2Q3CjV2PVQAEh+sPss/UgVVO1c7XDtXO1w7VztcO1c7XDtXO1wDm8Pmw+YKcF9JYe8Mqg3YRMw6TRPfYFVgNhPMLbsUxRXSJVoZQRrAJwkl6FUNDwgt12Y0CDA0eRfAAEMpbINFY4oeNApPHOtTlVT8LR8AtUumM7MNsBsZREQFS3XxYi4WEgomAmSFAmJGX1GzAV83JAKh+wJonAJmDQKfiDgfDwJmPwJmKgRyBIMDfxcDfpY5Cjl7GzmGOicnAmwhAjI6OA4CbcsCbbLzjgM3a0kvAWsA4gDlAE4JB5wMkQECD8YAEbkCdzMCdqZDAnlPRwJ4viFg30WyRvcCfEMCeswCfQ0CfPRIBEiBZygALxlJXEpfGRtK0ALRBQLQ0EsrA4hTA4fqRMmRNgLypV0HAwOyS9JMMSkH001QTbMCi0MCitzFHwshR2sJuwKOOwKOYESbhQKO3QKOYHxRuFM5AQ5S2FSJApP/ApMQAO0AIFUiVbNV1AosHymZijLleGpFPz0Cl6MC77ZYJawAXSkClpMCloCgAK1ZsFoNhVEAPwKWuQKWUlxIXNUCmc8CmWhczl0LHQKcnznGOqECnBoCn58CnryOACETNS4TAp31Ap6WALlBYThh8wKe1wKgcgGtAp6jIwKeUqljzGQrKS8CJ7MCJoICoP8CoFDbAqYzAqXSAqgDAIECp/ZogGi1AAdNaiBq1QKs5wKssgKtawKtBgJXIQJV4AKx5dsDH1JsmwKywRECsuwbbORtZ21MYwMl0QK2YD9DbpQDKUkCuGICuUsZArkue3A6cOUCvR0DLbYDMhUCvoxyBgMzdQK+HnMmc1MCw88CwwhzhnRPOUl05AM8qwEDPJ4DPcMCxYACxksCxhSNAshtVQLISALJUwLJMgJkoQLd1nh9ZXiyeSlL1AMYp2cGAmH4GfeVKHsPXpZevxUCz28Cz3AzT1fW9xejAMqxAs93AS3uA04Wfk8JAtwrAtuOAtJTA1JgA1NjAQUDVZCAjUMEzxrxZEl5A4LSg5EC2ssC2eKEFIRNp0ADhqkAMwNkEoZ1Xf0AWQLfaQLevHd7AuIz7RgB8zQrAfSfAfLWiwLr9wLpdH0DAur9AuroAP1LAb0C7o0C66CWrpcHAu5DA4XkmH1w5HGlAvMHAG0DjhqZlwL3FwORcgOSiwL3nAL53QL4apogmq+/O5siA52HAv7+AR8APZ8gAZ+3AwWRA6ZuA6bdANXJAwZuoYyiCQ0DDE0BEwEjB3EGZb1rCQC/BG/DFY8etxEAG3k9ACcDNxJRA42DAWcrJQCM8wAlAOanC6OVCLsGI6fJBgCvBRnDBvElRUYFFoAFcD9GSDNCKUK8X3kZX8QAls0FOgCQVCGbwTsuYDoZutcONxjOGJHJ/gVfBWAFXwVgBWsFYAVfBWAFXwVgBV8FYAVfBWBOHQjfjW8KCgoKbF7xMwTRA7kGN8PDAMMEr8MA70gxFroFTj5xPnhCR0K+X30/X/AAWBkzswCNBsxzzASm70aCRS4rDDMeLz49fnXfcsH5GcoscQFz13Y4HwVnBXLJycnACNdRYwgICAqEXoWTxgA7P4kACxbZBu21Kw0AjMsTAwkVAOVtJUUsJ1JCuULESUArXy9gPi9AKwnJRQYKTD9LPoA+iT54PnkCkULEUUpDX9NWV3JVEjQAc1w3A3IBE3YnX+g7QiMJb6MKaisz
RCUuQrNCxDPMCcwEX9EWJzYREBEEBwIHKn6l33JCNVIfybPJtAltydPUCmhBZw/tEKsZAJOVJU1CLRuxbUHOQAo7P0s+eEJ
// Code points that check_fenced() rejects when they are leading, trailing,
// or directly adjacent to another fenced code point.
// Maps codepoint => human-readable name used in the error message.
const FENCED = new Map([[8217,"apostrophe"],[8260,"fraction slash"],[12539,"middle dot"]]);
// NOTE(review): not referenced in the visible part of this file; presumably the
// maximum run of non-spacing marks allowed — confirm against its consumer.
const NSM_MAX = 4;
// Decode an arithmetic-coded byte array into an array of integer symbols.
//
// Layout of `bytes`, in the order it is consumed below:
//   u16              symbol_count
//   u16 * (count-1)  frequencies of symbols 1..count-1 (symbol 0, the end mark, has frequency 1)
//   u16              byte length of the sized payload
//   ...              sized payload (1-, 2- and 3-byte big-endian values)
//   ...              arithmetic-coded bit stream
//
// The last three symbols do not stand for themselves: each occurrence consumes
// 1, 2 or 3 bytes from the sized payload. Every other symbol x decodes to x - 1.
function decode_arithmetic(bytes) {
let pos = 0;
// read a big-endian 16-bit value and advance the cursor
function u16() { return (bytes[pos++] << 8) | bytes[pos++]; }
// decode the frequency table
// acc is cumulative: symbol k owns the interval [acc[k], acc[k+1]) out of `total`
let symbol_count = u16();
let total = 1;
let acc = [0, 1]; // first symbol has frequency 1
for (let i = 1; i < symbol_count; i++) {
acc.push(total += u16());
}
// skip the sized-payload that the last 3 symbols index into
let skip = u16();
let pos_payload = pos;
pos += skip;
// bit reader over the remaining bytes, most-significant bit first
let read_width = 0;
let read_buffer = 0;
function read_bit() {
if (read_width == 0) {
// this will read beyond end of buffer
// but (undefined|0) => zero pad
read_buffer = (read_buffer << 8) | bytes[pos++];
read_width = 8;
}
return (read_buffer >> --read_width) & 1;
}
// the coder works on N-bit integers; HALF and QRTR are the top and second-from-top bits
const N = 31;
const FULL = 2**N;
const HALF = FULL >>> 1;
const QRTR = HALF >> 1;
const MASK = FULL - 1;
// fill register
let register = 0;
for (let i = 0; i < N; i++) register = (register << 1) | read_bit();
let symbols = [];
let low = 0;
let range = FULL; // treat like a float
while (true) {
// scale the register's position inside [low, low + range) to the frequency space
let value = Math.floor((((register - low + 1) * total) - 1) / range);
let start = 0;
let end = symbol_count;
while (end - start > 1) { // binary search
let mid = (start + end) >>> 1;
if (value < acc[mid]) {
end = mid;
} else {
start = mid;
}
}
if (start == 0) break; // first symbol is end mark
symbols.push(start);
// narrow [a, b] to the sub-interval owned by the decoded symbol
let a = low + Math.floor(range * acc[start] / total);
let b = low + Math.floor(range * acc[start+1] / total) - 1;
// while both bounds agree on the top bit: shift it out and pull in a fresh bit
while (((a ^ b) & HALF) == 0) {
register = (register << 1) & MASK | read_bit();
a = (a << 1) & MASK;
b = (b << 1) & MASK | 1;
}
// while a has the second-highest bit set and b does not: drop that bit
// (the top bit of each value is kept) and pull in a fresh bit
while (a & ~b & QRTR) {
register = (register & HALF) | ((register << 1) & (MASK >>> 1)) | read_bit();
a = (a << 1) ^ HALF;
b = ((b ^ HALF) << 1) | HALF | 1;
}
low = a;
range = 1 + b - a;
}
// symbols above `offset` are the three escape symbols described in the header
let offset = symbol_count - 4;
return symbols.map(x => { // index into payload
switch (x - offset) {
case 3: return offset + 0x10100 + ((bytes[pos_payload++] << 16) | (bytes[pos_payload++] << 8) | bytes[pos_payload++]);
case 2: return offset + 0x100 + ((bytes[pos_payload++] << 8) | bytes[pos_payload++]);
case 1: return offset + bytes[pos_payload++];
default: return x - 1;
}
});
}
// Wrap an array of symbols in a reader: every call returns the next element.
// Reading past the end yields undefined.
function read_payload(v) {
	let cursor = 0;
	return () => {
		const symbol = v[cursor];
		cursor += 1;
		return symbol;
	};
}
// Base64 string => bytes => arithmetic-decoded symbols => symbol reader.
function read_compressed_payload(s) {
	const bytes = unsafe_atob(s);
	const symbols = decode_arithmetic(bytes);
	return read_payload(symbols);
}
// Decode Base64 into a Uint8Array.
// "unsafe" because the input is expected to be well-formed Base64 w/o padding:
// nothing is validated, and a character outside the alphabet contributes zero bits.
// 20220922: added for https://github.com/adraffy/ens-normalize.js/issues/4
function unsafe_atob(s) {
	const ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
	const table = []; // char code => 6-bit value
	for (let k = 0; k < ALPHABET.length; k++) {
		table[ALPHABET.charCodeAt(k)] = k;
	}
	const count = s.length;
	const out = new Uint8Array((6 * count) >> 3);
	let write = 0;
	let bits = 0; // number of not-yet-emitted bits held in acc
	let acc = 0;
	for (let k = 0; k < count; k++) {
		acc = (acc << 6) | table[s.charCodeAt(k)];
		bits += 6;
		if (bits < 8) continue;
		bits -= 8;
		out[write] = acc >> bits; // the typed array keeps only the low 8 bits
		write += 1;
	}
	return out;
}
// Zig-zag decode: even inputs map to non-negative values, odd inputs to negative.
// eg. [0,1,2,3...] => [0,-1,1,-2,...]
function signed(i) {
	const half = i >> 1;
	return (i & 1) ? ~half : half;
}
// Read n zig-zag encoded deltas and return their running sums.
function read_deltas(n, next) {
	const out = Array(n);
	let running = 0;
	for (let k = 0; k < n; k++) {
		running += signed(next());
		out[k] = running;
	}
	return out;
}
// Read run-length encoded ascending integers.
// The stream is a list of (gap, run) pairs terminated by a pair whose run is falsy:
// each pair emits `run` consecutive values starting `gap` past the current base,
// then the base moves one past the end of the run.
// `prev` is the initial base.
function read_sorted(next, prev = 0) {
	const out = [];
	let base = prev;
	for (;;) {
		const gap = next();
		const run = next();
		if (!run) return out;
		const first = base + gap;
		for (let k = 0; k < run; k++) {
			out.push(first + k);
		}
		base = first + (run + 1);
	}
}
// Read consecutive read_sorted() lists until an empty one is encountered.
function read_sorted_arrays(next) {
	return read_array_while(() => {
		const run = read_sorted(next);
		return run.length ? run : undefined;
	});
}
// Read every mapping table and return one flat list of [x, ys] entries.
// The stream holds linear tables (terminated by a width of 0) followed by
// replacement tables (terminated by a stored width of 0, ie. width - 1 < 0).
function read_mapped(next) {
	const tables = [];
	for (let w = next(); w != 0; w = next()) {
		tables.push(read_linear_table(w, next));
	}
	for (let w = next() - 1; !(w < 0); w = next() - 1) {
		tables.push(read_replacement_table(w, next));
	}
	return tables.flat();
}
// Collect values from next() until it returns something falsy.
// next receives the number of values collected so far.
function read_array_while(next) {
	const out = [];
	for (let item = next(out.length); item; item = next(out.length)) {
		out.push(item);
	}
	return out;
}
// The stream stores w columns of n delta-encoded values each;
// return them transposed, as n rows of w values.
function read_transposed(n, w, next) {
	const rows = Array(n).fill().map(() => []);
	for (let col = 0; col < w; col++) {
		const column = read_deltas(n, next);
		for (let row = 0; row < column.length; row++) {
			rows[row].push(column[row]);
		}
	}
	return rows;
}
// Read a table of arithmetic runs and expand it.
// returns [[x, ys], [x+dx, ys+dy], [x+2*dx, ys+2*dy], ...] for every run
// where dx/dy = steps shared by the whole table, w = length of ys,
// and each run has its own size and its own starting [x, ...ys] row
function read_linear_table(w, next) {
	const step_x = 1 + next();
	const step_y = next();
	const run_sizes = read_array_while(next);
	const rows = read_transposed(run_sizes.length, 1 + w, next);
	const out = [];
	rows.forEach((row, r) => {
		const x0 = row[0];
		const ys0 = row.slice(1);
		for (let k = 0; k < run_sizes[r]; k++) {
			const shift = k * step_y;
			out.push([x0 + k * step_x, ys0.map(y => y + shift)]);
		}
	});
	return out;
}
// Read a table of (1 + stored count) rows and return [[x, ys], ...]
// where w = length of ys
function read_replacement_table(w, next) {
	const count = 1 + next();
	const rows = read_transposed(count, 1 + w, next);
	return rows.map(([x, ...ys]) => [x, ys]);
}
// Decode a trie from the symbol stream and expand it into a flat list of
// code point sequences, one array per accepted path (used by init() for emoji).
// The result is in traversal order, not sorted.
function read_trie(next) {
let ret = [];
// alphabet of the trie: buckets are stored as indices into this list
let sorted = read_sorted(next);
// decode the whole trie first, then walk it from the root with an empty path
expand(decode([]), []);
return ret; // not sorted
// note: decode/expand are function declarations, so they are hoisted
// and callable from the statements above even though they follow the return
function decode(Q) { // characters that lead into this node
let S = next(); // state: valid, save, check
let B = read_array_while(() => { // buckets leading to new nodes
let cps = read_sorted(next).map(i => sorted[i]);
if (cps.length) return decode(cps);
});
return {S, B, Q};
}
// cps = path taken so far, saved = code point remembered by an ancestor with the save flag
function expand({S, B}, cps, saved) {
// check (4): prune this path if its last code point equals the saved one
if (S & 4 && saved === cps[cps.length-1]) return;
// save (2): remember the last code point for descendants
if (S & 2) saved = cps[cps.length-1];
// valid (1): the path up to this node is a complete sequence
if (S & 1) ret.push(cps);
for (let br of B) {
for (let cp of br.Q) {
expand(br, [...cps, cp], saved);
}
}
}
}
// Format a code point as uppercase hex, zero-padded to at least 2 digits.
function hex_cp(cp) {
	const digits = cp.toString(16);
	return digits.padStart(2, '0').toUpperCase();
}
// Wrap the hex form of a code point in braces.
// raffy convention: like "\u{X}" w/o the "\u"
function quote_cp(cp) {
	const hex = hex_cp(cp);
	return '{' + hex + '}';
}
/*
export function explode_cp(s) {
return [...s].map(c => c.codePointAt(0));
}
*/
// Split a string into its code points.
// Hand-rolled loop: about 2x faster than [...s].map(c => c.codePointAt(0)).
function explode_cp(s) {
	const out = [];
	const end = s.length;
	let at = 0;
	while (at < end) {
		const cp = s.codePointAt(at);
		// astral code points occupy two UTF-16 units
		at += cp < 0x10000 ? 1 : 2;
		out.push(cp);
	}
	return out;
}
// Build a string from an array of code points.
// Long inputs are converted in chunks to keep each argument spread small.
function str_from_cps(cps) {
	const CHUNK = 4096;
	const total = cps.length;
	if (total < CHUNK) {
		return String.fromCodePoint(...cps);
	}
	const parts = [];
	for (let start = 0; start < total; start += CHUNK) {
		const piece = cps.slice(start, start + CHUNK);
		parts.push(String.fromCodePoint(...piece));
	}
	return parts.join('');
}
// Comparator for numeric arrays: order by length first, then element-wise.
// Returns the first non-zero difference (negative when a sorts before b).
function compare_arrays(a, b) {
	const size = a.length;
	let diff = size - b.length;
	let at = 0;
	while (diff == 0 && at < size) {
		diff = a[at] - b[at];
		at += 1;
	}
	return diff;
}
// Pick one element of v using rng (a function returning a number in [0, 1)).
function random_choice(v, rng = Math.random) {
	const index = (rng() * v.length) | 0;
	return v[index];
}
// Return up to n elements of v chosen with rng; v itself is never modified.
// When v has more than n elements, the first n slots of a copy are shuffled
// (each swapped with a random slot at or after it) and the copy is truncated.
function random_sample(v, n, rng = Math.random) {
	const pool = v.slice();
	if (!(pool.length > n)) {
		return pool;
	}
	for (let at = 0; at < n; at++) {
		const pick = Math.floor(at + rng() * (pool.length - at));
		[pool[at], pool[pick]] = [pool[pick], pool[at]];
	}
	return pool.slice(0, n);
}
// Run fn over a list of {name, norm?, error?} test records and return the failures.
// - fn(name) is expected to throw when `error` is truthy
// - otherwise fn(name) is expected to equal `norm` (or `name` if norm isn't a string)
// Each failure is {type, result, ...test}.
function run_tests(fn, tests) {
	const failures = [];
	const report = (type, result, test) => {
		failures.push({type, result, ...test});
	};
	for (const test of tests) {
		const {name, error} = test;
		const expected = typeof test.norm === 'string' ? test.norm : name;
		try {
			const result = fn(name);
			if (error) {
				report('expected error', result, test);
			} else if (result != expected) {
				report('wrong norm', result, test);
			}
		} catch (err) {
			if (!error) {
				report('unexpected error', err.message, test);
			}
		}
	}
	return failures;
}
// created 2023-09-25T01:01:55.148Z
// compressed base64-encoded blob for include-nf data
// source: https://github.com/adraffy/ens-normalize.js/blob/main/src/make.js
// see: https://github.com/adraffy/ens-normalize.js#security
// SHA-256: a974b6f8541fc29d919bc85118af0a44015851fab5343f8679cb31be2bdb209e
var COMPRESSED = 'AEUDTAHBCFQATQDRADAAcgAgADQAFAAsABQAHwAOACQADQARAAoAFwAHABIACAAPAAUACwAFAAwABAAQAAMABwAEAAoABQAIAAIACgABAAQAFAALAAIACwABAAIAAQAHAAMAAwAEAAsADAAMAAwACgANAA0AAwAKAAkABAAdAAYAZwDSAdsDJgC0CkMB8xhZAqfoC190UGcThgBurwf7PT09Pb09AjgJum8OjDllxHYUKXAPxzq6tABAxgK8ysUvWAgMPT09PT09PSs6LT2HcgWXWwFLoSMEEEl5RFVMKvO0XQ8ExDdJMnIgsj26PTQyy8FfEQ8AY8IPAGcEbwRwBHEEcgRzBHQEdQR2BHcEeAR6BHsEfAR+BIAEgfndBQoBYgULAWIFDAFiBNcE2ATZBRAFEQUvBdALFAsVDPcNBw13DYcOMA4xDjMB4BllHI0B2grbAMDpHLkQ7QHVAPRNQQFnGRUEg0yEB2uaJF8AJpIBpob5AERSMAKNoAXqaQLUBMCzEiACnwRZEkkVsS7tANAsBG0RuAQLEPABv9HICTUBXigPZwRBApMDOwAamhtaABqEAY8KvKx3LQ4ArAB8UhwEBAVSagD8AEFZADkBIadVj2UMUgx5Il4ANQC9AxIB1BlbEPMAs30CGxlXAhwZKQIECBc6EbsCoxngzv7UzRQA8M0BawL6ZwkN7wABAD33OQRcsgLJCjMCjqUChtw/km+NAsXPAoP2BT84PwURAK0RAvptb6cApQS/OMMey5HJS84UdxpxTPkCogVFITaTOwERAK5pAvkNBOVyA7q3BKlOJSALAgUIBRcEdASpBXqzABXFSWZOawLCOqw//AolCZdvv3dSBkEQGyelEPcMMwG1ATsN7UvYBPEGOwTJH30ZGQ/NlZwIpS3dDO0m4y6hgFoj9SqDBe1L9DzdC01RaA9ZC2UJ4zpjgU4DIQENIosK3Q05CG0Q8wrJaw3lEUUHOQPVSZoApQcBCxEdNRW1JhBirAsJOXcG+xr2C48mrxMpevwF0xohBk0BKRr/AM8u54WwWjFcHE9fBgMLJSPHFKhQIA0lQLd4SBobBxUlqQKRQ3BKh1E2HpMh9jw9DWYuE1F8B/U8BRlPC4E8nkarRQ4R0j6NPUgiSUwsBDV/LC8niwnPD4UMuXxyAVkJIQmxDHETMREXN8UIOQcZLZckJxUIIUaVYJoE958D8xPRAwsFPwlBBxMDtRwtEy4VKQUNgSTXAvM21S6zAo9WgAEXBcsPJR/fEFBH4A7pCJsCZQODJesALRUhABcimwhDYwBfj9hTBS7LCMdqbCN0A2cU52ERcweRDlcHpxwzFb8c4XDIXguGCCijrwlbAXUJmQFfBOMICTVbjKAgQWdTi1gYmyBhQT9d/AIxDGUVn0S9h3gCiw9rEhsBNQFzBzkNAQJ3Ee0RaxCVCOuGBDW1M/g6JQRPIYMgEQonA09szgsnJvkM+GkBoxJiAww0PXfuZ6tgtiQX/QcZMsVBYCHxC5JPzQycGsEYQlQuGeQHvwPzGvMn6kFXBf8DowMTOk0z7gS9C2kIiwk/AEkOoxcH1xhqCnGM0AExiwG3mQNXkYMCb48GNwcLAGcLhwV55QAdAqcIowAFAM8DVwA5Aq0HnQAZAIVBAT0DJy8BIeUCjwOTCDHLAZUvAfMpBBvDDBUA9zduSgLDsQKAamaiBd1YAo4CSTUBTSUEBU5HUQOvceEA2wBLBhPfRwEVq0rLGuNDAd9vKwDHAPsABTUHBUEBzQHzbQC3AV8LMQmis7UBTekpAIMAFWsB1wKJAN0ANQB/8QFTAE0FWfkF0wJPSQERMRgrV2EBuwMfATMBDQB5BsuNpckHHwRtB9MCEBsV4QLvLge1AQMi3xPNQsUCvd5VoWACZIECYkJbTa9bNyACofcCaJgCZgkCn4Q4GwsCZjsCZiYEbgR/A38TA36SOQY5dxc5gjojIwJsHQIyNjgKAm3HAm
2u74ozZ0UrAWcA3gDhAEoFB5gMjQD+C8IADbUCdy8CdqI/AnlLQwJ4uh1c20WuRtcCfD8CesgCfQkCfPAFWQUgSABIfWMkAoFtAoAAAoAFAn+uSVhKWxUXSswC0QEC0MxLJwOITwOH5kTFkTIC8qFdAwMDrkvOTC0lA89NTE2vAos/AorYwRsHHUNnBbcCjjcCjlxAl4ECjtkCjlx4UbRTNQpS1FSFApP7ApMMAOkAHFUeVa9V0AYsGymVhjLheGZFOzkCl58C77JYIagAWSUClo8ClnycAKlZrFoJgU0AOwKWtQKWTlxEXNECmcsCmWRcyl0HGQKcmznCOp0CnBYCn5sCnriKAB0PMSoPAp3xAp6SALU9YTRh7wKe0wKgbgGpAp6fHwKeTqVjyGQnJSsCJ68CJn4CoPsCoEwCot0CocQCpi8Cpc4Cp/8AfQKn8mh8aLEAA0lqHGrRAqzjAqyuAq1nAq0CAlcdAlXcArHh1wMfTmyXArK9DQKy6Bds4G1jbUhfAyXNArZcOz9ukAMpRQK4XgK5RxUCuSp3cDZw4QK9GQK72nCWAzIRAr6IcgIDM3ECvhpzInNPAsPLAsMEc4J0SzVFdOADPKcDPJoDPb8CxXwCxkcCxhCJAshpUQLIRALJTwLJLgJknQLd0nh5YXiueSVL0AMYo2cCAmH0GfOVJHsLXpJeuxECz2sCz2wvS1PS8xOfAMatAs9zASnqA04SfksFAtwnAtuKAtJPA1JcA1NfAQEDVYyAiT8AyxbtYEWCHILTgs6DjQLaxwLZ3oQQhEmnPAOGpQAvA2QOhnFZ+QBVAt9lAt64c3cC4i/tFAHzMCcB9JsB8tKHAuvzAulweQLq+QLq5AD5RwG5Au6JAuuclqqXAwLuPwOF4Jh5cOBxoQLzAwBpA44WmZMC9xMDkW4DkocC95gC+dkC+GaaHJqruzebHgOdgwL++gEbADmfHJ+zAwWNA6ZqA6bZANHFAwZqoYiiBQkDDEkCwAA/AwDhQRdTARHzA2sHl2cFAJMtK7evvdsBiZkUfxEEOQH7KQUhDp0JnwCS/SlXxQL3AZ0AtwW5AG8LbUEuFCaNLgFDAYD8AbUmAHUDDgRtACwCFgyhAAAKAj0CagPdA34EkQEgRQUhfAoABQBEABMANhICdwEABdUDa+8KxQIA9wqfJ7+xt+UBkSFBQgHpFH8RNMCJAAQAGwBaAkUChIsABjpTOpSNbQC4Oo860ACNOME63AClAOgAywE6gTo7Ofw5+Tt2iTpbO56JOm85GAFWATMBbAUvNV01njWtNWY1dTW2NcU1gjWRNdI14TWeNa017jX9NbI1wTYCNhE1xjXVNhY2JzXeNe02LjY9Ni41LSE2OjY9Njw2yTcIBJA8VzY4Nt03IDcPNsogN4k3MAoEsDxnNiQ3GTdsOo03IULUQwdC4EMLHA8PCZsobShRVQYA6X8A6bABFCnXAukBowC9BbcAbwNzBL8MDAMMAQgDAAkKCwsLCQoGBAVVBI/DvwDz9b29kaUCb0QtsRTNLt4eGBcSHAMZFhYZEhYEARAEBUEcQRxBHEEcQRxBHEEaQRxBHEFCSTxBPElISUhBNkM2QTYbNklISVmBVIgBFLWZAu0BhQCjBcEAbykBvwGJAaQcEZ0ePCklMAAhMvAIMAL54gC7Bm8EescjzQMpARQpKgDUABavAj626xQAJP0A3etzuf4NNRA7efy2Z9NQrCnC0OSyANz5BBIbJ5IFDR6miIavYS6tprjjmuKebxm5C74Q225X1pkaYYPb6f1DK4k3xMEBb9S2WMjEibTNWhsRJIA+vwNVEiXTE5iXs/wezV66oFLfp9NZGYW+Gk19J2+bCT6Ye2w6LDYdgzKMUabk595eLBCXANz9HUpWbATq9vqXVx9XDg+Pc9Xp4+bsS005SVM/BJBM4687WUuf+Uj9dEi8aDNaPxtpbDxcG1THTImUMZq4UCaaNYpsVqraNyKLJXDYsFZ/5jl7bL
RtO88t7P3xZaAxhb5OdPMXqsSkp1WCieG8jXm1U99+blvLlXzPCS+M93VnJCiK+09LfaSaBAVBomyDgJua8dfUzR7ga34Iv
// https://unicode.org/reports/tr15/
// for reference implementation
// see: /derive/nf.js
// algorithmic hangul
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf (page 144)
const S0 = 0xAC00; // first precomposed syllable; is_hangul() accepts [S0, S1)
const L0 = 0x1100; // first leading jamo; compose_pair() accepts [L0, L1)
const V0 = 0x1161; // first vowel jamo; compose_pair() accepts [V0, V1)
const T0 = 0x11A7; // one below the first trailing jamo: compose_pair() requires T0 < b < T1
const L_COUNT = 19;
const V_COUNT = 21;
const T_COUNT = 28; // includes the "no trailing jamo" slot (t_index 0 in decomposed())
const N_COUNT = V_COUNT * T_COUNT; // syllables per leading jamo
const S_COUNT = L_COUNT * N_COUNT; // total precomposed syllables
// exclusive upper bounds of the ranges above
const S1 = S0 + S_COUNT;
const L1 = L0 + L_COUNT;
const V1 = V0 + V_COUNT;
const T1 = T0 + T_COUNT;
// Extract the ordering rank stored in bits 24-31 of a packed value.
function unpack_cc(packed) {
	const high = packed >> 24;
	return high & 0xFF;
}
// Extract the code point stored in the low 24 bits of a packed value.
function unpack_cp(packed) {
	const CP_MASK = 0xFFFFFF;
	return packed & CP_MASK;
}
// Normalization tables, populated by init$1():
// SHIFTED_RANK: cp => ordering rank, already shifted into bits 24+
// EXCLUSIONS:   code points that must never be produced by composition
// DECOMP:       cp => decomposition, stored reversed
// RECOMP:       first cp => (second cp => composed cp)
let SHIFTED_RANK, EXCLUSIONS, DECOMP, RECOMP;
// Decode the include-nf payload into the tables above.
// 20230905: takes about 11ms
function init$1() {
	const next = read_compressed_payload(COMPRESSED);
	const ranks = new Map();
	read_sorted_arrays(next).forEach((members, index) => {
		const shifted = (index + 1) << 24; // pre-shifted
		for (const cp of members) {
			ranks.set(cp, shifted);
		}
	});
	SHIFTED_RANK = ranks;
	EXCLUSIONS = new Set(read_sorted(next));
	DECOMP = new Map();
	RECOMP = new Map();
	for (const [cp, cps] of read_mapped(next)) {
		// only non-excluded pairs can be recomposed
		const composable = !EXCLUSIONS.has(cp) && cps.length == 2;
		if (composable) {
			const [first, second] = cps;
			if (!RECOMP.get(first)) {
				RECOMP.set(first, new Map());
			}
			RECOMP.get(first).set(second, cp);
		}
		DECOMP.set(cp, cps.reverse()); // stored reversed
	}
}
// True when cp lies in the precomposed syllable range [S0, S1).
function is_hangul(cp) {
	if (cp < S0) return false;
	return cp < S1;
}
// Compose the code point pair (a, b), or return -1 if they don't compose.
// Tried in order: leading + vowel jamo, syllable without trailing + trailing jamo,
// then the RECOMP table.
function compose_pair(a, b) {
	const is_leading_vowel = a >= L0 && a < L1 && b >= V0 && b < V1;
	if (is_leading_vowel) {
		return S0 + (a - L0) * N_COUNT + (b - V0) * T_COUNT;
	}
	const is_syllable_trailing = is_hangul(a) && b > T0 && b < T1 && (a - S0) % T_COUNT == 0;
	if (is_syllable_trailing) {
		return a + (b - T0);
	}
	const bucket = RECOMP.get(a);
	const composed = bucket && bucket.get(b);
	return composed || -1;
}
// Fully decompose `cps` (any iterable of code points) and put marks into order.
// Returns an array of packed integers: the code point in the low bits, OR-ed with
// its pre-shifted rank from SHIFTED_RANK (if it has one); read them back with
// unpack_cp() / unpack_cc().
function decomposed(cps) {
// tables are decoded lazily on first use
if (!SHIFTED_RANK) init$1();
let ret = [];
let buf = []; // stack of code points still to be processed for the current input cp
let check_order = false; // set once any ranked code point is emitted
function add(cp) {
let cc = SHIFTED_RANK.get(cp);
if (cc) {
check_order = true;
cp |= cc;
}
ret.push(cp);
}
for (let cp of cps) {
while (true) {
if (cp < 0x80) {
// ascii is emitted as-is without any table lookup
ret.push(cp);
} else if (is_hangul(cp)) {
// algorithmic decomposition: syllable => leading + vowel (+ trailing)
let s_index = cp - S0;
let l_index = s_index / N_COUNT | 0;
let v_index = (s_index % N_COUNT) / T_COUNT | 0;
let t_index = s_index % T_COUNT;
add(L0 + l_index);
add(V0 + v_index);
if (t_index > 0) add(T0 + t_index);
} else {
let mapped = DECOMP.get(cp);
if (mapped) {
// DECOMP is stored reversed, so popping yields the parts in order
// and each part is itself decomposed again by this loop
buf.push(...mapped);
} else {
add(cp);
}
}
if (!buf.length) break;
cp = buf.pop();
}
}
// reorder pass: move any ranked code point that follows a higher-ranked one
// toward the front; rank 0 code points are never moved and never crossed
if (check_order && ret.length > 1) {
let prev_cc = unpack_cc(ret[0]);
for (let i = 1; i < ret.length; i++) {
let cc = unpack_cc(ret[i]);
if (cc == 0 || prev_cc <= cc) {
prev_cc = cc;
continue;
}
// ret[i] is out of order: swap it backwards until the element
// before it has a rank <= cc (or the front is reached)
let j = i-1;
while (true) {
let tmp = ret[j+1];
ret[j+1] = ret[j];
ret[j] = tmp;
if (!j) break;
prev_cc = unpack_cc(ret[--j]);
if (prev_cc <= cc) break;
}
// position i now holds the element that was shifted up
prev_cc = unpack_cc(ret[i]);
}
}
return ret;
}
// Compose the output of decomposed().
// v holds packed integers (see unpack_cc/unpack_cp); the result holds plain code points.
function composed_from_decomposed(v) {
let ret = [];
let stack = []; // code points after the current base that did not compose into it
let prev_cp = -1; // current base code point, -1 while there is none
let prev_cc = 0; // rank of the last code point that was not composed away
for (let packed of v) {
let cc = unpack_cc(packed);
let cp = unpack_cp(packed);
if (prev_cp == -1) {
// no base yet: a rank 0 code point becomes the base, anything else is emitted directly
if (cc == 0) {
prev_cp = cp;
} else {
ret.push(cp);
}
} else if (prev_cc > 0 && prev_cc >= cc) {
// the previous uncomposed mark has an equal or higher rank: no composition is attempted
if (cc == 0) {
// rank 0 starts a new base: flush the old base and its pending marks
ret.push(prev_cp, ...stack);
stack.length = 0;
prev_cp = cp;
} else {
stack.push(cp);
}
prev_cc = cc;
} else {
let composed = compose_pair(prev_cp, cp);
if (composed >= 0) {
// cp was absorbed into the base
prev_cp = composed;
} else if (prev_cc == 0 && cc == 0) {
// two adjacent rank 0 code points that don't compose: emit the old base
ret.push(prev_cp);
prev_cp = cp;
} else {
stack.push(cp);
prev_cc = cc;
}
}
}
// flush the final base and its pending marks
if (prev_cp >= 0) {
ret.push(prev_cp, ...stack);
}
return ret;
}
// Decomposed normalization form of cps, as an array of plain code points.
// note: cps can be iterable
function nfd(cps) {
	const packed = decomposed(cps);
	return packed.map(unpack_cp);
}
// Composed normalization form of cps: decompose, then recompose.
function nfc(cps) {
	const packed = decomposed(cps);
	return composed_from_decomposed(packed);
}
const HYPHEN = 0x2D; // "-", see check_label_extension()
const STOP = 0x2E; // "."
const STOP_CH = '.'; // label separator used when splitting and joining names
const FE0F = 0xFE0F; // handled specially while building EMOJI_ROOT in init()
const UNIQUE_PH = 1; // placeholder stored in WHOLE_MAP for unique non-confusable characters
// 20230913: replace [...v] with Array_from(v) to avoid large spreads
const Array_from = x => Array.from(x); // Array.from.bind(Array);
// True when cp belongs to the group's primary (P) or secondary (Q) set.
// 20230913: primary and secondary are kept distinct instead of creating a valid union
function group_has_cp(g, cp) {
	const {P: primary, Q: secondary} = g;
	return primary.has(cp) || secondary.has(cp);
}
// Array of code points tagged as an emoji sequence.
// The getter is a free tagging system: plain arrays answer undefined
// for `.is_emoji`, instances of this class answer true.
class Emoji extends Array {
	get is_emoji() {
		return true;
	}
}
// Validation tables, populated by init() from the include-ens payload.
let MAPPED, IGNORED, CM, NSM, ESCAPE, NFC_CHECK, GROUPS, WHOLE_VALID, WHOLE_MAP, VALID, EMOJI_LIST, EMOJI_ROOT;
// Decode COMPRESSED$1 into the tables above. Does nothing once MAPPED is set.
// The payload is a single sequential stream, so the order of the reads below is significant.
function init() {
if (MAPPED) return;
let r = read_compressed_payload(COMPRESSED$1);
const read_sorted_array = () => read_sorted(r);
const read_sorted_set = () => new Set(read_sorted_array());
const set_add_many = (set, v) => v.forEach(x => set.add(x));
MAPPED = new Map(read_mapped(r));
IGNORED = read_sorted_set(); // ignored characters are not valid, so just read raw codepoints
/*
// direct include from payload is smaller than the decompression code
const FENCED = new Map(read_array_while(() => {
let cp = r();
if (cp) return [cp, read_str(r())];
}));
*/
// 20230217: we still need all CM for proper error formatting
// but norm only needs NSM subset that are potentially-valid
CM = read_sorted_array();
NSM = new Set(read_sorted_array().map(i => CM[i])); // stored as indices into CM
CM = new Set(CM);
ESCAPE = read_sorted_set(); // characters that should not be printed
NFC_CHECK = read_sorted_set(); // only needed to illustrate ens_tokenize() transformations
// shared runs of code points that groups reference by index
let chunks = read_sorted_arrays(r);
// index of the first restricted group
let unrestricted = r();
//const read_chunked = () => new Set(read_sorted_array().flatMap(i => chunks[i]).concat(read_sorted_array()));
const read_chunked = () => {
// 20230921: build set in parts, 2x faster
let set = new Set();
read_sorted_array().forEach(i => set_add_many(set, chunks[i]));
set_add_many(set, read_sorted_array());
return set;
};
// each group is {N: name, P: primary set, Q: secondary set, M: check-for-NSM flag, R: restricted flag}
GROUPS = read_array_while(i => {
// minifier property mangling seems unsafe
// so these are manually renamed to single chars
let N = read_array_while(r).map(x => x+0x60); // name is stored as offsets from 0x60
if (N.length) {
let R = i >= unrestricted; // unrestricted then restricted
N[0] -= 32; // capitalize
N = str_from_cps(N);
if (R) N=`Restricted[${N}]`;
let P = read_chunked(); // primary
let Q = read_chunked(); // secondary
let M = !r(); // not-whitelisted, check for NSM
// *** this code currently isn't needed ***
/*
let V = [...P, ...Q].sort((a, b) => a-b); // derive: sorted valid
let M = r()-1; // number of combining mark
if (M < 0) { // whitelisted
M = new Map(read_array_while(() => {
let i = r();
if (i) return [V[i-1], read_array_while(() => {
let v = read_array_while(r);
if (v.length) return v.map(x => x-1);
})];
}));
}*/
return {N, P, Q, M, R};
}
});
// decode compressed wholes
WHOLE_VALID = read_sorted_set();
WHOLE_MAP = new Map();
let wholes = read_sorted_array().concat(Array_from(WHOLE_VALID)).sort((a, b) => a-b); // must be sorted
// each entry of wholes is replaced in place by its record {V: members, M: cp => complement}
// d is a back-reference: 0 starts a new record, otherwise reuse the record d entries earlier
wholes.forEach((cp, i) => {
let d = r();
let w = wholes[i] = d ? wholes[i-d] : {V: [], M: new Map()};
w.V.push(cp); // add to member set
if (!WHOLE_VALID.has(cp)) {
WHOLE_MAP.set(cp, w); // register with whole map
}
});
// compute confusable-extent complements
// usage: WHOLE_MAP.get(cp).M.get(cp) = complement set
for (let {V, M} of new Set(WHOLE_MAP.values())) {
// connect all groups that have each whole character
let recs = [];
for (let cp of V) {
let gs = GROUPS.filter(g => group_has_cp(g, cp));
let rec = recs.find(({G}) => gs.some(g => G.has(g)));
if (!rec) {
rec = {G: new Set(), V: []};
recs.push(rec);
}
rec.V.push(cp);
set_add_many(rec.G, gs);
}
// per character cache groups which are not a member of the extent
let union = recs.flatMap(x => Array_from(x.G)); // all of the groups used by this whole
for (let {G, V} of recs) {
let complement = new Set(union.filter(g => !G.has(g))); // groups not covered by the extent
for (let cp of V) {
M.set(cp, complement); // this is the same reference
}
}
}
// compute valid set
// 20230924: VALID was union but can be re-used
VALID = new Set(); // exists in 1+ groups
let multi = new Set(); // exists in 2+ groups
const add_to_union = cp => VALID.has(cp) ? multi.add(cp) : VALID.add(cp);
for (let g of GROUPS) {
for (let cp of g.P) add_to_union(cp);
for (let cp of g.Q) add_to_union(cp);
}
// dual purpose WHOLE_MAP: return placeholder if unique non-confusable
for (let cp of VALID) {
if (!WHOLE_MAP.has(cp) && !multi.has(cp)) {
WHOLE_MAP.set(cp, UNIQUE_PH);
}
}
// add all decomposed parts
// see derive: "Valid is Closed (via Brute-force)"
set_add_many(VALID, nfd(VALID));
// decode emoji
// 20230719: emoji are now fully-expanded to avoid quirk logic
EMOJI_LIST = read_trie(r).map(v => Emoji.from(v)).sort(compare_arrays);
EMOJI_ROOT = new Map(); // this has approx 7K nodes (2+ per emoji)
for (let cps of EMOJI_LIST) {
// 20230719: change to *slightly* stricter algorithm which disallows
// insertion of misplaced FE0F in emoji sequences (matching ENSIP-15)
// example: beautified [A B] (eg. flag emoji)
// before: allow: [A FE0F B], error: [A FE0F FE0F B]
// after: error: both
// note: this code now matches ENSNormalize.{cs,java} logic
// prev = every trie node that the sequence consumed so far can end at
let prev = [EMOJI_ROOT];
for (let cp of cps) {
let next = prev.map(node => {
let child = node.get(cp);
if (!child) {
// should this be object?
// (most have 1-2 items, few have many)
// 20230719: no, v8 default map is 4?
child = new Map();
node.set(cp, child);
}
return child;
});
if (cp === FE0F) {
// FE0F is optional: keep the nodes before it as well as the nodes after it
prev.push(...next); // less than 20 elements
} else {
prev = next;
}
}
// every terminal node points at the fully-qualified sequence
for (let x of prev) {
x.V = cps;
}
}
}
// Describe a code point for an error message.
// if escaped: {HEX}
// else: "x" {HEX}
function quoted_cp(cp) {
	let prefix = '';
	if (!should_escape(cp)) {
		const printable = safe_str_from_cps([cp]);
		prefix = bidi_qq(printable) + ' ';
	}
	return prefix + quote_cp(cp);
}
// 20230211: some messages can be mixed-directional and result in spillover
// use 200E after a quoted string to force the remainder of a string from
// acquiring the direction of the quote
// https://www.w3.org/International/questions/qa-bidi-unicode-controls#exceptions
function bidi_qq(s) {
	const LRM = '\u200E'; // strong LTR
	return '"' + s + '"' + LRM;
}
// Throw if the label has hyphens in both its 3rd and 4th position (eg. "xn--").
function check_label_extension(cps) {
	if (!(cps.length >= 4)) return;
	if (cps[2] == HYPHEN && cps[3] == HYPHEN) {
		const head = str_from_cps(cps.slice(0, 4)); // this can only be ascii so cant be bidi
		throw new Error(`invalid label extension: "${head}"`);
	}
}
// Throw unless every underscore is part of an unbroken run at the start of the label,
// ie. everything before the last underscore must also be an underscore.
function check_leading_underscore(cps) {
	const UNDERSCORE = 0x5F;
	const last = cps.lastIndexOf(UNDERSCORE);
	for (let at = last - 1; at >= 0; at--) {
		if (cps[at] !== UNDERSCORE) {
			throw new Error('underscore allowed only at start');
		}
	}
}
// Throw if a fenced code point (see FENCED) is leading, trailing,
// or directly followed by another fenced code point.
function check_fenced(cps) {
	const leading = FENCED.get(cps[0]);
	if (leading) throw error_placement(`leading ${leading}`);
	const size = cps.length;
	let prev_name = leading; // name of the most recent fenced code point
	let after_prev = -1; // index right after it; -1 keeps the trailing check from firing
	for (let at = 1; at < size; at++) {
		const name = FENCED.get(cps[at]);
		if (!name) continue;
		// since cps[0] isn't fenced, cps[1] cannot throw
		if (after_prev == at) throw error_placement(`${prev_name} + ${name}`);
		after_prev = at + 1;
		prev_name = name;
	}
	if (after_prev == size) throw error_placement(`trailing ${prev_name}`);
}
// Create a string that is safe to print.
// - code points flagged by should_escape() are replaced with quoter(cp)
// - a leading combining mark is prefixed with the placeholder '◌'
// - if cps has more than `max` code points, the middle is replaced by an
//   ellipsis (U+2026), keeping floor(max/2) code points from each end;
//   when that is zero (max < 2), only the ellipsis remains
// cps: array of code points
// max: maximum number of code points to show (default: no truncation)
// quoter(cp) => string, eg. 3000 => "{3000}"
// note: in html, you'd call this function then replace [<>&] with entities
function safe_str_from_cps(cps, max = Infinity, quoter = quote_cp) {
	//if (Number.isInteger(cps)) cps = [cps];
	//if (!Array.isArray(cps)) throw new TypeError(`expected codepoints`);
	let buf = [];
	if (is_combining_mark(cps[0])) buf.push('◌');
	if (cps.length > max) {
		// slice(-0) is slice(0) and would return the *whole* array,
		// so a half-width of zero must keep nothing from either end
		const half = Math.max(0, max >> 1);
		cps = half > 0 ? [...cps.slice(0, half), 0x2026, ...cps.slice(-half)] : [0x2026];
	}
	let prev = 0; // start of the current run of printable code points
	let n = cps.length;
	for (let i = 0; i < n; i++) {
		let cp = cps[i];
		if (should_escape(cp)) {
			buf.push(str_from_cps(cps.slice(prev, i)));
			buf.push(quoter(cp));
			prev = i + 1;
		}
	}
	buf.push(str_from_cps(cps.slice(prev, n)));
	return buf.join('');
}
// True when cp is a combining mark (member of CM).
// note: the underlying set is wrapped by this function instead of being exposed,
// because an exposed set could be modified (Object.freeze() doesn't work on sets)
function is_combining_mark(cp) {
	init(); // make sure the tables are decoded
	const found = CM.has(cp);
	return found;
}
// True when cp should not be printed as-is (member of ESCAPE).
function should_escape(cp) {
	init(); // make sure the tables are decoded
	const found = ESCAPE.has(cp);
	return found;
}
// Return every supported emoji as a fully-qualified code point sequence,
// ordered by length then lexicographic.
// Each sequence is copied because the result is handed to callers.
function ens_emoji() {
	init();
	return Array.from(EMOJI_LIST, emoji => emoji.slice());
}
// Normalize a name fragment label by label, without the per-label validation of split().
// decompose: truthy to use nfd instead of nfc
function ens_normalize_fragment(frag, decompose) {
	init();
	const nf = decompose ? nfd : nfc;
	const labels = frag.split(STOP_CH);
	const normalized = labels.map(label => {
		const tokens = tokens_from_str(explode_cp(label), nf, filter_fe0f);
		return str_from_cps(tokens.flat());
	});
	return normalized.join(STOP_CH);
}
// Normalize a name: split it into nfc-normalized labels with FE0F filtered
// out of emoji, then flatten the labels back into a string.
function ens_normalize(name) {
	const labels = split(name, nfc, filter_fe0f);
	return flatten(labels);
}
// Normalize a name and "beautify" it for display.
// Emoji keep their original form (the emoji filter is the identity; they aren't exposed),
// and in every label whose type is not 'Greek' each ξ (U+3BE) becomes Ξ (U+39E)
// so the ethereum symbol is shown in its preferred form.
// Processing stops at the first label that carries an error (flatten will throw).
//
// Design notes carried over from earlier revisions:
// - 20230121: replacing leading/trailing (or all) hyphens with the similar-looking
//   U+2010 was considered but is not applied
// - 20230123: WHATWG URL uses "CheckHyphens" false, https://url.spec.whatwg.org/#idna
// - 20221213: prefixing each label with 200E (LTR) fixes a bidi subdomain issue
//   but breaks the invariant that 200E is disallowed, so it is not applied
//   https://discuss.ens.domains/t/bidi-label-ordering-spoof/15824
function ens_beautify(name) {
	const labels = split(name, nfc, emoji => emoji);
	for (const label of labels) {
		if (label.error) break;
		if (label.type !== 'Greek') {
			array_replace(label.output, 0x3BE, 0x39E);
		}
	}
	return flatten(labels);
}
// Replace every occurrence of a in v with b, in place.
function array_replace(v, a, b) {
	for (let at = v.indexOf(a, 0); at >= 0; at = v.indexOf(a, at + 1)) {
		v[at] = b;
	}
}
// Split a name into per-label info records (see split()).
// preserve_emoji: truthy to keep emoji as-is (copied, since they are exposed to the caller),
// otherwise emoji are passed through filter_fe0f
function ens_split(name, preserve_emoji) {
	const emoji_filter = preserve_emoji ? (emoji => emoji.slice()) : filter_fe0f;
	return split(name, nfc, emoji_filter);
}
// Split `name` on "." and process every label independently.
// nf: normalization function applied to text (eg. nfc)
// ef: emoji filter applied to matched emoji
// Returns one info record per label:
//   {input, offset, tokens?, output?, emoji?, type?, error?}
// A label that fails validation keeps whatever fields were set before the failure
// and carries the thrown Error in `error`; nothing is thrown from here.
function split(name, nf, ef) {
if (!name) return []; // 20230719: empty name allowance
init();
let offset = 0;
// https://unicode.org/reports/tr46/#Validity_Criteria
// 4.) "The label must not contain a U+002E ( . ) FULL STOP."
return name.split(STOP_CH).map(label => {
let input = explode_cp(label);
let info = {
input,
offset, // codepoint, not substring!
};
offset += input.length + 1; // + stop
try {
// 1.) "The label must be in Unicode Normalization Form NFC"
let tokens = info.tokens = tokens_from_str(input, nf, ef);
let token_count = tokens.length;
let type;
if (!token_count) { // the label was effectively empty (could have had ignored characters)
//norm = [];
//type = 'None'; // use this instead of next match, "ASCII"
// 20230120: change to strict
// https://discuss.ens.domains/t/ens-name-normalization-2nd/14564/59
throw new Error(`empty label`);
}
let norm = info.output = tokens.flat();
check_leading_underscore(norm);
let emoji = info.emoji = token_count > 1 || tokens[0].is_emoji; // same as: tokens.some(x => x.is_emoji);
if (!emoji && norm.every(cp => cp < 0x80)) { // special case for ascii
// 20230123: matches WHATWG, see note 3.3
check_label_extension(norm); // only needed for ascii
// can't have fenced
// can't have cm
// can't have wholes
// see derive: "Fastpath ASCII"
type = 'ASCII';
} else {
let chars = tokens.flatMap(x => x.is_emoji ? [] : x); // all of the nfc tokens concat together
if (!chars.length) { // there's no text, just emoji
type = 'Emoji';
} else {
// 5.) "The label must not begin with a combining mark, that is: General_Category=Mark."
if (CM.has(norm[0])) throw error_placement('leading combining mark');
for (let i = 1; i < token_count; i++) { // we've already checked the first token
let cps = tokens[i];
if (!cps.is_emoji && CM.has(cps[0])) { // every text token has emoji neighbors, eg. EtEEEtEt...
// bidi_qq() not needed since emoji is LTR and cps is a CM
throw error_placement(`emoji + combining mark: "${str_from_cps(tokens[i-1])} + ${safe_str_from_cps([cps[0]])}"`);
}
}
check_fenced(norm);
let unique = Array_from(new Set(chars));
let [g] = determine_group(unique); // take the first match
// see derive: "Matching Groups have Same CM Style"
// alternative: could form a hybrid type: Latin/Japanese/...
check_group(g, chars); // need text in order
check_whole(g, unique); // only need unique text (order would be required for multiple-char confusables)
type = g.N;
// 20230121: consider exposing restricted flag
// it's simpler to just check for 'Restricted'
// or even better: type.endsWith(']')
//if (g.R) info.restricted = true;
}
}
info.type = type;
} catch (err) {
info.error = err; // use full error object
}
return info;
});
}
// Whole-script confusable check.
//   group:  the group already selected for the label (only `group.N` is read, for the message)
//   unique: the distinct text codepoints of the label
// Returns nothing. Throws Error("whole-script confusable: A/B") when some other group
// survives every confusable lookup AND also contains all of the non-confusable codepoints.
function check_whole(group, unique) {
	const plain = []; // codepoints with no WHOLE_MAP entry
	let candidates; // groups that could supply a look-alike for every confusable seen so far
	for (const cp of unique) {
		const entry = WHOLE_MAP.get(cp);
		if (entry === UNIQUE_PH) return; // unique, non-confusable
		if (!entry) {
			plain.push(cp);
			continue;
		}
		const lookalikes = entry.M.get(cp); // groups which have a character that looks like this one
		candidates = candidates
			? candidates.filter(g => lookalikes.has(g))
			: Array_from(lookalikes);
		if (!candidates.length) return; // confusable intersection is empty
	}
	if (!candidates) return; // no confusable codepoint was seen
	// at least one confusable: see whether a surviving group also covers the plain codepoints
	for (const g of candidates) {
		const covers_rest = plain.every(cp => group_has_cp(g, cp));
		if (covers_rest) {
			throw new Error(`whole-script confusable: ${group.N}/${g.N}`);
		}
	}
}
// Narrow GROUPS down to the groups that contain every codepoint in `unique`.
// assumption: unique.size > 0
// Returns the list of matching groups (narrowing stops early once a single group remains).
// Throws error_disallowed(cp) when no group at all contains `cp`, or
// error_group_member(first remaining group, cp) when `cp` belongs only to other groups.
function determine_group(unique) {
	let remaining = GROUPS;
	for (const cp of unique) {
		// note: we need to dodge CM that are whitelisted
		// but that code isn't currently necessary
		const matching = remaining.filter(g => group_has_cp(g, cp));
		if (matching.length === 0) {
			const known_somewhere = GROUPS.some(g => group_has_cp(g, cp));
			// not known anywhere: the character was composed of valid parts but its NFC
			//   form is invalid (20230716/20230720: exact statement + full check,
			//   see ENSNormalize.{cs,java}); this should be rare
			// known elsewhere: no group contains all these characters, so report against
			//   the highest priority group that matched so far
			//   https://www.unicode.org/reports/tr39/#mixed_script_confusables
			throw known_somewhere
				? error_group_member(remaining[0], cp)
				: error_disallowed(cp);
		}
		remaining = matching;
		if (matching.length === 1) break; // there is only one group left
	}
	// at least one group contains all of the examined characters
	return remaining;
}
// Join the per-label results back into a single name.
//   split: array of label records carrying {input, error, output}
// Returns the labels' outputs (as strings) joined with STOP_CH.
// Throws on the first label that carries an error.
function flatten(split) {
	const label_from = ({input, error, output}) => {
		if (!error) return str_from_cps(output);
		const reason = error.message;
		// don't print the label again if there is just a single label
		if (split.length == 1) throw new Error(reason);
		// bidi_qq() only necessary if msg is digits
		throw new Error(`Invalid label ${bidi_qq(safe_str_from_cps(input, 63))}: ${reason}`);
	};
	return split.map(label_from).join(STOP_CH);
}
// Build (not throw) the Error reported for a codepoint that is not allowed.
//   cp: the offending codepoint; rendered via quoted_cp()
function error_disallowed(cp) {
	// TODO: add cp to error?
	const shown = quoted_cp(cp);
	return new Error(`disallowed character: ${shown}`);
}
// Build (not throw) the "illegal mixture" Error for a codepoint that does not belong to group `g`.
//   g:  the group the label was being matched against (only `g.N` is read)
//   cp: the offending codepoint
// If some group lists `cp` in its primary set (`P`), that group's name prefixes the codepoint.
function error_group_member(g, cp) {
	const shown = quoted_cp(cp);
	const owner = GROUPS.find(candidate => candidate.P.has(cp)); // only check primary
	const described = owner ? `${owner.N} ${shown}` : shown;
	return new Error(`illegal mixture: ${g.N} + ${described}`);
}
// Build (not throw) the Error reported for a character in an illegal position.
//   where: human-readable description appended to the message
function error_placement(where) {
	const message = `illegal placement: ${where}`;
	return new Error(message);
}
// Validate the text codepoints of a label against its group.
// assumption: cps.length > 0
// assumption: cps[0] isn't a CM
// assumption: the previous character isn't an emoji
//   g:   the group (reads `g.M`; membership is tested through group_has_cp)
//   cps: the label's text codepoints, in order
// Throws error_group_member(g, cp) for the first codepoint outside the group.
// When `g.M` is truthy, additionally throws for runs of non-spacing marks that
// repeat a mark or are longer than NSM_MAX.
//
// history:
// 20230210: bugfix: scan the decomposed form instead of cps, h/t Carbon225
// 20230217: switch from combining-mark counting to NSM counting
//           https://www.unicode.org/reports/tr39/#Optional_Detection
// (earlier per-group CM-whitelist / fixed CM-count variants are not currently needed)
function check_group(g, cps) {
	for (const cp of cps) {
		if (!group_has_cp(g, cp)) {
			// for whitelisted scripts, this will throw illegal mixture on invalid cm, eg. "e{300}{300}"
			// at the moment, it's unnecessary to introduce an extra error type
			// until there exists a whitelisted multi-character
			// there are 3 cases:
			// 1. illegal cm for wrong group => mixture error
			// 2. illegal cm for same group => cm error
			//    (would require a set of whitelisted cm per group)
			// 3. wrong group => mixture error
			throw error_group_member(g, cp);
		}
	}
	if (!g.M) return; // this group does not need the NSM check
	const decomposed = nfd(cps);
	const total = decomposed.length;
	let index = 1; // see: assumption (the first codepoint is not a mark)
	while (index < total) {
		if (!NSM.has(decomposed[index])) {
			index++;
			continue;
		}
		// found the start of a run of non-spacing marks; parse to its end so we have the full count
		const run_start = index;
		let run_end = run_start + 1;
		while (run_end < total && NSM.has(decomposed[run_end])) {
			const mark = decomposed[run_end];
			// a. Forbid sequences of the same nonspacing mark.
			for (let seen = run_start; seen < run_end; seen++) { // O(n^2) but n < 100
				if (decomposed[seen] == mark) {
					throw new Error(`duplicate non-spacing marks: ${quoted_cp(mark)}`);
				}
			}
			run_end++;
		}
		// b. Forbid sequences of more than 4 nonspacing marks (gc=Mn or gc=Me).
		const run_length = run_end - run_start;
		if (run_length > NSM_MAX) {
			// note: this slice starts with a base char or spacing-mark cm
			throw new Error(`excessive non-spacing marks: ${bidi_qq(safe_str_from_cps(decomposed.slice(run_start - 1, run_end)))} (${run_length}/${NSM_MAX})`);
		}
		// decomposed[run_end] (if any) is known not to be a non-spacing mark, so step past it
		index = run_end + 1;
	}
}
// given a list of codepoints
// returns a list of tokens: runs of text (passed through `nf`) and emoji (passed through `ef`)
// eg. for the codepoints of "abc💩d" => [nf([61, 62, 63]), ef(Emoji[1F4A9, FE0F]), nf([64])]
//   input: array of codepoints (not modified; a reversed copy is consumed instead)
//   nf:    callback applied to each run of text codepoints
//   ef:    callback applied to each matched emoji
// Valid codepoints are kept, mapped codepoints are replaced by their mapping,
// ignored codepoints are dropped; anything else throws error_disallowed(cp).
// 20230818: rename for 'process' name collision h/t Javarome
// https://github.com/adraffy/ens-normalize.js/issues/23
function tokens_from_str(input, nf, ef) {
	const tokens = [];
	let text = [];
	const flush_text = () => {
		if (!text.length) return;
		tokens.push(nf(text));
		text = [];
	};
	const stack = input.slice().reverse(); // flip so we can pop
	while (stack.length) {
		const emoji = consume_emoji_reversed(stack);
		if (emoji) {
			flush_text();
			tokens.push(ef(emoji));
			continue;
		}
		const cp = stack.pop();
		if (VALID.has(cp)) {
			text.push(cp);
			continue;
		}
		const replacement = MAPPED.get(cp);
		if (replacement) {
			text.push(...replacement); // less than 10 elements
		} else if (!IGNORED.has(cp)) {
			// 20230912: unicode 15.1 changed the order of processing such that
			// disallowed parts are only rejected after NFC
			// https://unicode.org/reports/tr46/#Validity_Criteria
			// this doesn't impact normalization as of today
			// technically, this error can be removed as the group logic will apply similar logic
			// however the error type might be less clear
			throw error_disallowed(cp);
		}
	}
	flush_text();
	return tokens;
}
// Return a copy of `cps` with every FE0F codepoint removed.
function filter_fe0f(cps) {
	const not_fe0f = cp => cp != FE0F;
	return cps.filter(not_fe0f);
}
// given a *reversed* array of codepoints (the next codepoint to read is the last element)
// returns the longest valid emoji sequence (or undefined if no match)
// *MUTATES* the supplied array: the matched codepoints are removed from its end
// disallows interleaved ignored characters
// fills (optional) eaten array with matched codepoints, in reading order
function consume_emoji_reversed(cps, eaten) {
	let longest;
	let cursor = EMOJI_ROOT;
	for (let index = cps.length - 1; index >= 0; index--) {
		cursor = cursor.get(cps[index]);
		if (!cursor) break; // no emoji continues with this codepoint
		const match = cursor.V;
		if (!match) continue; // only a prefix so far, keep walking
		longest = match; // this is a valid emoji (so far)
		const taken = cps.splice(index); // truncate, keeping what was removed
		if (eaten) eaten.push(...taken.reverse()); // (optional) copy input, used for ens_tokenize()
	}
	return longest;
}
// ************************************************************
// tokenizer
// Values of the `type` field on the token objects produced by ens_tokenize().
const TY_VALID = 'valid'; // codepoint(s) found in VALID
const TY_MAPPED = 'mapped'; // codepoint with a MAPPED replacement
const TY_IGNORED = 'ignored'; // codepoint found in IGNORED
const TY_DISALLOWED = 'disallowed'; // codepoint that is none of valid/ignored/mapped
const TY_EMOJI = 'emoji'; // sequence matched by consume_emoji_reversed()
const TY_NFC = 'nfc'; // run of valid/mapped tokens whose nfc() form differs from its input
const TY_STOP = 'stop'; // the label separator codepoint (STOP)
// Split a name into typed tokens (see the TY_* constants) for inspection/rendering.
// name: string to tokenize; it is exploded into codepoints with explode_cp()
// options.nf: when true (default), runs of valid/mapped tokens that change under nfc()
//             are replaced by a single TY_NFC token
// Disallowed codepoints are reported as TY_DISALLOWED tokens rather than thrown.
// Returns the token array after collapse_valid_tokens() has merged adjacent valid tokens.
function ens_tokenize(name, {
nf = true, // collapse unnormalized runs into a single token
} = {}) {
init();
let input = explode_cp(name).reverse(); // reversed so pop() reads the codepoints in their original order
let eaten = []; // receives the raw codepoints consumed by the current emoji match
let tokens = [];
while (input.length) {
let emoji = consume_emoji_reversed(input, eaten);
if (emoji) {
tokens.push({
type: TY_EMOJI,
emoji: emoji.slice(), // copy emoji
input: eaten, // the codepoints as they appeared in the name
cps: filter_fe0f(emoji) // the emoji with FE0F removed
});
eaten = []; // reset buffer
} else {
let cp = input.pop();
if (cp == STOP) {
tokens.push({type: TY_STOP, cp});
} else if (VALID.has(cp)) {
tokens.push({type: TY_VALID, cps: [cp]});
} else if (IGNORED.has(cp)) {
tokens.push({type: TY_IGNORED, cp});
} else {
let cps = MAPPED.get(cp);
if (cps) {
tokens.push({type: TY_MAPPED, cp, cps: cps.slice()}); // copy the mapping
} else {
tokens.push({type: TY_DISALLOWED, cp});
}
}
}
}
if (nf) {
// `start` is the index of the most recent valid/mapped token that did not require a check
// (or -1 if there is none); a following run that requires a check begins there
for (let i = 0, start = -1; i < tokens.length; i++) {
let token = tokens[i];
if (is_valid_or_mapped(token.type)) {
if (requires_check(token.cps)) { // normalization might be needed
let end = i + 1; // exclusive end of the run
for (let pos = end; pos < tokens.length; pos++) { // find adjacent text
let {type, cps} = tokens[pos];
if (is_valid_or_mapped(type)) {
if (!requires_check(cps)) break;
end = pos + 1;
} else if (type !== TY_IGNORED) { // || type !== TY_DISALLOWED) {
break;
}
}
if (start < 0) start = i; // no remembered predecessor: the run begins at this token
let slice = tokens.slice(start, end);
let cps0 = slice.flatMap(x => is_valid_or_mapped(x.type) ? x.cps : []); // strip junk tokens
let cps = nfc(cps0);
if (compare_arrays(cps, cps0)) { // bundle into an nfc token
tokens.splice(start, end - start, {
type: TY_NFC,
input: cps0, // there are 3 states: tokens0 ==(process)=> input ==(nfc)=> tokens/cps
cps,
tokens0: collapse_valid_tokens(slice),
tokens: ens_tokenize(str_from_cps(cps), {nf: false}) // re-tokenize the normalized text without collapsing again
});
i = start; // continue after the token that replaced the run
} else {
i = end - 1; // skip to end of slice
}
start = -1; // reset
} else {
start = i; // remember last
}
} else if (token.type !== TY_IGNORED) { // 20221024: is this correct?
start = -1; // reset
}
}
}
return collapse_valid_tokens(tokens);
}
// True when a token type carries text codepoints (`cps`), ie. it is valid or mapped.
function is_valid_or_mapped(type) {
	if (type == TY_VALID) return true;
	return type == TY_MAPPED;
}
// True when at least one of the codepoints is a member of NFC_CHECK.
function requires_check(cps) {
	const in_check_set = cp => NFC_CHECK.has(cp);
	return cps.some(in_check_set);
}
// Merge every run of adjacent valid tokens into one valid token whose `cps` is the concatenation.
// *MUTATES* `tokens` in place and returns the same array.
// Note: a lone valid token is also replaced by a fresh {type, cps} object.
function collapse_valid_tokens(tokens) {
	let index = 0;
	while (index < tokens.length) {
		if (tokens[index].type == TY_VALID) {
			let stop = index + 1;
			while (stop < tokens.length && tokens[stop].type == TY_VALID) stop++;
			const run = tokens.slice(index, stop);
			const merged = {type: TY_VALID, cps: run.flatMap(token => token.cps)};
			tokens.splice(index, run.length, merged);
		}
		index++;
	}
	return tokens;
}
// Format codepoints as space-separated hex strings (each rendered by hex_cp).
function hex_seq(cps) {
	const parts = cps.map(hex_cp);
	return parts.join(' ');
}
// Create the <span class="arrow"> separator placed between the "before" and "after" sides of a token.
function create_arrow_span() {
	const arrow = document.createElement('span');
	arrow.classList.add('arrow');
	arrow.innerHTML = '➔'; // '→';
	return arrow;
}
// Create a <span> describing a single codepoint.
//   cp:       the codepoint
//   in_emoji: when truthy, the codepoint is never rendered as an escaped hex code
// Special codepoints (ZWJ, ZWNJ, FE0F, keycap, tag characters) get a short label and
// "mod" classes; otherwise the codepoint is shown as hex (class "code") when
// should_escape() says so, or as its safe string form.
function span_from_cp(cp, in_emoji) {
	const span = document.createElement('span');
	const labelled = (text, ...classes) => {
		span.classList.add(...classes);
		span.innerText = text;
		return span;
	};
	if (cp == 0x200D) return labelled('ZWJ', 'mod', 'zwj');
	if (cp == 0x200C) return labelled('ZWNJ', 'mod', 'zwj'); // uses the same class as ZWJ
	if (cp == 0xFE0F) return labelled('FE0F', 'mod', 'dropped', 'style');
	if (cp == 0x20E3) return labelled('Keycap', 'mod', 'keycap');
	if (cp >= 0xE0021 && cp <= 0xE007E) { // printable ascii tag
		return labelled(String.fromCodePoint(cp - 0xE0000), 'mod', 'tag');
	}
	if (cp == 0xE007F) return labelled('⌫', 'mod', 'tag', 'end'); // tag end, alt: 🏷️
	if (!in_emoji && should_escape(cp)) return labelled(hex_cp(cp), 'code');
	span.innerText = safe_str_from_cps([cp]);
	return span;
}
// idea
//export function dom_from_token(token) {
// Build a multi-line tooltip string.
//   obj:   each own enumerable entry becomes a "key: value" line
//   extra: optional array of additional lines (anything that is not an array is ignored)
// Returns the lines joined with "\n".
function format_tooltip(obj, extra) {
	const rows = [];
	for (const [key, value] of Object.entries(obj)) {
		rows.push(`${key}: ${value}`);
	}
	if (Array.isArray(extra)) {
		rows.push(...extra);
	}
	return rows.join('\n');
}
// Render each codepoint on its own via safe_str_from_cps() and join the pieces
// with U+200B (zero width space).
function isolated_safe(cps) {
	const pieces = cps.map(cp => safe_str_from_cps([cp]));
	return pieces.join('\u{200B}');
}
// Render tokens (as produced by ens_tokenize) into a <div class="tokens"> element,
// one child element per token; each child gets a tooltip (`title`) and its token type as a class.
// Options:
//   before:     render only the original side of mapped/nfc tokens, and the raw input of emoji
//   tld_class:  set `data-tld` on a valid token that is the only token, or the last token after a stop,
//               when its text contains a-z (20230909: a dataset entry instead of a class,
//               because the class triggered for unintended names)
//   components: render emoji as one span per codepoint instead of a single glyph
//   emoji_url:  link template for emoji ('%s' is replaced by the emoji); falsy => plain <span>
//   extra:      callback (type, cps) whose array result (if any) is appended to the tooltip
// Throws TypeError for an unknown token type.
// TODO: the option set is awkward and should be redesigned
function dom_from_tokens(tokens, {
	before = false,
	tld_class = true,
	components = false,
	emoji_url = 'https://emojipedia.org/%s',
	extra = () => {},
} = {}) {
	const root = document.createElement('div');
	root.classList.add('tokens');

	// attach the tooltip; `fields` is evaluated by the caller before `extra` is invoked
	const set_title = (el, type, cps, fields) => {
		el.title = format_tooltip(fields, extra(type, cps));
	};

	const render_emoji = token => {
		const el = document.createElement(emoji_url ? 'a' : 'span');
		if (emoji_url) {
			el.href = emoji_url.replace('%s', String.fromCodePoint(...token.emoji));
		}
		const shown = before ? token.input : token.cps;
		if (components) {
			el.append(...shown.map(cp => span_from_cp(cp, true)));
		} else {
			el.innerText = String.fromCodePoint(...token.emoji); // use fully-qualified form
		}
		set_title(el, token.type, shown, {
			Type: 'Emoji',
			Hex: hex_seq(shown),
			Beautified: hex_seq(token.emoji),
		});
		return el;
	};

	const render_nfc = token => {
		const el = document.createElement('div');
		// get the cps from the original tokens; this can only be mapped/ignored/valid
		const original_cps = token.tokens0.flatMap(t => t.type === 'valid' ? t.cps : t.cp);
		// break every valid token into individual characters
		const per_char = token.tokens0.flatMap(t => t.type === 'valid' ? t.cps.map(cp => ({type: 'valid', cps: [cp]})) : t);
		const lhs = dom_from_tokens(per_char, {components, before, emoji_url, extra});
		set_title(lhs, token.type, original_cps, {
			Type: 'NFC (Unnormalized)',
			Hex: hex_seq(original_cps),
		});
		el.append(lhs);
		if (before) return el;
		const rhs = dom_from_tokens(token.tokens, {components, emoji_url, extra});
		set_title(rhs, token.type, token.cps, {
			Type: 'NFC (Normalized)',
			Hex: hex_seq(token.cps),
		});
		el.append(create_arrow_span(), rhs);
		return el;
	};

	const render_valid = (token, i) => {
		const el = document.createElement('span');
		const form = safe_str_from_cps(token.cps);
		// there is just 1 token, or we're the last token with a stop before us
		if (tld_class && (tokens.length == 1 || (i === tokens.length-1 && tokens[i-1].type === 'stop')) && /[a-z]/.test(form)) {
			el.dataset.tld = form;
		}
		el.innerText = form;
		set_title(el, token.type, token.cps, {
			Type: 'Valid',
			Hex: hex_seq(token.cps),
		});
		return el;
	};

	const render_mapped = token => {
		const el = document.createElement('div');
		const source = document.createElement('span');
		source.classList.add('before');
		source.innerText = safe_str_from_cps([token.cp]);
		set_title(source, token.type, [token.cp], {
			Type: 'Mapped (Match)',
			Hex: hex_cp(token.cp),
		});
		el.append(source);
		if (before) return el;
		const target = document.createElement('span');
		target.innerText = isolated_safe(token.cps);
		set_title(target, token.type, token.cps, {
			Type: 'Mapped (Replacement)',
			Hex: hex_seq(token.cps),
		});
		el.append(create_arrow_span(), target);
		return el;
	};

	const render_single_cp = token => {
		const el = span_from_cp(token.cp);
		set_title(el, token.type, [token.cp], {
			Type: token.type,
			Hex: hex_cp(token.cp),
		});
		return el;
	};

	const children = tokens.map((token, i) => {
		let el;
		switch (token.type) {
			case 'emoji': el = render_emoji(token); break;
			case 'nfc': el = render_nfc(token); break;
			case 'valid': el = render_valid(token, i); break;
			case 'mapped': el = render_mapped(token); break;
			case 'stop':
			case 'ignored':
			case 'disallowed': el = render_single_cp(token); break;
			default: throw new TypeError(`unknown token type: ${token.type}`);
		}
		el.classList.add(token.type);
		return el;
	});
	root.append(...children);
	return root;
}
function use_default_style() {
let style = document.createElement('style');
style.innerText = `
.tokens {
display: flex;
flex-wrap: wrap;
gap: 2px;
}
.tokens > * {
padding: 2px 4px;
display: flex;
align-items: center;
gap: 4px;
border-radius: 5px;
overflow: hidden;
}
.tokens a {
text-decoration: none;
}
.tokens a:hover {
border-color: #00f;
}
.tokens .valid {
background: #cfc;
border: 2px solid #0a0;
line-break: anywhere;
}
.tokens [data-tld="eth"].valid {
color: #fff;
background: #58f;
border-color: #58f;
}
.tokens [data-tld="art"].valid {
color: #fff;
background: #333;
border-color: #333;
}
.tokens [data-tld="box"].valid {
color: #fff;
background: #666;
border-color: #666;
}
.tokens [data-tld="com"].valid,
.tokens [data-tld="net"].valid,
.tokens [data-tld="org"].valid,
.tokens [data-tld="io"].valid,
.tokens [data-tld="cash"].valid,
.tokens [data-tld="xyz"].valid {
color: #fff;
background: #0a0;
border-color: #0a0;
}
.tokens .ignored {
color: #fff;
background: #aaa;
font-size: 75%;
font-family: monospace;
}
.tokens .disallowed {
background: #c00;
min-width: 5px;
min-height: 1em;
border-radius: 5px;
color: #fff;
}
.tokens .disallowed.code {
font-size: 75%;
background: #800;
}
.tokens .disallowed.mod {
border: 2px solid #800;
font-size: 80%;
}
.tokens .disallowed.mod.tag {
background: #f00;
color: #000;
}
.tokens .mapped {
display: flex;
border: 2px solid #66f;
background: #ccf;
}
.tokens .mapped span:first-child {
margin-bottom: -4px;
border-bottom: 4px solid #000;
text-align: center;
min-width: 0.5rem;
}
.tokens .stop {
font-weight: bold;
background: linear-gradient(#fff, #ff0);
padding-bottom: 0;
border: 1px solid #ccc;
}
.tokens .emoji {
border: 2px solid #0aa;
background: #cff;
color: #000;
}
.tokens .mod {
color: #fff;
}
.tokens * .mod {
font-size: 70%;
padding: 2px;
border-radius: 3px;
}
.tokens .emoji .mod {
background: #333;
}
.tokens .emoji .mod.zwj {
background: #0aa;
}
.tokens .emoji .mod.tag {
background: #0aa;
}
.tokens .emoji .mod.tag.end {
background: #066;
}
.tokens .emoji .mod.dropped {
background: #aaa;
}
.tokens .arrow {
color: rgba(0, 0, 0, 0.35);
user-select: none;
margin: 0 -2px;
}
.tokens .code {
font-family: monospace;
}
.tokens .nfc {
display: flex;
border: 2px solid #c80;
background: #fd8;
border-radius: 5px;
padding: 2px;
}`;
document.body.append(style);
}
// see: https://github.com/adraffy/ens-normalize.js#security
// Build/provenance metadata, collected below into a frozen object that the module
// exports under the name `versions`.
const derived = "2023-09-06T06:00:29.074Z"; // presumably when the data files were derived — TODO confirm
const unicode = "15.1.0 (2023-09-06T02:58:19.261Z)";
const cldr = "43.1 (2023-09-03T21:58:22.687Z)";
const base64_ens_hash = "0565ed049b9cf1614bb9e11ba7d8ac6a6fb96c893253d890f7e2b2884b9ded32"; // same value as the SHA-256 noted above the include-ens blob at the top of this file
const base64_nf_hash = "a974b6f8541fc29d919bc85118af0a44015851fab5343f8679cb31be2bdb209e";
const spec_hash = "1f6d3bdb7a724fe3b91f6d73ab14defcb719e0f4ab79022089c940e7e9c56b9c";
const built = "2023-09-25T01:01:55.148Z"; // same timestamp as the "created" comment at the top of this file
const version = "1.10.1"; // presumably the library release this bundle was built from — TODO confirm
// `__proto__: null` gives the object no prototype; Object.freeze makes it read-only
var includeVersions = /*#__PURE__*/Object.freeze({
__proto__: null,
base64_ens_hash: base64_ens_hash,
base64_nf_hash: base64_nf_hash,
built: built,
cldr: cldr,
derived: derived,
spec_hash: spec_hash,
unicode: unicode,
version: version
});
export { compare_arrays, dom_from_tokens, ens_beautify, ens_emoji, ens_normalize, ens_normalize_fragment, ens_split, ens_tokenize, explode_cp, hex_cp, is_combining_mark, nfc, nfd, quote_cp, random_choice, random_sample, run_tests, safe_str_from_cps, should_escape, str_from_cps, use_default_style, includeVersions as versions };