/** * Chat parser * Pokemon Showdown - http://pokemonshowdown.com/ * * Parses formate. * * @license MIT */ /* REGEXFREE SOURCE FOR LINKREGEX ( ( # When using http://, allow any domain https?:\/\/ [a-z0-9-]+ ( \. [a-z0-9-]+ )* | # When using www., expect at least one more dot www \. [a-z0-9-]+ ( \. [a-z0-9-]+ )+ | # Otherwise, allow any domain, but only if \b [a-z0-9-]+ ( \. [a-z0-9-]+ )* \. ( # followed either a common TLD... ( com? | org | net | edu | info | us | jp ) \b | # or any 2-3 letter TLD followed by a port or / [a-z]{2,3} (?= :[0-9] | / ) ) ) # possible custom port ( : [0-9]+ )? ( \/ ( # characters allowed inside URL paths ( [^\s()&<>] | & | " | # parentheses in URLs should be matched, so they're not confused # for parentheses around URLs \( ( [^\\s()<>&] | & )* \) )* # URLs usually don't end with punctuation, so don't allow # punctuation symbols that probably arent related to URL. ( [^\s()[\]{}\".,!?;:&<>*`^~\\] | # annoyingly, Wikipedia URLs often end in ) \( ( [^\s()<>&] | & )* \) ) )? )? | # email address [a-z0-9.]+ @ [a-z0-9-]+ ( \. [a-z0-9-]+ )* \. [a-z]{2,} ) (?! [^ ]*> ) */ export const linkRegex = /(?:(?:https?:\/\/[a-z0-9-]+(?:\.[a-z0-9-]+)*|www\.[a-z0-9-]+(?:\.[a-z0-9-]+)+|\b[a-z0-9-]+(?:\.[a-z0-9-]+)*\.(?:(?:com?|org|net|edu|info|us|jp)\b|[a-z]{2,3}(?=:[0-9]|\/)))(?::[0-9]+)?(?:\/(?:(?:[^\s()&<>]|&|"|\((?:[^\\s()<>&]|&)*\))*(?:[^\s()[\]{}".,!?;:&<>*`^~\\]|\((?:[^\s()<>&]|&)*\)))?)?|[a-z0-9.]+@[a-z0-9-]+(?:\.[a-z0-9-]+)*\.[a-z]{2,})(?![^ ]*>)/ig; type SpanType = '_' | '*' | '~' | '^' | '\\' | '|' | '<' | '[' | '`' | 'a' | 'spoiler' | '>' | '('; type FormatSpan = [SpanType, number]; class TextFormatter { readonly str: string; readonly buffers: string[]; readonly stack: FormatSpan[]; readonly isTrusted: boolean; readonly replaceLinebreaks: boolean; /** offset of str that's been parsed so far */ offset: number; constructor(str: string, isTrusted = false, replaceLinebreaks = false) { // escapeHTML, without escaping / str = `${str}` .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // filter links first str = str.replace(linkRegex, uri => { let fulluri; if (/^[a-z0-9.]+@/ig.test(uri)) { fulluri = 'mailto:' + uri; } else { fulluri = uri.replace(/^([a-z]*[^a-z:])/g, 'http://$1'); if (uri.substr(0, 24) === 'https://docs.google.com/' || uri.substr(0, 16) === 'docs.google.com/') { if (uri.startsWith('https')) uri = uri.slice(8); if (uri.substr(-12) === '?usp=sharing' || uri.substr(-12) === '&usp=sharing') uri = uri.slice(0, -12); if (uri.substr(-6) === '#gid=0') uri = uri.slice(0, -6); let slashIndex = uri.lastIndexOf('/'); if (uri.length - slashIndex > 18) slashIndex = uri.length; if (slashIndex - 4 > 19 + 3) { uri = uri.slice(0, 19) + '' + uri.slice(19, slashIndex - 4) + '' + uri.slice(slashIndex - 4); } } } return `${uri}`; }); // (links don't have any specific syntax, they're just a pattern, so we detect them in a separate pass) this.str = str; this.buffers = []; this.stack = []; this.isTrusted = isTrusted; this.replaceLinebreaks = this.isTrusted || replaceLinebreaks; this.offset = 0; } // debugAt(i=0, j=i+1) { console.log(`${this.slice(0, i)}[${this.slice(i, j)}]${this.slice(j, this.str.length)}`); } slice(start: number, end: number) { return this.str.slice(start, end); } at(start: number) { return this.str.charAt(start); } pushSpan(spanType: SpanType, start: number, end: number) { this.pushSlice(start); this.stack.push([spanType, this.buffers.length]); this.buffers.push(this.slice(start, end)); this.offset = end; } pushSlice(end: number) { if (end !== this.offset) { this.buffers.push(this.slice(this.offset, end)); this.offset = end; } } closeParenSpan(start: number) { let stackPosition = -1; for (let i = this.stack.length - 1; i >= 0; i--) { const span = this.stack[i]; if (span[0] === '(') { stackPosition = i; break; } if (span[0] !== 'spoiler') break; } if (stackPosition === -1) return false; this.pushSlice(start); while (this.stack.length > stackPosition) this.popSpan(start); this.offset = start; return true; } /** * Attempt to close a span. */ closeSpan(spanType: SpanType, start: number, end: number) { // loop backwards let stackPosition = -1; for (let i = this.stack.length - 1; i >= 0; i--) { const span = this.stack[i]; if (span[0] === spanType) { stackPosition = i; break; } } if (stackPosition === -1) return false; this.pushSlice(start); while (this.stack.length > stackPosition + 1) this.popSpan(start); const span = this.stack.pop()!; const startIndex = span[1]; let tagName = ''; let attrs = ''; switch (spanType) { case '_': tagName = 'i'; break; case '*': tagName = 'b'; break; case '~': tagName = 's'; break; case '^': tagName = 'sup'; break; case '\\': tagName = 'sub'; break; case '|': tagName = 'span'; attrs = ' class="spoiler"'; break; } if (tagName) { this.buffers[startIndex] = `<${tagName}${attrs}>`; this.buffers.push(``); this.offset = end; } return true; } /** * Ends a span without an ending symbol. For most spans, this means * they don't take effect, but certain spans like spoiler tags don't * require ending symbols. */ popSpan(end: number) { const span = this.stack.pop(); if (!span) return false; this.pushSlice(end); switch (span[0]) { case 'spoiler': this.buffers.push(``); this.buffers[span[1]] = ``; break; case '>': this.buffers.push(``); this.buffers[span[1]] = ``; break; default: // do nothing break; } return true; } popAllSpans(end: number) { while (this.stack.length) this.popSpan(end); this.pushSlice(end); } toUriComponent(html: string) { const component = html.replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, '\'') .replace(/&/g, '&'); return encodeURIComponent(component); } runLookahead(spanType: SpanType, start: number) { switch (spanType) { case '`': { let delimLength = 0; let i = start; while (this.at(i) === '`') { delimLength++; i++; } let curDelimLength = 0; while (i < this.str.length) { const char = this.at(i); if (char === '\n') break; if (char === '`') { curDelimLength++; } else { if (curDelimLength === delimLength) break; curDelimLength = 0; } i++; } if (curDelimLength !== delimLength) return false; // matching delims found this.pushSlice(start); this.buffers.push(``); let innerStart = start + delimLength; let innerEnd = i - delimLength; if (innerStart + 1 >= innerEnd) { // no special whitespace handling } else if (this.at(innerStart) === ' ' && this.at(innerEnd - 1) === ' ') { innerStart++; // strip starting and ending space innerEnd--; } else if (this.at(innerStart) === ' ' && this.at(innerStart + 1) === '`') { innerStart++; // strip starting space } else if (this.at(innerEnd - 1) === ' ' && this.at(innerEnd - 2) === '`') { innerEnd--; // strip ending space } this.buffers.push(this.slice(innerStart, innerEnd)); this.buffers.push(``); this.offset = i; } return true; case '[': { if (this.slice(start, start + 2) !== '[[') return false; let i = start + 2; let colonPos = -1; // `:` let anglePos = -1; // `<` while (i < this.str.length) { const char = this.at(i); if (char === ']' || char === '\n') break; if (char === ':' && colonPos < 0) colonPos = i; if (char === '&' && this.slice(i, i + 4) === '<') anglePos = i; i++; } if (this.slice(i, i + 2) !== ']]') return false; let termEnd = i; let uri = ''; if (anglePos >= 0 && this.slice(i - 4, i) === '>') { // `>` uri = this.slice(anglePos + 4, i - 4); termEnd = anglePos; if (this.at(termEnd - 1) === ' ') termEnd--; uri = encodeURI(uri.replace(/^([a-z]*[^a-z:])/g, 'http://$1')); } let term = this.slice(start + 2, termEnd).replace(/<\/?a(?: [^>]+)?>/g, ''); if (uri && !this.isTrusted) { const shortUri = uri.replace(/^https?:\/\//, '').replace(/^www\./, '').replace(/\/$/, ''); term += ` <${shortUri}>`; uri += '" rel="noopener'; } if (colonPos > 0) { const key = this.slice(start + 2, colonPos).toLowerCase(); switch (key) { case 'w': case 'wiki': term = term.slice(term.charAt(key.length + 1) === ' ' ? key.length + 2 : key.length + 1); uri = `//en.wikipedia.org/w/index.php?title=Special:Search&search=${this.toUriComponent(term)}`; term = `wiki: ${term}`; break; case 'pokemon': case 'item': case 'type': case 'category': term = term.slice(term.charAt(key.length + 1) === ' ' ? key.length + 2 : key.length + 1); let display = ''; if (this.isTrusted) { display = ``; } else { display = `[${term}]`; } let dir = key; if (key === 'item') dir += 's'; if (key === 'category') dir = 'categories' as 'category'; uri = `//dex.pokemonshowdown.com/${dir}/${toID(term)}`; term = display; } } if (!uri) { uri = `//www.google.com/search?ie=UTF-8&btnI&q=${this.toUriComponent(term)}`; } this.pushSlice(start); this.buffers.push(`${term}`); this.offset = i + 2; } return true; case '<': { if (this.slice(start, start + 8) !== '<<') return false; // << let i = start + 8; while (/[a-z0-9-]/.test(this.at(i))) i++; if (this.slice(i, i + 8) !== '>>') return false; // >> this.pushSlice(start); const roomid = this.slice(start + 8, i); this.buffers.push(`«${roomid}»`); this.offset = i + 8; } return true; case 'a': { let i = start + 1; while (this.at(i) !== '/' || this.at(i + 1) !== 'a' || this.at(i + 2) !== '>') i++; // i += 3; this.pushSlice(i); } return true; } return false; } get() { let beginningOfLine = this.offset; // main loop! i tracks our position for (let i = beginningOfLine; i < this.str.length; i++) { const char = this.at(i); switch (char) { case '_': case '*': case '~': case '^': case '\\': case '|': if (this.at(i + 1) === char && this.at(i + 2) !== char) { if (!(this.at(i - 1) !== ' ' && this.closeSpan(char, i, i + 2))) { if (this.at(i + 2) !== ' ') this.pushSpan(char, i, i + 2); } if (i < this.offset) { i = this.offset - 1; break; } } while (this.at(i + 1) === char) i++; break; case '(': this.stack.push(['(', -1]); break; case ')': this.closeParenSpan(i); if (i < this.offset) { i = this.offset - 1; break; } break; case '`': if (this.at(i + 1) === '`') this.runLookahead('`', i); if (i < this.offset) { i = this.offset - 1; break; } while (this.at(i + 1) === '`') i++; break; case '[': this.runLookahead('[', i); if (i < this.offset) { i = this.offset - 1; break; } while (this.at(i + 1) === '[') i++; break; case ':': if (i < 7) break; if (this.slice(i - 7, i + 1).toLowerCase() === 'spoiler:' || this.slice(i - 8, i + 1).toLowerCase() === 'spoilers:') { if (this.at(i + 1) === ' ') i++; this.pushSpan('spoiler', i + 1, i + 1); } break; case '&': // escaped '<' or '>' if (i === beginningOfLine && this.slice(i, i + 4) === '>') { if (!"._/=:;".includes(this.at(i + 4)) && !['w<', 'w>'].includes(this.slice(i + 4, i + 9))) { this.pushSpan('>', i, i); } } else { this.runLookahead('<', i); } if (i < this.offset) { i = this.offset - 1; break; } while (this.slice(i + 1, i + 5) === 'lt;&') i += 4; break; case '<': // guaranteed to be `); this.offset++; } beginningOfLine = i + 1; break; } } this.popAllSpans(this.str.length); return this.buffers.join(''); } } /** * Takes a string and converts it to HTML by replacing standard chat formatting with the appropriate HTML tags. */ export function formatText(str: string, isTrusted = false, replaceLinebreaks = false) { return new TextFormatter(str, isTrusted, replaceLinebreaks).get(); } /** * Takes a string and strips all standard chat formatting except greentext from it, the text of a link is kept. */ export function stripFormatting(str: string) { // Doesn't match > meme arrows because the angle bracket appears in the chat still. str = str.replace(/\*\*([^\s*]+)\*\*|__([^\s_]+)__|~~([^\s~]+)~~|``([^\s`]+)``|\^\^([^\s^]+)\^\^|\\([^\s\\]+)\\/g, (match, $1, $2, $3, $4, $5, $6) => $1 || $2 || $3 || $4 || $5 || $6); // Remove all of the link expect for the text in [[text]] return str.replace(/\[\[(?:([^<]*)\s*<[^>]+>|([^\]]+))\]\]/g, (match, $1, $2) => $1 || $2 || ''); }