/*  ansi_up.js
 *  author : Dru Nelson
 *  license : MIT
 *  http://github.com/drudru/ansi_up
 */

//
// INTERFACES
//
/* eslint-disable */

/** One palette entry: an RGB triple plus the CSS class stem used in class mode. */
interface AU_Color {
  rgb: number[];
  class_name: string;
}

// A snapshot of the AnsiUp style state (as captured by with_state()) wrapped
// around a fragment of text. This would allow deferred processing of text
// fragments and colors, if ever needed.
interface TextWithAttr {
  fg: AU_Color | null;
  bg: AU_Color | null;
  bold: boolean;
  italic: boolean;
  underline: boolean;
  text: string;
}

// Used internally when breaking up the raw text into packets
enum PacketKind {
  EOS,
  Text,
  Incomplete, // An Incomplete ESC sequence
  ESC, // A single ESC char - random
  Unknown, // A valid CSI but not an SGR code
  SGR, // Select Graphic Rendition
  OSCURL // Operating System Command (hyperlink)
}

interface TextPacket {
  kind: PacketKind;
  text: string;
  url: string;
}

//
// PRIVATE CONSTANTS
//

// Replacement entity for every character matched by escape_txt_for_html().
const HTML_ESCAPES: { [ch: string]: string } = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#x27;'
};

// Strips layout white-space, '#' comments and newlines from a verbose regex template.
const VERBOSE_REGEX_NOISE = /^\s+|\s+\n|\s*#[\s\S]*?\n|\n/gm;

//
// MAIN CLASS
//

/**
 * Streaming converter from text containing ANSI escape sequences to HTML.
 *
 * Input may be fed in arbitrary chunks through `ansi_to_html()`; an escape
 * sequence that is cut off at the end of a chunk stays buffered until the
 * next call completes it. SGR style state (colors, bold, italic, underline)
 * persists across calls until reset by the input or by `resetStyles()`.
 */
export default class AnsiUp {
  VERSION = '5.0.1';

  //
  // *** SEE README ON GITHUB FOR PUBLIC API ***
  //

  // ansi_colors[0] holds the 8 normal colors, ansi_colors[1] the 8 bright ones.
  private ansi_colors: AU_Color[][] = [];
  // 256-color palette: 0..15 ANSI colors, 16..231 6x6x6 RGB cube, 232..255 grayscale.
  private palette_256: AU_Color[] = [];

  // Current SGR state; a null color means "use the default".
  private fg: AU_Color | null = null;
  private bg: AU_Color | null = null;
  private bold = false;
  private italic = false;
  private underline = false;
  private _use_classes = false;

  // Regexes are compiled lazily on first use (see read_csi_packet / read_osc_packet).
  private _csi_regex?: RegExp;
  private _osc_st?: RegExp;
  private _osc_regex?: RegExp;

  // Maps an allowed URL scheme to a truthy value.
  private _url_whitelist: any = {};
  // Input that has been received but not yet consumed as packets.
  private _buffer = '';

  constructor() {
    // All construction occurs here
    this.setup_palettes();
    this.resetStyles();
  }

  /** When true, named palette colors are emitted as CSS classes instead of inline styles. */
  set use_classes(arg: boolean) {
    this._use_classes = arg;
  }

  get use_classes(): boolean {
    return this._use_classes;
  }

  /** Object whose truthy keys are the URL schemes allowed in OSC 8 hyperlinks. */
  set url_whitelist(arg: {}) {
    this._url_whitelist = arg;
  }

  get url_whitelist(): {} {
    return this._url_whitelist;
  }

  /** Builds the 16 ANSI colors and the derived 256-color palette. */
  private setup_palettes(): void {
    this.ansi_colors = [
      // Normal colors
      [
        { rgb: [0, 0, 0], class_name: 'ansi-black' },
        { rgb: [187, 0, 0], class_name: 'ansi-red' },
        { rgb: [0, 187, 0], class_name: 'ansi-green' },
        { rgb: [187, 187, 0], class_name: 'ansi-yellow' },
        { rgb: [0, 0, 187], class_name: 'ansi-blue' },
        { rgb: [187, 0, 187], class_name: 'ansi-magenta' },
        { rgb: [0, 187, 187], class_name: 'ansi-cyan' },
        { rgb: [255, 255, 255], class_name: 'ansi-white' }
      ],

      // Bright colors
      [
        { rgb: [85, 85, 85], class_name: 'ansi-bright-black' },
        { rgb: [255, 85, 85], class_name: 'ansi-bright-red' },
        { rgb: [0, 255, 0], class_name: 'ansi-bright-green' },
        { rgb: [255, 255, 85], class_name: 'ansi-bright-yellow' },
        { rgb: [85, 85, 255], class_name: 'ansi-bright-blue' },
        { rgb: [255, 85, 255], class_name: 'ansi-bright-magenta' },
        { rgb: [85, 255, 255], class_name: 'ansi-bright-cyan' },
        { rgb: [255, 255, 255], class_name: 'ansi-bright-white' }
      ]
    ];

    this.palette_256 = [];

    // Index 0..15 : Ansi-Colors (the very same objects, so class names carry over)
    for (const palette of this.ansi_colors) {
      for (const rec of palette) {
        this.palette_256.push(rec);
      }
    }

    // Index 16..231 : RGB 6x6x6
    // https://gist.github.com/jasonm23/2868981#file-xterm-256color-yaml
    const levels = [0, 95, 135, 175, 215, 255];
    for (let r = 0; r < 6; ++r) {
      for (let g = 0; g < 6; ++g) {
        for (let b = 0; b < 6; ++b) {
          this.palette_256.push({
            rgb: [levels[r], levels[g], levels[b]],
            class_name: 'truecolor'
          });
        }
      }
    }

    // Index 232..255 : Grayscale, 24 steps of 10 starting at 8
    for (let i = 0; i < 24; ++i) {
      const grey_level = 8 + i * 10;
      this.palette_256.push({
        rgb: [grey_level, grey_level, grey_level],
        class_name: 'truecolor'
      });
    }
  }

  /**
   * Escapes the five HTML-significant characters (& < > " ') so the text can
   * be placed in element content or inside a double-quoted attribute value.
   */
  private escape_txt_for_html(txt: string): string {
    return txt.replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);
  }

  /** Appends newly received text to the pending input buffer. */
  private append_buffer(txt: string) {
    this._buffer = this._buffer + txt;
  }

  /**
   * Consumes and returns the next packet from the front of the buffer.
   *
   * Returns an EOS packet when the buffer is empty and an Incomplete packet
   * (leaving the buffer untouched) when the buffer starts with an escape
   * sequence whose end has not been received yet.
   */
  private get_next_packet(): TextPacket {
    const pkt: TextPacket = { kind: PacketKind.EOS, text: '', url: '' };

    const len = this._buffer.length;
    if (len === 0) {
      return pkt;
    }

    const pos = this._buffer.indexOf('\x1B');

    // The most common case, no ESC codes
    if (pos === -1) {
      pkt.kind = PacketKind.Text;
      pkt.text = this._buffer;
      this._buffer = '';
      return pkt;
    }

    // Plain text in front of the first ESC
    if (pos > 0) {
      pkt.kind = PacketKind.Text;
      pkt.text = this._buffer.slice(0, pos);
      this._buffer = this._buffer.slice(pos);
      return pkt;
    }

    // NOW WE HANDLE ESCAPES (the buffer starts with ESC)
    if (len === 1) {
      // Lone ESC in Buffer, We don't know yet
      pkt.kind = PacketKind.Incomplete;
      return pkt;
    }

    const next_char = this._buffer.charAt(1);
    if (next_char === '[') {
      return this.read_csi_packet(pkt);
    }
    if (next_char === ']') {
      return this.read_osc_packet(pkt);
    }

    // Neither CSI nor OSC: we treat this as a single ESC
    return this.take_esc(pkt);
  }

  /** Consumes only the leading ESC character and reports it as an ESC packet. */
  private take_esc(pkt: TextPacket): TextPacket {
    pkt.kind = PacketKind.ESC;
    pkt.text = this._buffer.slice(0, 1);
    this._buffer = this._buffer.slice(1);
    return pkt;
  }

  /**
   * Parses a CSI sequence (ESC '[' ...) at the front of the buffer.
   * Yields SGR for a plain 'm' command, Unknown for any other complete CSI,
   * ESC for an illegal sequence and Incomplete when more input is needed.
   */
  private read_csi_packet(pkt: TextPacket): TextPacket {
    // We do this regex initialization here so
    // we can keep the regex close to its use (Readability)

    // All ansi codes are typically in the following format.
    // We parse it and focus specifically on the
    // graphics commands (SGR)
    //
    // CONTROL-SEQUENCE-INTRODUCER CSI             (ESC, '[')
    // PRIVATE-MODE-CHAR                           (!, <, >, ?)
    // Numeric parameters separated by semicolons  ('0' - '9', ';')
    // Intermediate-modifiers                      (0x20 - 0x2f)
    // COMMAND-CHAR                                (0x40 - 0x7e)
    //
    let csi_regex = this._csi_regex;
    if (!csi_regex) {
      csi_regex = this._csi_regex = rgx`
        ^                           # beginning of line
                                    #
                                    # First attempt
        (?:                         # legal sequence
          \x1b\[                      # CSI
          ([\x3c-\x3f]?)              # private-mode char
          ([\d;]*)                    # any digits or semicolons
          ([\x20-\x2f]?               # an intermediate modifier
          [\x40-\x7e])                # the command
        )
        |                           # alternate (second attempt)
        (?:                         # illegal sequence
          \x1b\[                      # CSI
          [\x20-\x7e]*                # anything legal
          ([\x00-\x1f:])              # anything illegal
        )
      `;
    }

    const match = this._buffer.match(csi_regex);

    // This match is guaranteed to terminate (even on
    // invalid input). The key is to match on legal and
    // illegal sequences.
    // The first alternate matches everything legal and
    // the second matches everything illegal.
    //
    // If it doesn't match, then we have not received
    // either the full sequence or an illegal sequence.
    // If it does match, the presence of field 4 tells
    // us whether it was legal or illegal.
    if (match === null) {
      pkt.kind = PacketKind.Incomplete;
      return pkt;
    }

    // match is an array
    // 0 - total match
    // 1 - private mode chars group
    // 2 - digits and semicolons group
    // 3 - command
    // 4 - illegal char
    if (match[4]) {
      // Illegal sequence, just remove the ESC
      return this.take_esc(pkt);
    }

    // If not a valid SGR, we don't handle
    const is_sgr = match[1] === '' && match[3] === 'm';
    pkt.kind = is_sgr ? PacketKind.SGR : PacketKind.Unknown;
    pkt.text = match[2]; // Just the parameters

    this._buffer = this._buffer.slice(match[0].length);
    return pkt;
  }

  /**
   * Parses an OSC 8 hyperlink (ESC ']' '8' ';' ...) at the front of the buffer.
   * Yields OSCURL for a complete hyperlink, ESC for anything unsupported or
   * malformed and Incomplete when more input is needed.
   */
  private read_osc_packet(pkt: TextPacket): TextPacket {
    if (this._buffer.length < 4) {
      pkt.kind = PacketKind.Incomplete;
      return pkt;
    }

    if (this._buffer.charAt(2) !== '8' || this._buffer.charAt(3) !== ';') {
      // This is not a match, so we'll just treat it as ESC
      return this.take_esc(pkt);
    }

    // Matching a Hyperlink OSC with a regex is difficult
    // because Javascript's regex engine doesn't support
    // 'partial match' support.
    //
    // Therefore, we require the system to match the
    // string-terminator(ST) before attempting a match.
    // Once we find it, we attempt the Hyperlink-Begin
    // match.
    // If that goes ok, we scan forward for the next
    // ST.
    // Finally, we try to match it all and return
    // the sequence.

    // Also, it is important to note that we consider
    // certain control characters as an invalidation of
    // the entire sequence.

    // STRING-TERMINATOR
    // This is likely to terminate in most scenarios
    // because it will terminate on a newline
    let osc_st = this._osc_st;
    if (!osc_st) {
      osc_st = this._osc_st = rgxG`
        (?:                         # legal sequence
          (\x1b\\)                    # ESC, backslash
          |                           # alternate
          (\x07)                      # BEL (what xterm did)
        )
        |                           # alternate (second attempt)
        (                           # illegal sequence
          [\x00-\x06]                 # anything illegal
          |                           # alternate
          [\x08-\x1a]                 # anything illegal
          |                           # alternate
          [\x1c-\x1f]                 # anything illegal
        )
      `;
    }

    // VERY IMPORTANT
    // We do a stateful regex match with exec.
    // If the regex is global, and it used with 'exec',
    // then it will search starting at the 'lastIndex'
    // If it matches, the regex can be used again to
    // find the next match.
    osc_st.lastIndex = 0;

    // Two terminators are required: the first closes the prefix and URI,
    // the second (searched past the first) closes the hyperlink end marker.
    for (let found = 0; found < 2; ++found) {
      const st_match = osc_st.exec(this._buffer);

      if (st_match === null) {
        pkt.kind = PacketKind.Incomplete;
        return pkt;
      }

      // If an illegal character was found, bail on the match
      if (st_match[3]) {
        // Illegal sequence, just remove the ESC
        return this.take_esc(pkt);
      }
    }

    // OK, at this point we should have a FULL match!
    //
    // Lets try to match that now
    let osc_regex = this._osc_regex;
    if (!osc_regex) {
      osc_regex = this._osc_regex = rgx`
        ^                           # beginning of line
                                    #
        \x1b\]8;                    # OSC Hyperlink
        [\x20-\x3a\x3c-\x7e]*       # params (excluding ;)
        ;                           # end of params
        ([\x21-\x7e]{0,512})        # URL capture
        (?:                         # ST
          (?:\x1b\\)                  # ESC, backslash
          |                           # alternate
          (?:\x07)                    # BEL (what xterm did)
        )
        ([\x20-\x7e]+)              # TEXT capture
        \x1b\]8;;                   # OSC Hyperlink End
        (?:                         # ST
          (?:\x1b\\)                  # ESC, backslash
          |                           # alternate
          (?:\x07)                    # BEL (what xterm did)
        )
      `;
    }

    const match = this._buffer.match(osc_regex);

    if (match === null) {
      // Illegal sequence, just remove the ESC
      return this.take_esc(pkt);
    }

    // match is an array
    // 0 - total match
    // 1 - URL
    // 2 - Text
    pkt.kind = PacketKind.OSCURL;
    pkt.url = match[1];
    pkt.text = match[2];

    this._buffer = this._buffer.slice(match[0].length);
    return pkt;
  }

  /**
   * Appends `txt` to the pending input and converts every complete packet to HTML.
   *
   * @param txt - next chunk of raw text, possibly containing ANSI escape sequences
   * @returns the HTML for everything that could be fully parsed; a trailing
   *          incomplete escape sequence stays buffered for the next call
   */
  ansi_to_html(txt: string): string {
    this.append_buffer(txt);

    const blocks: string[] = [];

    for (;;) {
      const packet = this.get_next_packet();

      if (packet.kind === PacketKind.EOS || packet.kind === PacketKind.Incomplete) {
        break;
      }

      switch (packet.kind) {
        case PacketKind.Text:
          blocks.push(this.transform_to_html(this.with_state(packet)));
          break;
        case PacketKind.SGR:
          this.process_ansi(packet);
          break;
        case PacketKind.OSCURL:
          blocks.push(this.process_hyperlink(packet));
          break;
        default:
          // Drop single ESC or Unknown CSI
          break;
      }
    }

    return blocks.join('');
  }

  /**
   * Restores the initial state: clears all style attributes and the pending
   * input buffer, switches back to inline styles and resets the URL scheme
   * whitelist to http/https.
   */
  resetStyles() {
    this._use_classes = false;
    this.bold = false;
    this.italic = false;
    this.underline = false;
    this.fg = this.bg = null;
    this._buffer = '';
    this._url_whitelist = { http: 1, https: 1 };
  }

  /** Pairs the packet text with a snapshot of the current style state. */
  private with_state(pkt: TextPacket): TextWithAttr {
    return {
      bold: this.bold,
      italic: this.italic,
      underline: this.underline,
      fg: this.fg,
      bg: this.bg,
      text: pkt.text
    };
  }

  /**
   * Applies the parameters of an SGR (Select Graphic Rendition) packet to the
   * current style state. Unrecognized numeric codes are ignored.
   */
  private process_ansi(pkt: TextPacket) {
    const sgr_cmds = pkt.text.split(';');

    // Each of these params affects the SGR state

    // Why do we consume the array instead of a forEach??
    // ... because some commands consume the params that follow !
    while (sgr_cmds.length > 0) {
      const [num] = sgr_cmds.splice(0, 1).map(v => parseInt(v, 10));

      if (isNaN(num) || num === 0) {
        // An empty/non-numeric parameter counts as 0: reset everything
        this.fg = this.bg = null;
        this.bold = false;
        this.italic = false;
        this.underline = false;
      } else if (num === 1) {
        this.bold = true;
      } else if (num === 3) {
        this.italic = true;
      } else if (num === 4) {
        this.underline = true;
      } else if (num === 22) {
        this.bold = false;
      } else if (num === 23) {
        this.italic = false;
      } else if (num === 24) {
        this.underline = false;
      } else if (num === 39) {
        this.fg = null;
      } else if (num === 49) {
        this.bg = null;
      } else if (num >= 30 && num < 38) {
        this.fg = this.ansi_colors[0][num - 30];
      } else if (num >= 40 && num < 48) {
        this.bg = this.ansi_colors[0][num - 40];
      } else if (num >= 90 && num < 98) {
        this.fg = this.ansi_colors[1][num - 90];
      } else if (num >= 100 && num < 108) {
        this.bg = this.ansi_colors[1][num - 100];
      } else if ((num === 38 || num === 48) && sgr_cmds.length > 0) {
        // extended set foreground/background color (38=fg, 48=bg);
        // the mode parameter is always consumed
        const is_foreground = num === 38;
        const [mode_cmd] = sgr_cmds.splice(0, 1);
        let color: AU_Color | null = null;

        if (mode_cmd === '5' && sgr_cmds.length > 0) {
          // MODE '5' - 256 color palette
          const [palette_index] = sgr_cmds.splice(0, 1).map(v => parseInt(v, 10));
          if (palette_index >= 0 && palette_index <= 255) {
            color = this.palette_256[palette_index];
          }
        } else if (mode_cmd === '2' && sgr_cmds.length > 2) {
          // MODE '2' - True Color
          const [r, g, b] = sgr_cmds.splice(0, 3).map(v => parseInt(v, 10));
          if (r >= 0 && r <= 255 && g >= 0 && g <= 255 && b >= 0 && b <= 255) {
            color = { rgb: [r, g, b], class_name: 'truecolor' };
          }
        }

        // Out-of-range or missing values leave the current color untouched
        if (color !== null) {
          if (is_foreground) {
            this.fg = color;
          } else {
            this.bg = color;
          }
        }
      }
    }
  }

  /**
   * Renders a text fragment as HTML: unstyled text is returned as-is, styled
   * text is wrapped in a <span> carrying inline styles and/or CSS classes.
   */
  private transform_to_html(fragment: TextWithAttr): string {
    const txt = fragment.text;

    if (txt.length === 0) {
      return txt;
    }

    // NOTE(review): escaping of the fragment text appears to have been
    // deliberately disabled here, so the text reaches the output verbatim.
    // Callers must escape untrusted input themselves - confirm this is intended.
    // txt = this.escape_txt_for_html(txt);

    const fg = fragment.fg;
    const bg = fragment.bg;

    // If colors not set, default style is used
    if (!fragment.bold && !fragment.italic && !fragment.underline && fg === null && bg === null) {
      return txt;
    }

    const styles: string[] = [];
    const classes: string[] = [];

    // Note on bold: https://stackoverflow.com/questions/6737005/what-are-some-advantages-to-using-span-style-font-weightbold-rather-than-b?rq=1
    if (fragment.bold) {
      styles.push('font-weight:bold');
    }

    if (fragment.italic) {
      styles.push('font-style:italic');
    }

    if (fragment.underline) {
      styles.push('text-decoration:underline');
    }

    if (!this._use_classes) {
      // USE INLINE STYLES
      if (fg) {
        styles.push(`color:rgb(${fg.rgb.join(',')})`);
      }
      if (bg) {
        styles.push(`background-color:rgb(${bg.rgb.join(',')})`);
      }
    } else {
      // USE CLASSES (true colors have no class, so they stay inline)
      if (fg) {
        if (fg.class_name !== 'truecolor') {
          classes.push(`${fg.class_name}-fg`);
        } else {
          styles.push(`color:rgb(${fg.rgb.join(',')})`);
        }
      }
      if (bg) {
        if (bg.class_name !== 'truecolor') {
          classes.push(`${bg.class_name}-bg`);
        } else {
          styles.push(`background-color:rgb(${bg.rgb.join(',')})`);
        }
      }
    }

    const class_string = classes.length ? ` class="${classes.join(' ')}"` : '';
    const style_string = styles.length ? ` style="${styles.join(';')}"` : '';

    return `<span${style_string}${class_string}>${txt}</span>`;
  }

  /**
   * Renders an OSC 8 hyperlink packet as an <a> element.
   * Returns an empty string when the URL scheme is not in the whitelist.
   */
  private process_hyperlink(pkt: TextPacket): string {
    // Check URL scheme (everything before the first ':')
    const scheme = pkt.url.split(':')[0];

    if (!this._url_whitelist[scheme]) {
      return '';
    }

    const href = this.escape_txt_for_html(pkt.url);
    const label = this.escape_txt_for_html(pkt.text);
    return `<a href="${href}">${label}</a>`;
  }
}

//
// PRIVATE FUNCTIONS
//

// Compiles a verbose (white-space and '#'-commented) regex template.
// The 'raw' value is used so we don't have to double backslash in a template string.
function compile_verbose_regex(tmplObj: TemplateStringsArray, flags?: string): RegExp {
  const regexText: string = tmplObj.raw[0];
  return new RegExp(regexText.replace(VERBOSE_REGEX_NOISE, ''), flags);
}

// ES5 template string transformer
function rgx(tmplObj: TemplateStringsArray, ...subst: any[]) {
  return compile_verbose_regex(tmplObj);
}

// ES5 template string transformer
// Global flag on (required for the stateful exec() scanning of terminators)
function rgxG(tmplObj: TemplateStringsArray, ...subst: any[]) {
  return compile_verbose_regex(tmplObj, 'g');
}