import { expect } from 'chai';

import { RegExpForeignCodeExtractor, getIndexOfCaptureGroup } from './regexp';

/*
 * Fixtures.
 *
 * Line breaks are written as explicit `\n` escapes (rather than as raw line
 * breaks inside template literals) so that every character the assertions
 * below depend on is visible and cannot be altered by re-formatting.
 */

/** A cell that starts with the `%%R` cell magic. */
const R_CELL_MAGIC_EXISTS = '%%R\nsome text\n';

/**
 * A `%%python` cell whose body is the single letter `h`; the same letter also
 * occurs inside the magic name itself (see jupyterlab-lsp issue 559).
 */
const PYTHON_CELL_MAGIC_WITH_H = '%%python\nh';

/** `%%R` appears here, but not at the very beginning of the cell. */
const NO_CELL_MAGIC = '%R\nsome text\n%%R\nsome text\n';

/** Two `%R` line magics interleaved with two lines of host code. */
const R_LINE_MAGICS =
  '%R df = data.frame()\nprint("df created")\n%R ggplot(df)\nprint("plotted")\n';

/**
 * An HTML element embedded in a Python string literal. The range assertions
 * below rely on this exact layout: the element opens at line 1, column 7 and
 * its closing tag ends at line 3, column 4.
 */
const HTML_IN_PYTHON =
  '\nx = """<a href="#">\n<b>important</b> link\n</a>""";\nprint(x)';

/*
 * Patterns shared by several extractors/tests, defined once so that the
 * copies cannot drift apart.
 */
const R_CELL_PATTERN = '^%%R( .*?)?\n([^]*)';
const R_LINE_PATTERN = '(^|\n)%R (.*)\n?';
const PYTHON_CELL_PATTERN = '^%%(python|python2|python3|pypy)( .*?)?\\n([^]*)';
// The `\\2` backreference requires the closing tag to repeat the name captured
// from the opening tag.
const HTML_PATTERN = '(<(.*?)( .*?)?>([^]*?)</\\2>)';

describe('getIndexOfCaptureGroup', () => {
  it('extracts index of a captured group', () => {
    // tests for https://github.com/jupyter-lsp/jupyterlab-lsp/issues/559
    const index = getIndexOfCaptureGroup(
      new RegExp(PYTHON_CELL_PATTERN),
      '%%python\nh',
      'h'
    );
    // 9 is the offset of the `h` after the line break, not of the `h` inside
    // `python` (offset 5).
    expect(index).to.be.equal(9);
  });
});

describe('RegExpForeignCodeExtractor', () => {
  // Extractors shared by the tests below; all of them keep the foreign code
  // in the host document.
  const r_cell_extractor = new RegExpForeignCodeExtractor({
    language: 'R',
    pattern: R_CELL_PATTERN,
    foreign_capture_groups: [2],
    keep_in_host: true,
    is_standalone: false,
    file_extension: 'R'
  });

  const r_line_extractor = new RegExpForeignCodeExtractor({
    language: 'R',
    pattern: R_LINE_PATTERN,
    foreign_capture_groups: [2],
    keep_in_host: true,
    is_standalone: false,
    file_extension: 'R'
  });

  const python_cell_extractor = new RegExpForeignCodeExtractor({
    language: 'python',
    pattern: PYTHON_CELL_PATTERN,
    foreign_capture_groups: [3],
    keep_in_host: true,
    is_standalone: true,
    file_extension: 'py'
  });

  describe('#has_foreign_code()', () => {
    it('detects cell magics', () => {
      expect(r_cell_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS)).to.equal(
        true
      );
      expect(r_cell_extractor.has_foreign_code(R_LINE_MAGICS)).to.equal(false);
      expect(r_cell_extractor.has_foreign_code(NO_CELL_MAGIC)).to.equal(false);
    });

    it('is not stateful', () => {
      // A stateful use of regular expressions in JS can easily lead to an
      // error manifesting itself as two consecutive checks giving different
      // results, because the last index was moved in between them.
      expect(r_cell_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS)).to.equal(
        true
      );
      expect(r_cell_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS)).to.equal(
        true
      );
    });

    it('detects line magics', () => {
      expect(r_line_extractor.has_foreign_code(R_LINE_MAGICS)).to.equal(true);
      expect(r_line_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS)).to.equal(
        false
      );
    });
  });

  describe('#extract_foreign_code()', () => {
    it('should correctly return the range', () => {
      const results = python_cell_extractor.extract_foreign_code(
        PYTHON_CELL_MAGIC_WITH_H
      );
      expect(results.length).to.equal(1);

      const result = results[0];

      // test against https://github.com/jupyter-lsp/jupyterlab-lsp/issues/559
      expect(result.host_code).to.equal(PYTHON_CELL_MAGIC_WITH_H);
      expect(result.foreign_code).to.equal('h');

      expect(result.range!.start.line).to.equal(1);
      expect(result.range!.start.column).to.equal(0);
      expect(result.range!.end.line).to.equal(1);
      expect(result.range!.end.column).to.equal(1);
    });

    it('should work with non-line magic and non-cell magic code snippets as well', () => {
      // Note: in the real application, one should NOT use regular expressions
      // for HTML extraction
      const html_extractor = new RegExpForeignCodeExtractor({
        language: 'HTML',
        pattern: HTML_PATTERN,
        foreign_capture_groups: [1],
        keep_in_host: false,
        is_standalone: false,
        file_extension: 'html'
      });

      const results = html_extractor.extract_foreign_code(HTML_IN_PYTHON);
      expect(results.length).to.equal(2);

      const result = results[0];
      // TODO: is tolerating the new line added here ok?
      expect(result.host_code).to.equal('\nx = """\n');
      expect(result.foreign_code).to.equal(
        '<a href="#">\n<b>important</b> link\n</a>'
      );
      expect(result.range!.start.line).to.equal(1);
      expect(result.range!.start.column).to.equal(7);
      expect(result.range!.end.line).to.equal(3);
      expect(result.range!.end.column).to.equal(4);

      const last_bit = results[1];
      expect(last_bit.host_code).to.equal('""";\nprint(x)');
    });

    it('should extract cell magics and keep in host', () => {
      const results = r_cell_extractor.extract_foreign_code(
        R_CELL_MAGIC_EXISTS
      );
      expect(results.length).to.equal(1);

      const result = results[0];
      expect(result.host_code).to.equal(R_CELL_MAGIC_EXISTS);
      expect(result.foreign_code).to.equal('some text\n');
      expect(result.range!.start.line).to.equal(1);
      expect(result.range!.start.column).to.equal(0);
    });

    it('should extract and remove from host', () => {
      const removing_cell_extractor = new RegExpForeignCodeExtractor({
        language: 'R',
        pattern: R_CELL_PATTERN,
        foreign_capture_groups: [2],
        keep_in_host: false,
        is_standalone: false,
        file_extension: 'R'
      });

      const results =
        removing_cell_extractor.extract_foreign_code(R_CELL_MAGIC_EXISTS);
      expect(results.length).to.equal(1);

      const result = results[0];
      expect(result.foreign_code).to.equal('some text\n');
      expect(result.host_code).to.equal('');
    });

    it('should extract multiple line magics deleting them from host', () => {
      // Deliberately not named `r_line_extractor`: that would shadow the
      // shared extractor above, which differs only in `keep_in_host`.
      const removing_line_extractor = new RegExpForeignCodeExtractor({
        language: 'R',
        pattern: R_LINE_PATTERN,
        foreign_capture_groups: [2],
        keep_in_host: false,
        is_standalone: false,
        file_extension: 'R'
      });

      const results =
        removing_line_extractor.extract_foreign_code(R_LINE_MAGICS);

      // 2 line magics to be extracted + the unprocessed host code
      expect(results.length).to.equal(3);

      const first_magic = results[0];
      expect(first_magic.foreign_code).to.equal('df = data.frame()');
      expect(first_magic.host_code).to.equal('');

      const second_magic = results[1];
      expect(second_magic.foreign_code).to.equal('ggplot(df)');
      expect(second_magic.host_code).to.equal('print("df created")\n');

      const final_bit = results[2];
      expect(final_bit.foreign_code).to.equal(null);
      expect(final_bit.host_code).to.equal('print("plotted")\n');
    });

    it('should extract multiple line magics preserving them in host', () => {
      const results = r_line_extractor.extract_foreign_code(R_LINE_MAGICS);

      // 2 line magics to be extracted + the unprocessed host code
      expect(results.length).to.equal(3);

      const first_magic = results[0];
      expect(first_magic.foreign_code).to.equal('df = data.frame()');
      expect(first_magic.host_code).to.equal('%R df = data.frame()\n');
      expect(first_magic.range!.end.line).to.equal(0);
      expect(first_magic.range!.end.column).to.equal(20);

      const second_magic = results[1];
      expect(second_magic.foreign_code).to.equal('ggplot(df)');
      expect(second_magic.host_code).to.equal(
        'print("df created")\n%R ggplot(df)\n'
      );

      const final_bit = results[2];
      expect(final_bit.foreign_code).to.equal(null);
      expect(final_bit.host_code).to.equal('print("plotted")\n');
    });

    it('should extract single line magic which does not end with a blank line', () => {
      const results = r_line_extractor.extract_foreign_code('%R test');
      expect(results.length).to.equal(1);

      const result = results[0];
      expect(result.foreign_code).to.equal('test');
    });

    it('should not extract magic-like text from the middle of the cell', () => {
      const results = r_cell_extractor.extract_foreign_code(NO_CELL_MAGIC);
      expect(results.length).to.equal(1);

      const result = results[0];
      expect(result.foreign_code).to.equal(null);
      expect(result.host_code).to.equal(NO_CELL_MAGIC);
      expect(result.range).to.equal(null);
    });
  });
});