/* * Copyright (c) Minh Loi. * * This file is part of Ulangi which is released under GPL v3.0. * See LICENSE or go to https://www.gnu.org/licenses/gpl-3.0.txt */ import { assertExists } from '@ulangi/assert'; import { WiktionaryExample } from '../interfaces/WiktionaryExample'; import { WiktionaryPage } from '../interfaces/WiktionaryPage'; import { WiktionaryPageConverter } from './WiktionaryPageConverter';
/**
 * Unit tests for WiktionaryPageConverter.convertToDictionaryEntry.
 *
 * Every case follows the same pattern:
 *   1. create a fresh WiktionaryPageConverter,
 *   2. build a WiktionaryPage literal with exactly one language section,
 *   3. call convertToDictionaryEntry(page.title, page.languages[0]) and unwrap
 *      the result with assertExists,
 *   4. assert on a single field of the returned entry (definitions,
 *      categories, ipa, pinyin, zhuyin, simplified, traditional, reading,
 *      hiragana, romaji, romanization, gender, plural, feminine or masculine).
 *
 * The expected values are derived from the fixture strings (headwords,
 * pronunciation text, form notes), so those strings must be kept exactly as
 * written, including escape sequences and invisible characters.
 */
describe('WiktionaryPageConverter', (): void => { /* Definitions: a single definition is expected to be found wherever it is nested (etymology > word class, word class, pronunciation > definitions, or definitions directly under the language). */ it('extract a definition in etymology-wordclass section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'etymology', sectionName: 'etymology', etymology: '', children: [ { kind: 'wordClass', wordClass: 'Noun', children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]', children: [], }, ], categories: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]' ); }); it('extract a definition in word class section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]' ); }); it('extract definitions in pronunciation section', (): void => { const 
converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'pronunciation', pronunciation: '', children: [ { kind: 'definitions', children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]', children: [], }, ], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]' ); }); it('extract definitions in language section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'definitions', children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]', children: [ // eslint-disable-next-line { source: 'wiktionary', kind: 'example', } as WiktionaryExample, ], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]' ); }); /* Categories: expected to be picked up from the language section or from a word class section. */ it('extract categories in language section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: ['Category 1', 'Category 2'], children: [ { kind: 'definitions', children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog]]', children: [ // eslint-disable-next-line { source: 'wiktionary', kind: 'example', } as WiktionaryExample, ], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, 
page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.categories).toEqual(['Category 1', 'Category 2']); }); it('extract categories in word class section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', categories: ['Category 1', 'Category 2'], children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 1]]', children: [], }, ], }, { kind: 'wordClass', wordClass: 'Noun', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 2]]', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.categories).toEqual(['Category 1', 'Category 2']); }); /* Multiple and nested definitions: results are expected in document order; a parent definition's meaning is prefixed to each child as "<parent>: <child>". Note the first fixture below uses two separate 'Noun' word class sections. */ it('extract multiple definitions in same word class section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 1]]', children: [], }, ], }, { kind: 'wordClass', wordClass: 'Noun', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 2]]', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 1]]' ); expect(dictionaryEntry.definitions[1].meaning).toEqual( '{{lb|zh|archaic|Min 
Dong|dialectal Wu}} [[dog 2]]' ); }); it('extract two-level definitions (definition contains sub-definitions) in language section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'definitions', children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}}', children: [ { kind: 'definition', source: 'wiktionary', meaning: '[[dog 1]]', children: [], }, { kind: 'definition', source: 'wiktionary', meaning: '[[dog 2]]', children: [], }, ], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}}: [[dog 1]]' ); expect(dictionaryEntry.definitions[1].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}}: [[dog 2]]' ); }); it('extract many different levels of definition in language section', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '犬', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'definitions', children: [ { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}}', children: [ { kind: 'definition', source: 'wiktionary', meaning: '[[dog 1]]', children: [], }, { kind: 'definition', source: 'wiktionary', meaning: '[[dog 2]]', children: [], }, ], }, { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 3]]', children: [], }, { kind: 'definition', source: 'wiktionary', meaning: '{{lb|zh|archaic|Min Dong|dialectal Wu}}', children: [ { kind: 'definition', source: 'wiktionary', meaning: '[[dog 4]]', children: [], }, ], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, 
page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.definitions[0].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}}: [[dog 1]]' ); expect(dictionaryEntry.definitions[1].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}}: [[dog 2]]' ); expect(dictionaryEntry.definitions[2].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}} [[dog 3]]' ); expect(dictionaryEntry.definitions[3].meaning).toEqual( '{{lb|zh|archaic|Min Dong|dialectal Wu}}: [[dog 4]]' ); }); /* IPA: each newline-separated line of the pronunciation text is expected to become one ipa item with every "IPA(key): " label removed. */ it('extract multiple pronunciations', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'test', languages: [ { languageName: 'English', categories: [], children: [ { kind: 'pronunciation', pronunciation: [ '(UK, US) IPA(key): /ɪnˌsaɪ.kləˈpi(ː).di.ə/', 'IPA(key): /θɪˈsɔːɹəs/', '(Sweden) IPA(key): /dɔm/, (formal) IPA(key): /deː/, (dialectal) IPA(key): /diː/, IPA(key): /dɪ/', ].join('\n'), children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.ipa).toEqual([ '(UK, US) /ɪnˌsaɪ.kləˈpi(ː).di.ə/', '/θɪˈsɔːɹəs/', '(Sweden) /dɔm/, (formal) /deː/, (dialectal) /diː/, /dɪ/', ]); }); it('extract a single pronunciation in multiple lines', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'test', languages: [ { languageName: 'English', categories: [], children: [ { kind: 'pronunciation', pronunciation: [ '(Hà Nội) IPA(key): [ŋu˧˧ muəj˧˨ʔ]', '(Huế) IPA(key): [ŋʊw˧˧ muj˨˩ʔ]', '(Hồ Chí Minh City) IPA(key): [ŋʊw˧˧ muj˨˩˨]', ].join('\n'), children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.ipa).toEqual([ '(Hà Nội) [ŋu˧˧ muəj˧˨ʔ]', '(Huế) [ŋʊw˧˧ muj˨˩ʔ]', '(Hồ Chí Minh City) [ŋʊw˧˧ muj˨˩˨]', ]); }); 
/* Chinese pronunciation: the expected pinyin and zhuyin values correspond to the "(Pinyin): " and "(Zhuyin): " lines of the pronunciation fixture; the parenthesised numbered pinyin is not part of the expected result. */ it('extract pinyin', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '愛', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'pronunciation', pronunciation: 'Mandarin\n(Standard)\n(Pinyin): ài (ai4)\n(Zhuyin): ㄞˋ\nFile:zh-ài.ogg\n(Chengdu, SP): ngai4\n(Dungan, Cyrillic): нэ (ne, III)\nCantonese\n(Guangzhou, Jyutping): oi3\n(Taishan, Wiktionary): oi1\nGan (Wiktionary): ngai4\nHakka\n(Sixian, PFS): oi\n(Meixian, Guangdong): oi4\n', children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.pinyin).toEqual(['ài']); }); it('extract multiple pinyin', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '青', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'pronunciation', pronunciation: 'Mandarin\n(Pinyin): qīng, jīng (qing1, jing1)\n(Zhuyin): ㄑㄧㄥ, ㄐㄧㄥ\nCantonese (Jyutping): ceng1, cing1\nHakka\n(Sixian, PFS): chhiâng / chhîn / chhiang\n(Meixian, Guangdong): qiang1\nMin Bei (KCR): cháng\nMin Dong (BUC): chăng / chĭng\nMin Nan\n', children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.pinyin).toEqual(['qīng', 'jīng']); }); it('extract zhuyin', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '愛', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'pronunciation', pronunciation: 'Mandarin\n(Standard)\n(Pinyin): ài (ai4)\n(Zhuyin): ㄞˋ\nFile:zh-ài.ogg\n(Chengdu, SP): ngai4\n(Dungan, Cyrillic): нэ (ne, III)\nCantonese\n(Guangzhou, Jyutping): oi3\n(Taishan, Wiktionary): oi1\nGan (Wiktionary): ngai4\nHakka\n(Sixian, PFS): oi\n(Meixian, Guangdong): oi4\n', children: [], }, ], }, ], }; const dictionaryEntry = assertExists( 
converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.zhuyin).toEqual(['ㄞˋ']); }); /* Chinese script variants: simplified and traditional forms are expected to be read from the language-level "form" text, a glyph origin section, an etymology section, or a definitions headword. */ it('extract simplified form from form', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '鱷梨', languages: [ { languageName: 'Chinese', form: 'crocodile; alligator\n\npear\n\n\ntrad. (鱷梨/鰐梨)\n\n鱷/鰐\n\n梨\n\n\nsimp. (鳄梨)\n\n鳄\n\n梨\n(These forms in the hanzi box are uncreated: "鳄梨", "鰐梨".)', categories: [], children: [ { kind: 'pronunciation', pronunciation: 'test', children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.simplified).toEqual(['鳄梨']); }); it('extract simplified from glyph origin', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '鳥', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'glyphOrigin', glyphOrigin: 'trad.\n\n鳥\n\n\nsimp.\n\n鸟\n(This form in the hanzi box is uncreated: "鸟".)', }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.simplified).toEqual(['鸟']); }); it('extract traditional form from form', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '鳄梨', languages: [ { languageName: 'Chinese', categories: [], form: 'For pronunciation and definitions of 鳄梨 – see 鱷梨.(This term, 鳄梨, is the simplified form of 鱷梨.)\n\n\nNotes:\nSimplified Chinese is mainly used in Mainland China and Singapore.\nTraditional Chinese is mainly used in Hong Kong, Macau, and Taiwan.', children: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.traditional).toEqual(['鱷梨']); }); it('extract traditional form from etymology', (): void => { const converter = new WiktionaryPageConverter(); const 
page: WiktionaryPage = { title: '苹', languages: [ { languageName: 'Chinese', children: [ { kind: 'etymology', sectionName: 'Etymology 1', etymology: 'For pronunciation and definitions of 苹 – see 蘋.(This character, 苹, is the simplified form of 蘋.)\n\n\nNotes:\nSimplified Chinese is mainly used in Mainland China and Singapore.\nTraditional Chinese is mainly used in Hong Kong, Macau, and Taiwan.', children: [], }, { kind: 'etymology', sectionName: 'Etymology 2', etymology: 'trad.\n\n苹\n\n\nsimp. #\n\n苹', children: [], }, ], categories: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.traditional).toEqual(['蘋']); }); it('extract traditional form from glyph origin (1)', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '鸟', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'glyphOrigin', glyphOrigin: 'Simplified from 鳥. Based on its cursive script form. ', }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.traditional).toEqual(['鳥']); }); /* NOTE(review): the glyphOrigin literal in case (2) below spans a line break right after "form. " in this view; presumably the fixture ends with an invisible line-separator character, which would be its only difference from case (1) — confirm against the repository and keep that character intact. */ it('extract traditional form from glyph origin (2)', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '鸟', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'glyphOrigin', glyphOrigin: 'Simplified from 鳥. Based on its cursive script form. 
', }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.traditional).toEqual(['鳥']); }); it('extract traditional form from definitions headword', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '鳄梨', languages: [ { languageName: 'Chinese', categories: [], children: [ { kind: 'definitions', headword: 'For pronunciation and definitions of 猫 – see 貓.(This character, 猫, is the simplified and variant form of 貓.)\n\n\nNotes:\nSimplified Chinese is mainly used in Mainland China and Singapore.\nTraditional Chinese is mainly used in Hong Kong, Macau, and Taiwan.', children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]) ); expect(dictionaryEntry.traditional).toEqual(['貓']); }); /* Japanese: readings come from the pronunciation text (prefixed with the reading type when the fixture names one, and without the trailing reference markers); hiragana and romaji come from the word class headword. */ it('extract reading (Kun’yomi)', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '苺', languages: [ { languageName: 'Japanese', children: [ { kind: 'pronunciation', pronunciation: 'Kun’yomi\n(Tokyo) いちご [ìchígó] (Heiban – [0])[1][2][3]\n (Tokyo) い​ちご [íꜜchìgò] (Atamadaka – [1])[1][2]\nIPA(key): [it͡ɕiɡo̞]\n(Tohoku) IPA(key): [ɨzɨŋo̞], [ɨzɨᵑɡo̞]', children: [], }, ], categories: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.reading).toEqual([ '(Kun’yomi) いちご [ìchígó] (Heiban – [0])', '(Kun’yomi) い​ちご [íꜜchìgò] (Atamadaka – [1])', ]); }); it('extract reading (On’yomi)', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '人', languages: [ { languageName: 'Japanese', children: [ { kind: 'pronunciation', pronunciation: 'On’yomi: Goon\n(Tokyo) に​ん [níꜜǹ] (Atamadaka – [1])\n IPA(key): [ɲ̟ĩɴ]', children: [], }, ], categories: [], }, ], }; const 
dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.reading).toEqual([ '(On’yomi) に​ん [níꜜǹ] (Atamadaka – [1])', ]); }); it('extract reading (no reading type)', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '人', languages: [ { languageName: 'Japanese', children: [ { kind: 'pronunciation', pronunciation: '(Tokyo) りんご [rìńgó] (Heiban – [0])[2]', children: [], }, ], categories: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.reading).toEqual(['りんご [rìńgó] (Heiban – [0])']); }); it('extract hiragana', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '易しい', languages: [ { languageName: 'Japanese', children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: '易しい (-i inflection, hiragana やさしい, rōmaji yasashii)', children: [ { kind: 'definition', source: 'wiktionary', meaning: 'easy', children: [], }, ], categories: ['basic words'], }, ], categories: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.hiragana).toEqual(['やさしい']); }); it('extract romaji at the end of headword', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '易しい', languages: [ { languageName: 'Japanese', children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: '易しい (-i inflection, hiragana やさしい, rōmaji yasashii)', children: [ { kind: 'definition', source: 'wiktionary', meaning: 'easy', children: [], }, ], categories: ['basic words'], }, { kind: 'wordClass', wordClass: 'Adjective', headword: '易しい (-i inflection, rōmaji yasashii, hiragana やさしい, )', children: [ { kind: 
'definition', source: 'wiktionary', meaning: 'easy', children: [], }, ], categories: ['basic words'], }, ], categories: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.romaji).toEqual(['yasashii']); }); it('extract romaji in the middle of headword', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '易しい', languages: [ { languageName: 'Japanese', children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: '易しい (-i inflection, rōmaji yasashii, hiragana やさしい', children: [ { kind: 'definition', source: 'wiktionary', meaning: 'easy', children: [], }, ], categories: ['basic words'], }, ], categories: [], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.romaji).toEqual(['yasashii']); }); /* Korean: the expected romanization matches the value listed under the "Revised Romanization" entries of the pronunciation fixture. */ it('extract romanization in pronunciations', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: '글피', languages: [ { languageName: 'Korean', categories: [], children: [ { kind: 'pronunciation', pronunciation: "IPA(key)[kɯɭpʰi]\nPhonetic Hangul[글피]\n\n\n\n\nRevised Romanization?\ngeulpi\nRevised Romanization (translit.)?\ngeulpi\nMcCune–Reischauer?\nkŭlp'i\nYale Romanization?\nkulphi", children: [], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.romanization).toEqual(['geulpi']); }); /* Grammatical gender from the word class headword: an "f" marker is expected to give feminine and an "m" marker masculine; a headword that only lists feminine/plural forms is expected to give masculine. */ it('extract gender', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'manzana', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'manzana f (plural manzanas)', categories: [], children: [ { 
kind: 'definition', source: 'wiktionary', meaning: 'apple', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.gender).toEqual(['feminine']); }); it('extract gender at the end', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'Panda', languages: [ { languageName: 'German', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'Panda m', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'panda', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.gender).toEqual(['masculine']); }); it('extract gender with multiple forms', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'nervioso', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'nervioso (feminine singular nerviosa, masculine plural nerviosos, feminine plural nerviosas)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'nervous', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.gender).toEqual(['masculine']); }); /* Plural forms from the headword: each "<gender> plural X" entry is expected as "(<gender>) X". */ it('extract plural', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'cansada', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'cansado m (feminine singular cansada, masculine plural cansados, feminine plural cansadas, comparable)', categories: [], children: [ { kind: 
'definition', source: 'wiktionary', meaning: 'Feminine singular of adjective cansado', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.plural).toEqual([ '(masculine) cansados', '(feminine) cansadas', ]); }); it('extract plural with gender', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'nervioso', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'nervioso (feminine singular nerviosa, masculine plural nerviosos, feminine plural nerviosas) ', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'nervous', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.plural).toEqual([ '(masculine) nerviosos', '(feminine) nerviosas', ]); }); /* Feminine counterpart from the headword: expected for "female X", "Feminine: X", "feminine X" and "feminine singular X", and expected to stay undefined for "feminine plural X" alone. */ it('extract feminine by female', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'Bär', languages: [ { languageName: 'German', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'Bär m (genitive Bären, plural Bären, female Bärin)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'bear', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.feminine).toEqual(['Bärin']); }); it('extract feminine by Feminine:', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'Orso', languages: [ { languageName: 'Italian', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 
'orso m (plural orsi) Feminine: orsa', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'bear', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.feminine).toEqual(['orsa']); }); it('extract feminine', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'gatto', languages: [ { languageName: 'Italian', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'gatto m (plural gatti, feminine gatta)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'cat', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.feminine).toEqual(['gatta']); }); it('extract feminine from feminine singular', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'enojado', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'enojado (feminine singular enojada, masculine plural enojados, feminine plural enojadas)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'angry', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.feminine).toEqual(['enojada']); }); it('should not extract feminine from feminine plural', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'enojado', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'enojado (masculine 
plural enojados, feminine plural enojadas)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'angry', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.feminine).toEqual(undefined); }); /* Masculine counterpart: expected for "male X" and "masculine X" in the headword, or from definition text such as "Feminine singular of adjective X" / "female equivalent of X"; expected to stay undefined for "masculine plural X" alone. */ it('extract masculine from male', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'Bärin', languages: [ { languageName: 'German', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'Bärin f (genitive Bärin, plural Bärinnen, male Bär)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'bear', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.masculine).toEqual(['Bär']); }); it('extract masculine', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'gatta', languages: [ { languageName: 'Italian', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'gatta f (plural gatte, masculine gatto)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'female cat', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.masculine).toEqual(['gatto']); }); it('extract masculine from definitions', (): void => { const converter = new WiktionaryPageConverter(); const page1: WiktionaryPage = { title: 'enojada', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'enojada f sg', categories: [], children: [ { kind: 'definition', source: 
'wiktionary', meaning: 'Feminine singular of adjective enojado.', children: [], }, ], }, ], }, ], }; const page2: WiktionaryPage = { title: 'avvocatessa', languages: [ { languageName: 'Italian', categories: [], children: [ { kind: 'wordClass', wordClass: 'Noun', headword: 'avvocatessa f (plural avvocatesse)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'female equivalent of avvocato', children: [], }, ], }, ], }, ], }; const dictionaryEntry1 = assertExists( converter.convertToDictionaryEntry(page1.title, page1.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry1.masculine).toEqual(['enojado']); const dictionaryEntry2 = assertExists( converter.convertToDictionaryEntry(page2.title, page2.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry2.masculine).toEqual(['avvocato']); }); it('should not extract masculine from masculine plural', (): void => { const converter = new WiktionaryPageConverter(); const page: WiktionaryPage = { title: 'enojado', languages: [ { languageName: 'Spanish', categories: [], children: [ { kind: 'wordClass', wordClass: 'Adjective', headword: 'enojado (masculine plural enojados, feminine plural enojadas)', categories: [], children: [ { kind: 'definition', source: 'wiktionary', meaning: 'angry', children: [], }, ], }, ], }, ], }; const dictionaryEntry = assertExists( converter.convertToDictionaryEntry(page.title, page.languages[0]), 'dictionaryEntry should not be null' ); expect(dictionaryEntry.masculine).toEqual(undefined); }); });