import {TextArea, UrlArea} from "../src/entry";
const assert = require('assert');
function expect(result: string | null) {
return {
toBe: function(expected: string | null) {
if (result !== expected) {
throw new Error(result + ' is not equal to ' + expected);
}
}
}
}
describe('BDD style (URL,EMAIL)', function() {
before(function() {
// excuted before test suite
});
after(function() {
// excuted after test suite
});
describe('UrlArea', function() {
it('normalizeUrl', function() {
expect(UrlArea.normalizeUrl("htp/:/abcgermany.,def;:9094 #park//noon??abc=retry").normalizedUrl)
.toBe("http://abcgermany.de:9094#park/noon?abc=retry");
});
it('parseUrl', function() {
expect(UrlArea.parseUrl("xtp://gooppalgo.com/park/tree/?abc=1").onlyUriWithParams)
.toBe("/park/tree/?abc=1");
});
});
describe('TextArea', function() {
const textStr = 'http://[::1]:8000에서 http ://www.example.com/wpstyle/?p=364 is ok \n' +
'HTTP://foo.com/blah_blah_(wikipedia) https://www.google.com/maps/place/USA/@36.2218457,... tnae1ver.com:8000on the internet Asterisk\n ' +
'the packed1book.net. 가나다@apacbook.ac.kr? adssd@asdasd.ac.jp... fakeshouldnotbedetected.url?abc=fake s5houl7十七日dbedetected.jp?japan=go&html=가나다@pacbook.travelersinsurance; abc.com/ad/fg/?kk=5 abc@daum.net' +
'Have you visited http://goasidaio.ac.kr?abd=5안녕하세요?5...,.&kkk=5rk.,, ' +
'http://✪df.ws/123\n' +
'http://142.42.1.1:8080/\n' +
'http://-.~_!$&\'()*+,;=:%40:80%2f::::::a@example.com 가abc@pacbook.net ' +
'Have you visited goasidaio.ac.kr?abd=5hell0?5...&kkk=5rk.,. abc.def 1353aa.liars다';;
it('extractAllUrls', function() {
assert.deepEqual(TextArea.extractAllUrls(textStr), [
{
"value": {
"url": "http://[::1]:8000",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "[::1]",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "ipV6",
"port": "8000"
},
"area": "text",
"index": {
"start": 0,
"end": 17
}
},
{
"value": {
"url": "http://www.example.com/wpstyle/?p=364",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "www.example.com",
"onlyParams": "?p=364",
"onlyUri": "/wpstyle/",
"onlyUriWithParams": "/wpstyle/?p=364",
"onlyParamsJsn": {
"p": "364"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 20,
"end": 58
}
},
{
"value": {
"url": "HTTP://foo.com/blah_blah_(wikipedia)",
"removedTailOnUrl": "",
"protocol": "HTTP",
"onlyDomain": "foo.com",
"onlyParams": null,
"onlyUri": "/blah_blah_(wikipedia)",
"onlyUriWithParams": "/blah_blah_(wikipedia)",
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 66,
"end": 102
}
},
{
"value": {
"url": "https://www.google.com/maps/place/USA/@36.2218457,...",
"removedTailOnUrl": "",
"protocol": "https",
"onlyDomain": "www.google.com",
"onlyParams": null,
"onlyUri": "/maps/place/USA/@36.2218457,...",
"onlyUriWithParams": "/maps/place/USA/@36.2218457,...",
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 103,
"end": 156
}
},
{
"value": {
"url": "tnae1ver.com:8000",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "tnae1ver.com",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": "8000"
},
"area": "text",
"index": {
"start": 157,
"end": 174
}
},
{
"value": {
"url": "packed1book.net",
"removedTailOnUrl": ".",
"protocol": null,
"onlyDomain": "packed1book.net.",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 205,
"end": 220
}
},
{
"value": {
"url": "s5houl7十七日dbedetected.jp?japan=go&html=가나다@pacbook.travelersinsurance;",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "s5houl7十七日dbedetected.jp",
"onlyParams": "?japan=go&html=가나다@pacbook.travelersinsurance;",
"onlyUri": null,
"onlyUriWithParams": "?japan=go&html=가나다@pacbook.travelersinsurance;",
"onlyParamsJsn": {
"japan": "go",
"html": "가나다@pacbook.travelersinsurance;"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 301,
"end": 384
}
},
{
"value": {
"url": "abc.com/ad/fg/?kk=5",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "abc.com",
"onlyParams": "?kk=5",
"onlyUri": "/ad/fg/",
"onlyUriWithParams": "/ad/fg/?kk=5",
"onlyParamsJsn": {
"kk": "5"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 385,
"end": 404
}
},
{
"value": {
"url": "http://goasidaio.ac.kr?abd=5안녕하세요?5...,.&kkk=5rk",
"removedTailOnUrl": ".,,",
"protocol": "http",
"onlyDomain": "goasidaio.ac.kr",
"onlyParams": "?abd=5안녕하세요?5...,.&kkk=5rk.,,",
"onlyUri": null,
"onlyUriWithParams": "?abd=5안녕하세요?5...,.&kkk=5rk.,,",
"onlyParamsJsn": {
"abd": "5안녕하세요?5...,.",
"kkk": "5rk.,,"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 434,
"end": 482
}
},
{
"value": {
"url": "http://✪df.ws/123",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "✪df.ws",
"onlyParams": null,
"onlyUri": "/123",
"onlyUriWithParams": "/123",
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 486,
"end": 503
}
},
{
"value": {
"url": "http://142.42.1.1:8080",
"removedTailOnUrl": "/",
"protocol": "http",
"onlyDomain": "142.42.1.1",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "ipV4",
"port": "8080"
},
"area": "text",
"index": {
"start": 504,
"end": 526
}
},
{
"value": {
"url": "http://-.~_!$&'()*+,;=:%40:80%2f::::::a@example.com",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "-.~_!$&'()*+,;=:%40:80%2f::::::a@example.com",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 528,
"end": 579
}
},
{
"value": {
"url": "goasidaio.ac.kr?abd=5hell0?5...&kkk=5rk",
"removedTailOnUrl": ".,.",
"protocol": null,
"onlyDomain": "goasidaio.ac.kr",
"onlyParams": "?abd=5hell0?5...&kkk=5rk.,.",
"onlyUri": null,
"onlyUriWithParams": "?abd=5hell0?5...&kkk=5rk.,.",
"onlyParamsJsn": {
"abd": "5hell0?5...",
"kkk": "5rk.,."
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 621,
"end": 660
}
}
])
});
it('extractAllUrlsWithIntranets', function() {
assert.deepEqual(TextArea.extractAllUrls(textStr, {ipV4 : false, ipV6 :false, localhost : false, intranet : true}), [
{
"value": {
"url": "http://[::1]:8000",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "[::1]",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "ipV6",
"port": "8000"
},
"area": "text",
"index": {
"start": 0,
"end": 17
}
},
{
"value": {
"url": "http://www.example.com/wpstyle/?p=364",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "www.example.com",
"onlyParams": "?p=364",
"onlyUri": "/wpstyle/",
"onlyUriWithParams": "/wpstyle/?p=364",
"onlyParamsJsn": {
"p": "364"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 20,
"end": 58
}
},
{
"value": {
"url": "HTTP://foo.com/blah_blah_(wikipedia)",
"removedTailOnUrl": "",
"protocol": "HTTP",
"onlyDomain": "foo.com",
"onlyParams": null,
"onlyUri": "/blah_blah_(wikipedia)",
"onlyUriWithParams": "/blah_blah_(wikipedia)",
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 66,
"end": 102
}
},
{
"value": {
"url": "https://www.google.com/maps/place/USA/@36.2218457,...",
"removedTailOnUrl": "",
"protocol": "https",
"onlyDomain": "www.google.com",
"onlyParams": null,
"onlyUri": "/maps/place/USA/@36.2218457,...",
"onlyUriWithParams": "/maps/place/USA/@36.2218457,...",
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 103,
"end": 156
}
},
{
"value": {
"url": "tnae1ver.com:8000",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "tnae1ver.com",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": "8000"
},
"area": "text",
"index": {
"start": 157,
"end": 174
}
},
{
"value": {
"url": "packed1book.net",
"removedTailOnUrl": ".",
"protocol": null,
"onlyDomain": "packed1book.net.",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 205,
"end": 220
}
},
{
"value": {
"url": "fakeshouldnotbedetected.url?abc=fake",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "fakeshouldnotbedetected.url",
"onlyParams": "?abc=fake",
"onlyUri": null,
"onlyUriWithParams": "?abc=fake",
"onlyParamsJsn": {
"abc": "fake"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 264,
"end": 300
}
},
{
"value": {
"url": "s5houl7十七日dbedetected.jp?japan=go&html=가나다@pacbook.travelersinsurance;",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "s5houl7十七日dbedetected.jp",
"onlyParams": "?japan=go&html=가나다@pacbook.travelersinsurance;",
"onlyUri": null,
"onlyUriWithParams": "?japan=go&html=가나다@pacbook.travelersinsurance;",
"onlyParamsJsn": {
"html": "가나다@pacbook.travelersinsurance;",
"japan": "go"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 301,
"end": 384
}
},
{
"value": {
"url": "abc.com/ad/fg/?kk=5",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "abc.com",
"onlyParams": "?kk=5",
"onlyUri": "/ad/fg/",
"onlyUriWithParams": "/ad/fg/?kk=5",
"onlyParamsJsn": {
"kk": "5"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 385,
"end": 404
}
},
{
"value": {
"url": "http://goasidaio.ac.kr?abd=5안녕하세요?5...,.&kkk=5rk",
"removedTailOnUrl": ".,,",
"protocol": "http",
"onlyDomain": "goasidaio.ac.kr",
"onlyParams": "?abd=5안녕하세요?5...,.&kkk=5rk.,,",
"onlyUri": null,
"onlyUriWithParams": "?abd=5안녕하세요?5...,.&kkk=5rk.,,",
"onlyParamsJsn": {
"abd": "5안녕하세요?5...,.",
"kkk": "5rk.,,"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 434,
"end": 482
}
},
{
"value": {
"url": "http://✪df.ws/123",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "✪df.ws",
"onlyParams": null,
"onlyUri": "/123",
"onlyUriWithParams": "/123",
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 486,
"end": 503
}
},
{
"value": {
"url": "http://142.42.1.1:8080",
"removedTailOnUrl": "/",
"protocol": "http",
"onlyDomain": "142.42.1.1",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "ipV4",
"port": "8080"
},
"area": "text",
"index": {
"start": 504,
"end": 526
}
},
{
"value": {
"url": "http://-.~_!$&'()*+,;=:%40:80%2f::::::a@example.com",
"removedTailOnUrl": "",
"protocol": "http",
"onlyDomain": "-.~_!$&'()*+,;=:%40:80%2f::::::a@example.com",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 528,
"end": 579
}
},
{
"value": {
"url": "goasidaio.ac.kr?abd=5hell0?5...&kkk=5rk",
"removedTailOnUrl": ".,.",
"protocol": null,
"onlyDomain": "goasidaio.ac.kr",
"onlyParams": "?abd=5hell0?5...&kkk=5rk.,.",
"onlyUri": null,
"onlyUriWithParams": "?abd=5hell0?5...&kkk=5rk.,.",
"onlyParamsJsn": {
"abd": "5hell0?5...",
"kkk": "5rk.,."
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 621,
"end": 660
}
},
{
"value": {
"url": "abc.def",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "abc.def",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 664,
"end": 671
}
},
{
"value": {
"url": "1353aa.liars",
"removedTailOnUrl": "다",
"protocol": null,
"onlyDomain": "1353aa.liars다",
"onlyParams": null,
"onlyUri": null,
"onlyUriWithParams": null,
"onlyParamsJsn": null,
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 672,
"end": 684
}
}
])
});
it('extractAllEmails', function() {
assert.deepEqual(TextArea.extractAllEmails(textStr, true), [
{
"value": {
"email": "가나다@apacbook.ac.kr",
"removedTailOnEmail": null,
"type": "domain"
},
"area": "text",
"index": {
"start": 222,
"end": 240
},
"pass": false
},
{
"value": {
"email": "adssd@asdasd.ac.jp",
"removedTailOnEmail": "...",
"type": "domain",
},
"area": "text",
"index": {
"start": 242,
"end": 260
},
"pass": true
},
{
"value": {
"email": "가나다@pacbook.travelersinsurance",
"removedTailOnEmail": null,
"type": "domain"
},
"area": "text",
"index": {
"start": 346,
"end": 376
},
"pass": false
},
{
"value": {
"email": "a@example.com",
"removedTailOnEmail": null,
"type": "domain"
},
"area": "text",
"index": {
"start": 566,
"end": 579
},
"pass": true
},
{
"value": {
"email": "abc@pacbook.net",
"removedTailOnEmail": null,
"type": "domain"
},
"area": "text",
"index": {
"start": 581,
"end": 596
},
"pass": true
}
])
})
});
});
describe('BDD style (URI)', function() {
const sampleText2 = 'https://google.com/abc/777?a=5&b=7 abc/def 333/kak abc/55에서 abc/53 abc/533/ka abc/53a/ka /123a/abc/556/dd /abc/123?a=5&b=tkt /xyj/asff' +
'a333/kak nice/guy/ bad/or/nice/guy ssh://nice.guy.com/?a=dkdfl';
/**
* @brief
* Distill URIs with certain names from normal text
* @author Andrew Kang
* @param textStr string required
* @param uris array required
* for example, [['a','b'], ['c','d']]
* If you use {number}, this means 'only number' ex) [['a','{number}'], ['c','d']]
* @param endBoundary boolean (default : false)
* @return array
*/
it('extractCertainUris', function() {
const uris = TextArea.extractCertainUris(
sampleText2,
[['{number}', 'kak'], ['nice', 'guy'], ['abc', '{number}']],
true
);
console.log(uris);
assert.deepEqual(uris, [
{
"uriDetected": {
"value": {
"url": "/abc/777?a=5&b=7",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": "",
"onlyParams": "?a=5&b=7",
"onlyUri": "/abc/777",
"onlyUriWithParams": "/abc/777?a=5&b=7",
"onlyParamsJsn": {
"a": "5",
"b": "7"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 18,
"end": 34
}
},
"inWhatUrl": {
"value": {
"url": "https://google.com/abc/777?a=5&b=7",
"removedTailOnUrl": "",
"protocol": "https",
"onlyDomain": "google.com",
"onlyParams": "?a=5&b=7",
"onlyUri": "/abc/777",
"onlyUriWithParams": "/abc/777?a=5&b=7",
"onlyParamsJsn": {
"a": "5",
"b": "7"
},
"type": "domain",
"port": null
},
"area": "text",
"index": {
"start": 0,
"end": 34
}
}
},
{
"uriDetected": {
"value": {
"url": "333/kak",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": null,
"onlyParams": null,
"onlyUri": "333/kak",
"onlyUriWithParams": "333/kak",
"onlyParamsJsn": null,
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 43,
"end": 51
}
},
"inWhatUrl": undefined
},
{
"uriDetected": {
"value": {
"url": "abc/53",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": null,
"onlyParams": null,
"onlyUri": "abc/53",
"onlyUriWithParams": "abc/53",
"onlyParamsJsn": null,
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 60,
"end": 67
}
},
"inWhatUrl": undefined
},
{
"uriDetected": {
"value": {
"url": "abc/533/ka",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": null,
"onlyParams": null,
"onlyUri": "abc/533/ka",
"onlyUriWithParams": "abc/533/ka",
"onlyParamsJsn": null,
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 67,
"end": 77
}
},
"inWhatUrl": undefined
},
{
"uriDetected": {
"value": {
"url": "/123a/abc/556/dd",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": null,
"onlyParams": null,
"onlyUri": "/123a/abc/556/dd",
"onlyUriWithParams": "/123a/abc/556/dd",
"onlyParamsJsn": null,
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 89,
"end": 105
}
},
"inWhatUrl": null
},
{
"uriDetected": {
"value": {
"url": "/abc/123?a=5&b=tkt",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": null,
"onlyParams": "?a=5&b=tkt",
"onlyUri": "/abc/123",
"onlyUriWithParams": "/abc/123?a=5&b=tkt",
"onlyParamsJsn": {
"a": "5",
"b": "tkt"
},
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 106,
"end": 124
}
},
"inWhatUrl": undefined
},
{
"uriDetected": {
"value": {
"url": "nice/guy",
"removedTailOnUrl": "/",
"protocol": null,
"onlyDomain": null,
"onlyParams": null,
"onlyUri": "nice/guy",
"onlyUriWithParams": "nice/guy",
"onlyParamsJsn": null,
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 144,
"end": 153
}
},
"inWhatUrl": undefined
},
{
"uriDetected": {
"value": {
"url": "/or/nice/guy",
"removedTailOnUrl": "",
"protocol": null,
"onlyDomain": null,
"onlyParams": null,
"onlyUri": "/or/nice/guy",
"onlyUriWithParams": "/or/nice/guy",
"onlyParamsJsn": null,
"type": "uri",
"port": null
},
"area": "text",
"index": {
"start": 157,
"end": 170
}
},
"inWhatUrl": undefined
}
]);
});
});