import axios from 'axios'
import https from 'https'
import get from 'lodash-es/get.js'
import size from 'lodash-es/size.js'
import each from 'lodash-es/each.js'
import map from 'lodash-es/map.js'
import trim from 'lodash-es/trim.js'
import values from 'lodash-es/values.js'
import evem from 'wsemi/src/evem.mjs'
import sep from 'wsemi/src/sep.mjs'
import isestr from 'wsemi/src/isestr.mjs'
import ispint from 'wsemi/src/ispint.mjs'
import isbol from 'wsemi/src/isbol.mjs'
import isearr from 'wsemi/src/isearr.mjs'
import cint from 'wsemi/src/cint.mjs'
import waitFun from 'wsemi/src/waitFun.mjs'
import provideServer from './provideServer.mjs'
/**
 * Periodically download a public proxy list and keep track of the proxies that can
 * actually reach a target URL.
 *
 * Two independent polling loops are started as soon as the function is called:
 * one downloads the raw "host:port" list from `opt.src`, the other (first run delayed
 * by 3s so that a raw list is likely available) sends a request to `opt.tar` through
 * every raw proxy and maintains the set of proxies that answered successfully.
 *
 * Emitted events:
 * - `getRawProxies(prxsRaw)`: after every download, with the parsed raw list (always an array).
 * - `add(p, proxies)`: a proxy passed the test and was not in the valid set before.
 * - `delete(p, proxies)`: a proxy that was in the valid set failed the test and was removed.
 * - `change(proxies)`: after every test round, with the current valid proxies.
 *
 * @class
 * @param {Object} [opt={}] Settings object, default {}
 * @param {String} [opt.src='https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=5000&anonymity=all'] URL of the API returning the proxy list, one "host:port" per line
 * @param {String} [opt.tar='https://httpbin.org/ip'] URL requested through each proxy to check that it works, default 'https://httpbin.org/ip'
 * @param {Integer} [opt.timeGetProxies=30*1000] Polling interval for downloading the proxy list, in ms, default 30*1000
 * @param {Integer} [opt.timeTestProxies=60*1000] Polling interval for testing the proxies, in ms, default 60*1000
 * @param {Boolean} [opt.withServer=false] Whether to start a server exposing the valid proxies, default false
 * @param {Integer} [opt.serverPort=8080] Port used by the server, default 8080
 * @param {String} [opt.serverApiName='getProxies'] Name of the API exposing the data, default 'getProxies'
 * @param {Array} [opt.serverCorsOrigins=['*']] Allowed CORS origins, ['*'] allows all, default ['*']
 * @returns {Object} Event object with an extra async function `getProxies`, which resolves with the valid proxies once at least one is known
 * @example
 *
 * import _ from 'lodash-es'
 * import wip from './src/WIpProxy.mjs'
 *
 * let wo = wip({
 *     // tar: `https://api.binance.com/api/v3/ticker/price?symbol=BTCUSDT`,
 *     withServer: true,
 *     serverPort: 9000,
 *     serverCorsOrigins: ['*'],
 * })
 * wo.on('getRawProxies', (prxsRaw) => {
 *     console.log(`fetched ${_.size(prxsRaw)} public proxies...`)
 * })
 * wo.on('add', (p, proxies) => {
 *     // console.log('proxy added', { host: p.host, port: p.port }, _.map(proxies, 'proxy'))
 * })
 * wo.on('delete', (p, proxies) => {
 *     // console.log('proxy removed', { host: p.host, port: p.port }, _.map(proxies, 'proxy'))
 * })
 * wo.on('change', (proxies) => {
 *     // console.log(`valid proxies`, _.map(proxies, 'proxy'))
 *     console.log(`${_.size(proxies)} valid proxies after testing...`)
 * })
 *
 * //browser view: http://localhost:9000/getProxies
 *
 */
function WIpProxy(opt = {}) {

    //src
    let src = get(opt, 'src', '')
    if (!isestr(src)) {
        src = 'https://api.proxyscrape.com/v2/?request=displayproxies' +
            `&protocol=http` +
            `&timeout=5000` +
            `&anonymity=all`
    }

    //tar
    let tar = get(opt, 'tar', '')
    if (!isestr(tar)) {
        tar = 'https://httpbin.org/ip'
    }

    //timeGetProxies
    let timeGetProxies = get(opt, 'timeGetProxies')
    if (!ispint(timeGetProxies)) {
        timeGetProxies = 30 * 1000 //30s
    }
    timeGetProxies = cint(timeGetProxies)

    //timeTestProxies
    let timeTestProxies = get(opt, 'timeTestProxies')
    if (!ispint(timeTestProxies)) {
        timeTestProxies = 60 * 1000 //1min
    }
    timeTestProxies = cint(timeTestProxies)

    //withServer
    let withServer = get(opt, 'withServer')
    if (!isbol(withServer)) {
        withServer = false
    }

    //serverPort
    let serverPort = get(opt, 'serverPort')
    if (!ispint(serverPort)) {
        serverPort = 8080
    }
    serverPort = cint(serverPort)

    //serverApiName
    let serverApiName = get(opt, 'serverApiName')
    if (!isestr(serverApiName)) {
        serverApiName = 'getProxies'
    }

    //serverCorsOrigins
    let serverCorsOrigins = get(opt, 'serverCorsOrigins', [])
    if (!isearr(serverCorsOrigins)) {
        serverCorsOrigins = ['*']
    }

    //ev, created before the helpers below so every emit site sees an initialized emitter
    let ev = evem()

    //prxsRaw: latest parsed raw list, kpPrx: valid proxies keyed by "host:port"
    let prxsRaw = []
    let kpPrx = {}

    //agent, one shared instance instead of one per tested proxy; certificate errors are ignored so they do not abort the test
    let agent = new https.Agent({ rejectUnauthorized: false })

    //hasPrx, own-property check so inherited keys of the plain object never count as proxies
    let hasPrx = (key) => {
        return Object.prototype.hasOwnProperty.call(kpPrx, key)
    }

    /**
     * Download and parse the raw proxy list.
     * Never rejects: on any failure the error message is logged and [] is returned.
     *
     * @param {String} src URL of the proxy list API
     * @returns {Promise<Array>} Array of { proxy, host, port }
     */
    async function _getProxiesCore(src) {
        let ps = []
        try {

            //res
            let res = await axios.get(src)

            //c, the list is expected as plain text
            let c = res.data
            if (!isestr(c)) {
                throw new Error('invalid response data')
            }

            //ss
            let ss = sep(c, '\n')

            //ps, skip malformed lines instead of producing entries with an empty host or port 0
            each(ss, (v) => {

                //trim removes the '\r' left over when the API answers with CRLF line endings
                let [host, port] = trim(v).split(':')
                host = trim(host)
                port = cint(port)

                if (isestr(host) && ispint(port) && port <= 65535) {
                    ps.push({
                        proxy: `${host}:${port}`, //same key format as used for the tested proxies
                        host,
                        port,
                    })
                }

            })

        }
        catch (err) {
            console.log('_getProxiesCore catch', err.message)
            ps = []
        }
        return ps
    }

    /**
     * Refresh prxsRaw and notify listeners through 'getRawProxies'.
     */
    async function _getProxies() {
        let ps = await _getProxiesCore(src)
        prxsRaw = Array.isArray(ps) ? ps : [] //listeners and the test loop always get an array
        ev.emit('getRawProxies', prxsRaw)
    }

    /**
     * Request url through the given proxy.
     * Never rejects: failures are reported through the returned state.
     *
     * @param {String} host Proxy host
     * @param {Integer} port Proxy port
     * @param {String} url URL to request through the proxy
     * @returns {Promise<Object>} { state: 'success'|'error', msg, p: { proxy, host, port } }
     */
    async function _testProxyCore(host, port, url) {

        //ret
        let ret = (state, msg) => {
            return {
                state,
                msg,
                p: {
                    proxy: `${host}:${port}`,
                    host,
                    port,
                },
            }
        }

        //r
        let r = {}
        try {
            let res = await axios.get(url, {
                proxy: {
                    host,
                    port,
                    protocol: 'http',
                },
                httpsAgent: agent,
                timeout: 5000,
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36',
                    'Accept': 'application/json',
                },
            })
            if (res.status === 200) {
                r = ret('success', res.data)
            }
            else {
                r = ret('error', `status[${res.status}] error`)
            }
        }
        catch (err) {
            r = ret('error', trim(err.message))
        }

        return r
    }

    /**
     * Test all given proxies concurrently against tar.
     *
     * @param {Array} prxs Array of { proxy, host, port }
     * @returns {Promise<Object>} { psValid, psInvalid }, both arrays of proxy objects
     */
    async function _testProxiesCore(prxs) {

        //pms
        let pms = map(prxs, (p) => {
            return _testProxyCore(p.host, p.port, tar)
        })

        //rrs, allSettled so one failing test cannot abort the whole round
        let rrs = await Promise.allSettled(pms)

        //psValid, psInvalid
        let psValid = []
        let psInvalid = []
        each(rrs, (v, k) => {
            let b = get(v, 'status', '') === 'fulfilled' && get(v, 'value.state', '') === 'success'

            //a rejected result carries no value, fall back to the input entry so p is never null
            let p = get(v, 'value.p', null) || get(prxs, k, null)
            if (!p) {
                return
            }

            if (b) {
                psValid.push(p)
            }
            else {
                psInvalid.push(p)
            }
        })

        return {
            psValid,
            psInvalid,
        }
    }

    /**
     * Run one test round over prxsRaw and update kpPrx.
     * 'add' and 'delete' are emitted only for real membership changes, 'change' after every round.
     */
    async function _testProxies() {
        let r = await _testProxiesCore(prxsRaw)

        each(r.psValid, (p) => {
            let isNew = !hasPrx(p.proxy)
            kpPrx[p.proxy] = p
            if (isNew) {
                ev.emit('add', p, values(kpPrx))
            }
        })

        each(r.psInvalid, (p) => {
            //most raw proxies never were valid, skip them instead of emitting a 'delete' for each
            if (hasPrx(p.proxy)) {
                delete kpPrx[p.proxy]
                ev.emit('delete', p, values(kpPrx))
            }
        })

        ev.emit('change', values(kpPrx))
    }

    /**
     * Get the currently valid proxies, waiting (via waitFun) until at least one is known.
     *
     * @returns {Promise<Array>} Array of { proxy, host, port }
     */
    async function getProxies() {
        let vs
        await waitFun(() => {
            vs = values(kpPrx)
            return size(vs) > 0
        })
        return vs
    }

    //_runSafe, a throwing listener or unexpected error must not become an unhandled rejection of a timer callback
    let _runSafe = (fun, name) => {
        fun()
            .catch((err) => {
                console.log(`${name} catch`, get(err, 'message', err))
            })
    }

    //download the proxy list now and then on every interval
    _runSafe(_getProxies, '_getProxies')
    setInterval(() => {
        _runSafe(_getProxies, '_getProxies')
    }, timeGetProxies)

    //filter the valid proxies, first round delayed by 3s to give the download time to finish
    setTimeout(() => {
        _runSafe(_testProxies, '_testProxies')
        setInterval(() => {
            _runSafe(_testProxies, '_testProxies')
        }, timeTestProxies)
    }, 3000)

    //withServer
    if (withServer) {
        provideServer(getProxies, {
            port: serverPort,
            apiName: serverApiName,
            corsOrigins: serverCorsOrigins,
        })
    }

    //save
    ev.getProxies = getProxies

    return ev
}

export default WIpProxy