dr_py/libs/util.ym.js
2023-04-23 21:04:12 +08:00

158 lines
4.6 KiB
JavaScript

import 'assets://js/lib/uri.min.js'
import cheerio from 'assets://js/lib/cheerio.min.js';
import 'assets://js/lib/crypto-js.js'
/**
 * Alphabet sampled by randIndex/randomStr: letters first, the ten digits last.
 * NOTE(review): the lowercase run starts with "abac" (so "a" occurs twice) and has
 * no "i", and the uppercase run has no "I". Confirm whether that is intentional
 * before editing the literal, because it changes every generated string.
 */
var charStr = 'abacdefghjklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ0123456789';

/**
 * Pick a random index between `min` and `max` (both inclusive for integer bounds).
 *
 * When `i` is 0 (the first character of a generated string) the index is never
 * taken from the last ten positions of `charStr`, i.e. the digits.
 *
 * @param {number} min - Lowest index that may be returned.
 * @param {number} max - Highest index that may be returned.
 * @param {number} i - Position of the character being generated.
 * @returns {number} A random index into `charStr`.
 * @throws {RangeError} When `i` is 0 and every index in the range is a digit index.
 */
export function randIndex(min, max, i) {
  const numStart = charStr.length - 10;
  // Loose equality is kept so that existing callers passing '0' behave as before.
  const lettersOnly = i == 0;
  // Math.min(min, max + 1) is the smallest value the draw below can produce. If
  // even that lands in the digit section, no retry can ever succeed, so fail
  // fast instead of retrying forever.
  if (
    lettersOnly &&
    typeof min === 'number' &&
    typeof max === 'number' &&
    Math.min(min, max + 1) >= numStart
  ) {
    throw new RangeError(
      `randIndex: no non-digit index available in [${min}, ${max}] for the first character`
    );
  }
  let index;
  // Rejection sampling in a loop: same distribution as the former recursive
  // retry, without growing the call stack.
  do {
    index = Math.floor(Math.random() * (max - min + 1) + min);
  } while (lettersOnly && index >= numStart);
  return index;
}
/**
 * Build a random string out of the characters of `charStr`.
 *
 * Each position asks `randIndex` for an index over the whole alphabet and passes
 * the position along, so `randIndex` can treat position 0 specially.
 *
 * @param {number} [len] - Desired length; any falsy value (including 0) means 15.
 * @returns {string} The generated string.
 */
export function randomStr(len) {
  const lowest = 0;
  const highest = charStr.length - 1;
  const wanted = len || 15;
  let out = '';
  let pos = 0;
  while (pos < wanted) {
    const picked = randIndex(lowest, highest, pos);
    out += charStr[picked];
    pos++;
  }
  return out;
}
/**
 * Combine `url` with `base` and return the result as a string.
 *
 * Both inputs are trimmed and stripped of trailing "/" first. Then:
 *  - "http://..." / "https://..." targets are used as they are;
 *  - "://host/..." targets get the base protocol prepended;
 *  - "//host/..." targets get the base protocol and ":" prepended;
 *  - anything else is appended after protocol://host[:port]/ of the base.
 * If the resulting URI has a blank path or no query, the base's path / query
 * (when present) is copied over.
 *
 * Relies on the `Uri` class and on a `rstrip` string method being available in
 * the runtime (neither is defined in this block).
 *
 * @param {string} base - URL supplying protocol/host/port and fallback path/query.
 * @param {string} url - Absolute, protocol-relative or relative target.
 * @returns {string} The combined URL.
 */
export function urljoin(base, url) {
  base = base || '';
  url = url || '';
  const baseU = new Uri(base.trim().rstrip('/'));
  const target = url.trim().rstrip('/');

  // First work out the full textual form of the target, then parse it once.
  let fullText;
  const isAbsolute = ['http://', 'https://'].some((scheme) => target.startsWith(scheme));
  if (isAbsolute) {
    fullText = target;
  } else if (target.startsWith('://')) {
    fullText = `${baseU.protocol()}${target}`;
  } else if (target.startsWith('//')) {
    fullText = `${baseU.protocol()}:${target}`;
  } else {
    fullText = `${baseU.protocol()}://${baseU.host()}${baseU.port() ? `:${baseU.port()}` : ''}/${target}`;
  }
  const u = new Uri(fullText);

  // Borrow path and query from the base when the target did not bring its own.
  const pathIsBlank = !u.path() || u.path().trim().length === 0;
  if (pathIsBlank && baseU.path()) {
    u.path(baseU.path());
  }
  const queryIsMissing = !u.query();
  if (queryIsMissing && baseU.query()) {
    u.query(baseU.query());
  }
  return u.toString();
}
// Matches rules/options that end in an attribute name whose value is treated as a URL.
const DOM_CHECK_ATTR = /(url|src|href|data-original|data-src)$/;
// Detect selector segments that already carry a positional filter (or an id, for
// SELECT_REGEX). These are only ever used with .test(), so they must NOT have the
// `g` flag: a global regex remembers `lastIndex` between .test() calls and then
// reports false negatives on the next string it is given.
const SELECT_REGEX = /:eq|:lt|:gt|#/;
const SELECT_REGEX_A = /:eq|:lt|:gt/;
/**
 * Extract one value from HTML using a "&&"-separated rule.
 *
 * The last "&&" segment is the option: "Text" (text content), "Html" (inner
 * HTML) or any other string, which is read as an attribute name. The segments
 * before it are selectors joined by a space; each selector that does not already
 * contain ":eq", ":lt", ":gt" or "#" gets ":eq(0)" appended. A rule without "&&"
 * is used as a plain selector and the selection's string form is returned.
 *
 * @param {string|{rr: Function, ele: Object}} html - Raw markup, or a node
 *   wrapper as produced by pdfa (`rr` is the cheerio function, `ele` the node).
 *   For a wrapper, a leading "body&&" in the rule is dropped, and "body&&<option>"
 *   applies the option to the node itself.
 * @param {string} parse - The rule; an empty/blank rule yields ''.
 * @param {string} [base_url] - When given and the option names a URL attribute
 *   (see DOM_CHECK_ATTR), the value is made absolute: everything before the first
 *   "http" is cut off, or the value is joined to `base_url` with urljoin.
 * @returns {string} The extracted value (whatever cheerio returns for a missing
 *   attribute/element is passed through unchanged).
 */
export function pdfh(html, parse, base_url) {
  if (!parse || !parse.trim()) {
    return '';
  }
  const eleFind = typeof html === 'object';
  let selector = parse;
  let option;
  if (eleFind && selector.startsWith('body&&')) {
    selector = selector.slice(6);
    if (!selector.includes('&&')) {
      // "body&&<option>": apply the option to the wrapped node itself.
      option = selector.trim();
      selector = '*=*';
    }
  }
  if (selector.includes('&&')) {
    const steps = selector.split('&&');
    option = steps.pop();
    // String#search ignores the regex's `g` flag and `lastIndex`, unlike
    // RegExp#test, which on a global regex carries state from one segment to the
    // next and used to append ":eq(0)" to segments that already had an index.
    selector = steps
      .map((step) => (step.search(SELECT_REGEX) === -1 ? `${step}:eq(0)` : step))
      .join(' ');
  }

  const $ = eleFind ? html.rr : cheerio.load(html);
  let ret;
  if (!eleFind) {
    ret = $(selector);
  } else if (selector === '*=*' || $(html.ele).is(selector)) {
    ret = html.ele;
  } else {
    ret = $(html.ele).find(selector);
  }

  if (!option) {
    return $(ret).toString();
  }

  let result;
  if (option === 'Text') {
    result = $(ret).text();
  } else if (option === 'Html') {
    result = $(ret).html();
  } else {
    result = $(ret).attr(option);
  }
  if (result && base_url && DOM_CHECK_ATTR.test(option)) {
    if (/http/.test(result)) {
      // Drop any junk in front of an embedded absolute URL.
      result = result.slice(result.indexOf('http'));
    } else {
      result = urljoin(base_url, result);
    }
  }
  return result;
}
/**
 * Select a list of nodes from HTML using a "&&"-separated selector rule.
 *
 * Segments are joined by a space. Every segment except the last one gets
 * ":eq(0)" appended unless it already contains ":eq", ":lt" or ":gt", so only
 * the last segment may match several nodes.
 *
 * @param {string|{rr: Function, ele: Object}} html - Raw markup, or a node
 *   wrapper previously returned by this function.
 * @param {string} parse - The rule; an empty/blank rule yields [].
 * @returns {Array<{rr: Function, ele: Object}>} One wrapper per matched node:
 *   `rr` is the cheerio function the node belongs to, `ele` the node itself.
 */
export function pdfa(html, parse) {
  if (!parse || !parse.trim()) {
    return [];
  }
  const eleFind = typeof html === 'object';
  let selector = parse;
  if (selector.includes('&&')) {
    const steps = selector.split('&&');
    const lastIdx = steps.length - 1;
    // String#search ignores the regex's `g` flag and `lastIndex`; RegExp#test on
    // a global regex is stateful and gave false negatives for consecutive
    // segments such as "ul:eq(1)&&li:eq(0)&&a".
    selector = steps
      .map((step, idx) =>
        idx < lastIdx && step.search(SELECT_REGEX_A) === -1 ? `${step}:eq(0)` : step
      )
      .join(' ');
  }

  const $ = eleFind ? html.rr : cheerio.load(html);
  let ret;
  if (!eleFind) {
    ret = $(selector);
  } else if ($(html.ele).is(selector)) {
    ret = html.ele;
  } else {
    ret = $(html.ele).find(selector);
  }

  const result = [];
  if (ret) {
    // `ret` may be the bare node (html.ele), which has no .each(); wrapping with
    // $() gives a selection in every case.
    $(ret).each((idx, ele) => {
      result.push({ rr: $, ele: ele });
    });
  }
  return result;
}
/**
 * Bundle of the HTML parsing helpers.
 *  - pdfh: extract a single value.
 *  - pdfa: select a list of nodes.
 *  - pd:   like pdfh, but rules ending in a URL attribute (see DOM_CHECK_ATTR)
 *          always yield an absolute URL.
 */
const defaultParser = {
  pdfh: pdfh,
  pdfa: pdfa,
  /**
   * Extract a value and, for URL attributes, make it absolute.
   *
   * @param {string|Object} html - Markup or node wrapper, passed on to pdfh.
   * @param {string} parse - Extraction rule, passed on to pdfh.
   * @param {string} [uri] - Base URL for relative values; when omitted/empty the
   *   global MY_URL is used if it is defined, otherwise ''.
   * @returns {string} The extracted (and possibly absolutized) value.
   */
  pd(html, parse, uri) {
    // `pd` is also handed out detached from this object, where `this` is
    // undefined in strict/module code; fall back to the module-level pdfh then.
    // An object that overrides `pdfh` is still honored.
    const extract = this && typeof this.pdfh === 'function' ? this.pdfh : pdfh;
    let ret = extract.call(this, html, parse);
    if (DOM_CHECK_ATTR.test(parse)) {
      if (/http/.test(ret)) {
        // Drop any junk in front of an embedded absolute URL.
        ret = ret.substr(ret.indexOf('http'));
      } else {
        // Prefer the caller's base; `typeof` avoids a ReferenceError when the
        // host has not defined MY_URL.
        const pageUrl = typeof MY_URL !== 'undefined' ? MY_URL : '';
        ret = urljoin(uri || pageUrl, ret);
      }
    }
    return ret;
  },
};
// Publish the helpers as globals so scripts that do not import this module can
// call them by bare name. `joinUrl` is an alias of `urljoin`.
globalThis.randIndex = randIndex;
globalThis.randomStr = randomStr;
globalThis.urljoin = urljoin;
globalThis.joinUrl = urljoin;
globalThis.defaultParser = defaultParser;
globalThis.pdfa = defaultParser.pdfa;
globalThis.pdfh = defaultParser.pdfh;
// `pd` reads `this.pdfh`. This file is an ES module (strict mode), so a bare
// `pd(...)` call would run with `this === undefined` and throw; bind it to its
// owner so the global works however it is invoked.
globalThis.pd = defaultParser.pd.bind(defaultParser);