在前面两篇《聚BT浏览器扩展自定义网站从入门到精通1-HTTP请求类型GET、GET_CUSTOM》、《聚BT浏览器扩展自定义网站从入门到精通2-HTTP请求类型NEWTAB、NEWTAB_IFRAME》讲解了聚BT浏览器扩展 HTTP请求类型的GET、GET_CUSTOM以及NEWTAB、NEWTAB_IFRAME。这篇继续讲解POST。
GET/GET_CUSTOM和NEWTAB/NEWTAB_IFRAME (实际上前面讲的NEWTAB是GET类型的新开标签页),系统都有缺省的searcher或parser,但对于POST类型,searcher和parser都需要自定义。
普通POST的典型例子:好恐怖 https://www.hkb123.com 。值得推荐一下,好恐怖电影站质量不错,尤其是很多冷门片此站都有收集。
POST类型的请求,在查询时候,浏览器地址栏不会带查询关键词信息,因此在自定义站点页面,网站搜索地址可以只填写官网首页:https://www.hkb123.com ,不需要带searchkeyword关键词。
1、实际请求URL(Request URL)为:https://s.haokongbu.net/e/search,请求类型(Request Method)为:POST
2、服务器返回302重定向请求,重定向地址为 https://s.haokongbu.net/e/search/result/?searchid=582944 ,请求类型为GET
地址栏变为:https://s.haokongbu.net/e/search/result/?searchid=582944 ,搜索结果返回并展示。
/**
 * Custom searcher for the hkb123 site: submits the keyword with an HTTP POST
 * and returns the body of the response for the matching parser to consume.
 *
 * @param {string} url - Site address passed in by the extension. The search
 *   endpoint is derived from it by swapping "www" for "s" and appending
 *   "/e/search/" (e.g. https://www.hkb123.com -> https://s.hkb123.com/e/search/).
 * @param {string} keyword - Raw (not yet URL-encoded) search keyword.
 * @returns {Promise<string|null>} The `body` field of the POST response, or
 *   null when the response does not have the expected shape or the request
 *   throws.
 */
async function hkb123Searcher(url, keyword) {
  try {
    // Drop trailing slashes first so a configured "https://www.hkb123.com/"
    // does not turn into ".../​/e/search/" with a doubled slash.
    const searchUrl = url.replace(/\/+$/, "").replace("www", "s") + "/e/search/";
    const body = "show=title%2Cftitle%2Cactor&keyboard=" + encodeURIComponent(keyword);
    // httpPost is supplied by the extension runtime, not by this snippet.
    const response = await httpPost(searchUrl, "application/x-www-form-urlencoded", body);
    // NOTE(review): a usable response is expected to carry exactly two
    // properties, one of them `body` - confirm against httpPost's contract.
    if (response && Object.keys(response).length === 2) {
      return response.body;
    }
    return null;
  } catch (err) {
    // Error properties are non-enumerable, so list them explicitly to get
    // message/stack into the serialized output.
    console.info("searcher.js err: " + JSON.stringify(err, Object.getOwnPropertyNames(err), 2));
    return null;
  }
}
对应的系统内置parser 为hkb123Parser
/**
 * Parser paired with hkb123Searcher: extracts the result links from the
 * search-result page returned by the POST request.
 *
 * @param {string} content - Page content handed over by the searcher.
 * @param {string} url - Base address used to resolve relative links.
 * @param {string} source - Copied unchanged into every result's `source`.
 * @param {string} keyword - Search keyword; stored as both `desc` and
 *   `keyword` of every result.
 * @returns {Array<Object>} One entry per anchor that has both a link and a
 *   visible text, with `type` fixed to "movie".
 */
function hkb123Parser(content, url, source, keyword) {
  // documentFactory / relativeUrlToAbsolute are supplied by the extension
  // runtime; presumably the former builds a queryable document from content.
  const doc = documentFactory(content);
  const anchors = doc.querySelectorAll("div.channel-content ul li p a");
  const hrefs = [];
  for (const anchor of anchors) {
    const rawHref = anchor.getAttribute("href");
    if (!rawHref) {
      continue;
    }
    const link = relativeUrlToAbsolute(rawHref, url);
    const linkText = anchor.innerText;
    if (link && linkText) {
      hrefs.push({
        link: link,
        linkText: linkText,
        desc: keyword,
        source: source,
        keyword: keyword,
        url: url,
        type: "movie"
      });
    }
  }
  return hrefs;
}
以上举的好恐怖是普通的POST,但假如页面请求涉及复杂的交互逻辑,例如有ajax请求或请求报文经过加密,此时,为降低抓包分析难度,可以直接使用NEWTAB/NEWTAB_IFRAME 配合自定义searcher的方案。
典型例子:知网 https://www.cnki.net
/**
 * Custom searcher for CNKI, meant for the NEWTAB / NEWTAB_IFRAME modes: it
 * runs against the page's own `document`, types the keyword into the
 * #txt_search box and clicks the search button once the value has stuck.
 *
 * @param {string} url - Not used by the logic; only passed along on retries.
 * @param {string} keyword - Text to place in the search box.
 * @returns {void} All work happens through DOM side effects and timers.
 */
function cnkiSearcher(url, keyword) {
  const searchBox = document.getElementById("txt_search");

  if (!searchBox) {
    // `total` is not declared in this snippet - presumably a retry counter
    // owned by the surrounding script; verify where it is initialised.
    total++;
    if (total < 7) {
      // Retry after a delay: calling again synchronously would burn every
      // attempt in the same tick, before the page had any chance to render.
      setTimeout(function () {
        cnkiSearcher(url, keyword);
      }, 1000);
    }
    return;
  }

  searchBox.value = keyword;
  // Input values are always strings, so compare against the string form.
  const expected = String(keyword);

  const handler = setInterval(function () {
    const current = document.getElementById("txt_search");
    if (!current) {
      // Box is gone for the moment; look again on the next tick.
      return;
    }
    if (current.value === expected) {
      clearInterval(handler);
      const button = document.querySelector("input.search-btn");
      if (button) {
        button.click();
      }
      return;
    }
    // The value did not stick: write it again and let this same interval
    // re-check, rather than starting another interval on top of this one.
    current.value = keyword;
  }, 1000);
}
1、采用 NEWTAB_IFRAME 模式打开标签页
通过document.getElementById("txt_search") 轮询页面txt_search元素
4、模拟点击 input.search-btn完成搜索。
/**
 * Parser for CNKI result pages in the NEWTAB / NEWTAB_IFRAME modes: reads the
 * result anchors straight from the live `document` and rewrites each one to a
 * kns.cnki.net detail-page link.
 *
 * @param {string} url - Copied unchanged into every result's `url`.
 * @param {string} source - Copied unchanged into every result's `source`.
 * @param {string} keyword - Search keyword; stored as both `desc` and
 *   `keyword` of every result.
 * @returns {Array<Object>} One entry per "td.name a" anchor whose href carries
 *   FileName / DbName / DbCode and that has visible text; `type` is "scholar".
 */
function cnkiParser(url, source, keyword) {
  const anchors = document.querySelectorAll("td.name a");
  const hrefs = [];
  for (const anchor of anchors) {
    const rawHref = anchor.getAttribute("href");
    if (!rawHref) {
      // Anchors without an href would otherwise throw on .match().
      continue;
    }
    // [^&]* keeps each capture inside its own query parameter; a greedy .*
    // would drag any parameters following DbCode into the captured value.
    const match = rawHref.match(/FileName=([^&]*)&DbName=([^&]*)&DbCode=([^&]*)/);
    if (!match) {
      continue;
    }
    const filename = match[1];
    const dbname = match[2];
    const dbcode = match[3];
    const link = "https://kns.cnki.net/kcms/detail/detail.aspx?filename=" + filename + "&dbname=" + dbname + "&dbcode=" + dbcode;
    const linkText = anchor.innerText;
    console.debug("parser.js cnkiParser href is: " + link + ", text is:" + linkText);
    if (linkText) {
      hrefs.push({
        link: link,
        linkText: linkText,
        desc: keyword,
        source: source,
        keyword: keyword,
        url: url,
        type: "scholar"
      });
    }
  }
  return hrefs;
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | let myDocument = documentFactory(content); const elements = myDocument.querySelectorAll( "td.name a" ); let hrefs = []; for (let element of elements) { let href = element.getAttribute( "href" ); let match = href.match(/FileName=(.*)&DbName=(.*)&DbCode=(.*)&/); if (match) { let filename = match[1]; let dbname = match[2]; let dbcode = match[3]; href = "https://kns.cnki.net/kcms/detail/detail.aspx?filename=" + filename + "&dbname=" + dbname + "&dbcode=" + dbcode; let text = element.innerText; let desc = keyword; console.debug( "parser.js mikuclubParser href is: " + href + ", text is:" + text); if (href && text) { hrefs.push({ link: href, linkText: text, desc: desc, source: source, keyword: keyword, url: url, type: "scholar" }); } } } return hrefs; |
1 2 | let myDocument = documentFactory(content); const elements = myDocument.querySelectorAll( "td.name a" ); |
由此总算介绍完了聚BT浏览器扩展的几种请求类型,下一篇针对系统内置的几个函数重点介绍,包括documentFactory, httpGet,httpPost等,以更好理解自定义searcher和parser的逻辑。