利用搜索引擎批量抓取url

有的时候爆出0day,我们需要抓取大量的url进行测试,这个时候需要用到搜索引擎,常用的有:google,fofa,shodan
下面是整理的利用 js 对不同搜索引擎进行批量抓取的代码:
fofa: (如何使用:StartReq(搜索语法,开始页码,结束页码) )

  /**
   * Queue one FOFA search request per result page.
   *
   * @param {string} q          Search expression, e.g. 'body=wooyun'.
   * @param {number} startpage  First page to request (inclusive).
   * @param {number} endpage    Last page to request (inclusive).
   */
  function StartReq(q,startpage,endpage){
    // Base64 of the UTF-8 bytes of the query. Plain btoa(q) throws on
    // characters outside Latin-1 (for example Chinese keywords).
    // NOTE(review): assumes the server decodes qbase64 as UTF-8 - confirm.
    var qbase64 = btoa(unescape(encodeURIComponent(q)));
    for(var i=startpage;i<=endpage;i++){
      // Req() appends this string right after "page=", so the page number
      // comes first and every following parameter needs its own "&".
      // Base64 output may contain "+", "/" and "=", hence the extra encoding.
      Req(i+"&q="+encodeURIComponent(q)+"&qbase64="+encodeURIComponent(qbase64));
    }
  }

  /**
   * Minimal asynchronous XMLHttpRequest helper.
   *
   * @param {string}   Sendtype  HTTP method, e.g. "GET".
   * @param {string}   url       Request URL.
   * @param {string}   content   Request body passed to send().
   * @param {function} callback  Called with responseText on HTTP 200.
   */
  function Connection(Sendtype,url,content,callback){
    var xmlhttp;
    if (window.XMLHttpRequest){
      xmlhttp=new XMLHttpRequest();
    }
    else{
      // Fallback for old Internet Explorer versions.
      xmlhttp=new ActiveXObject("Microsoft.XMLHTTP");
    }
    xmlhttp.onreadystatechange=function(){
      if(xmlhttp.readyState!==4){
        return;
      }
      if(xmlhttp.status===200){
        callback(xmlhttp.responseText);
      }
      else{
        // Report failed pages instead of dropping them silently.
        console.warn("Request failed ("+xmlhttp.status+"): "+url);
      }
    };
    xmlhttp.open(Sendtype,url,true);
    xmlhttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");
    xmlhttp.send(content);
  }

  /**
   * Fetch one FOFA result page and log the href of every result link.
   * Relies on a global jQuery `$` being available on the current page.
   *
   * @param {string} searchString  Text appended after "page=" in the URL:
   *                               the page number followed by the remaining
   *                               query parameters.
   */
  function Req(searchString){
    var searchurl = "http://fofa.so/search/result?page="+searchString;
    Connection("GET",searchurl,"",function(html){
      $(html).find('div.col-lg-4 a').each(function(index,anchor){
        var link = $(anchor);
        var href = link.attr('href');
        // Anchors without an href cannot yield a URL; skip them.
        if(link.attr('target')!=="_blank" || !href){
          return;
        }
        // Skip the "checkapp" helper links. The explicit -1 comparison is
        // required: indexOf() returns -1 (truthy) when the text is absent.
        if(href.indexOf('/search/checkapp?all=true&host=')===-1){
          console.log(href);
        }
      });
    });
  }

  // Example: search for body=wooyun and fetch result pages 1 to 10.
  StartReq('body=wooyun',1,10);

google:

  // Collected result URLs. Nothing is printed; inspect or de-duplicate this
  // array once the requests have finished.
  var tmp = [];
  // Matches an http:// or https:// URL up to the last "/" in the string.
  var HerfRegExp = /http:\/\/\w.*\/|https:\/\/\w.*\//;

  /**
   * Queue one Google search request per result page (100 results per page).
   *
   * @param {string} q          Search expression, e.g. "site:xss1.com".
   * @param {number} startpage  First page to request (inclusive, 1-based).
   * @param {number} endpage    Last page to request (inclusive).
   */
  function StartReq(q,startpage,endpage){
    for(var i=startpage;i<=endpage;i++){
      // "start" is a zero-based result offset, so page 1 begins at 0.
      // NOTE(review): "newwindow=<page>" is kept from the original listing;
      // its purpose is not evident from this code.
      Req("q="+encodeURIComponent(q)+"&start="+((i-1)*100)+"&num=100&newwindow="+i);
    }
  }

  /**
   * Minimal asynchronous XMLHttpRequest helper.
   *
   * @param {string}   Sendtype  HTTP method, e.g. "GET".
   * @param {string}   url       Request URL.
   * @param {string}   content   Request body passed to send().
   * @param {function} callback  Called with responseText on HTTP 200.
   */
  function Connection(Sendtype,url,content,callback){
    var xmlhttp;
    if (window.XMLHttpRequest){
      xmlhttp=new XMLHttpRequest();
    }
    else{
      // Fallback for old Internet Explorer versions.
      xmlhttp=new ActiveXObject("Microsoft.XMLHTTP");
    }
    xmlhttp.onreadystatechange=function(){
      if(xmlhttp.readyState!==4){
        return;
      }
      if(xmlhttp.status===200){
        callback(xmlhttp.responseText);
      }
      else{
        // Report failed pages instead of dropping them silently.
        console.warn("Request failed ("+xmlhttp.status+"): "+url);
      }
    };
    xmlhttp.open(Sendtype,url,true);
    xmlhttp.setRequestHeader("Content-Type","application/x-www-form-urlencoded");
    xmlhttp.send(content);
  }

  /**
   * Fetch one Google result page and append every result URL to `tmp`.
   * Requires jQuery (`$`), which the bootstrap code below loads first.
   *
   * @param {string} searchString  Query string appended to the search URL.
   */
  function Req(searchString){
    var searchurl = "https://www.google.com.hk/search?"+searchString;
    Connection("GET",searchurl,"",function(html){
      $(html).find('div.rc h3.r a').each(function(index,anchor){
        var match = HerfRegExp.exec($(anchor).attr('href'));
        // Skip links that do not match, so tmp never receives "null".
        if(match!==null){
          tmp.push(match[0]);
        }
      });
    });
  }

  // Load jQuery, then start the search. Starting only from onload guarantees
  // that `$` exists by the time a response is parsed.
  var jqueryScript = document.createElement('script');
  jqueryScript.onload = function(){
    StartReq("site:xss1.com",1,1);
  };
  jqueryScript.src = '//code.jquery.com/jquery-1.9.1.min.js';
  document.body.appendChild(jqueryScript);

最后的结果不会直接输出,而是存入 tmp 数组,方便去重;如果需要输出,可以自行加一个循环遍历 tmp,把值打印出来。
如何使用:StartReq(搜索语法,开始页码,结束页码)

shodan:

  // Base search URL; the page number is appended for each request.
  // The literal "关键字" ("keyword") is the placeholder for the search term.
  var url = "http://www.shodanhq.com/search?q=关键字&page=";
  // Captures the link text inside every <div class='ip'> result entry.
  var hostPattern = /\<div class=\'ip\'>.*?<a href=\".*?\">(.*?)<\/a>.*?<\/div>/ig;

  // Walk result pages 1..100, one blocking request per page.
  var i = 1;
  while (i < 101) {
    var xhr = window.ActiveXObject
      ? new ActiveXObject("Microsoft.XMLHTTP")
      : new XMLHttpRequest();

    // Third argument false = synchronous: send() returns after the response.
    xhr.open("GET", url + i, false);
    // NOTE(review): presumably sent to defeat caching - confirm.
    xhr.setRequestHeader('If-Modified-Since', '0');
    xhr.send(null);

    // Strip line breaks so "." in the pattern can span a whole entry.
    var html = xhr.responseText.replace(/\r/g,"").replace(/\n/g,"");

    // Collect the captured link text of every match on this page.
    var urls = [];
    var hit;
    hostPattern.lastIndex = 0;
    while ((hit = hostPattern.exec(html)) !== null) {
      urls.push(hit[1]);
    }

    // One console entry per page, one host per line.
    console.info(urls.join('\n'));
    i++;
  }

新版的shodan:

  // Base search URL (query "port:27017"); the page number is appended
  // for each request.
  var url = "https://www.shodan.io/search?query=port%3A27017&page=";
  // Captures the link text inside every <div class="ip"> result entry.
  var hostPattern = /\<div class=\"ip\">.*?<a href=\".*?\">(.*?)<\/a>.*?<\/div>/ig;

  // Walk result pages 1..100, one blocking request per page.
  var i = 1;
  while (i < 101) {
    var xhr = window.ActiveXObject
      ? new ActiveXObject("Microsoft.XMLHTTP")
      : new XMLHttpRequest();

    // Third argument false = synchronous: send() returns after the response.
    xhr.open("GET", url + i, false);
    // NOTE(review): presumably sent to defeat caching - confirm.
    xhr.setRequestHeader('If-Modified-Since', '0');
    xhr.send(null);

    // Strip line breaks so "." in the pattern can span a whole entry.
    var html = xhr.responseText.replace(/\r/g,"").replace(/\n/g,"");

    // Collect the captured link text of every match on this page.
    var urls = [];
    var hit;
    hostPattern.lastIndex = 0;
    while ((hit = hostPattern.exec(html)) !== null) {
      urls.push(hit[1]);
    }

    // One console entry per page, one host per line.
    console.info(urls.join('\n'));
    i++;
  }

以上内容整理自:http://zone.wooyun.org/content/16840

About the Author

admin

Leave a Reply

Your email address will not be published. Required fields are marked *