hta作品笨狼CSDN爬虫
作者:admin
时间:2021-01-30 21:01
<html>
<head>
<title>笨狼CSDN爬虫</title>
<style>
/* All lengths carry an explicit px unit. The original relied on the
   quirks-mode behaviour that treats unitless numbers as pixels; the px
   suffix renders the same there and stays valid in standards mode. */
body
{
    font-size:12px;
    margin:20px;
    background-color:#eeeeee;
}
div
{
    font-size:12px;
    border:1px solid navy;
    margin-left:20px;
}
/* Underlined, borderless number fields embedded in the URL text. */
input.num
{
    width:20px;
    overflow:visible;
    border-right:0px;
    border-top:0px;
    border-left:0px;
    border-bottom:1px solid navy;
    margin-left:-5px;
    margin-bottom:-2px;
    background-color:#eeeeee;
}
/* Decorative glyphs drawn with the Webdings font. */
.worm
{
    font-family:webdings;
    font-size:30px;
    color:#663300;
}
</style>
</head>
<body bgcolor="#ffffff" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">
<!-- The ids numStart1/numStart2/numEnd1/numEnd2, smallclassid and wormClick
     are the element names the page script reads as globals; without them
     gogogo() fails on its first statement. -->
<!-- NOTE(review): changeitem() is not visible in this part of the file; the
     id "bigclassid" on the first select is assumed by analogy with
     "smallclassid" - confirm against changeitem(). -->
帖子号码<br>
<span class="worm" >!</span>从: http://community.csdn.net/Expert/topic/
<input id="numStart1" value="3738" class="num">/
<input id="numStart2" value="3738400" class="num">
.xml?temp=.XXXXXXX
<br>
<span class="worm">"</span>到: http://community.csdn.net/Expert/topic/
<input id="numEnd1" value="3739" class="num">
/<input id="numEnd2" value="3748450" class="num">
.xml?temp=.XXXXXXX
<br/><br/>
爬行范围<br/>
<span class="worm">ß</span>
<select id="bigclassid" onchange="javascript:changeitem();">
<option value="0">全部社区</option>
<option value="3" selected>Web开发</option>
<option value="4">软件工程/管理</option>
<option value="5">企业开发</option>
<option value="6">专题开发</option>
<option value="7">多媒体/设计</option>
<option value="8">Linux/Unix社区</option>
<option value="9">硬件/嵌入开发</option>
<option value="10">Windows专区</option>
<option value="11">硬件使用</option>
<option value="13">软件培训/认证/考试</option>
<option value="14">扩充话题</option>
<option value="15">社区支持</option>
<option value="16">产品/厂家</option>
<option value="17">其他开发语言</option>
<option value="18">其他数据库开发</option>
<option value="20">计算机图书</option>
<option value="50">VC/MFC</option>
<option value="51">VB</option>
<option value="52">.NET技术</option>
<option value="53">Delphi</option>
<option value="54">Java</option>
<option value="55">C++Builder</option>
<option value="56">C/C++</option>
<option value="57">MS-SQLServer</option>
<option value="58">PowerBuilder</option>
<option value="59">WebSphere</option>
<option value="60">DB2</option>
<option value="61">Oracle</option>
<option value="62">程序员杂志</option>
<option value="63">移动平台</option>
<option value="64">PowerTools控件专区</option>
<option value="65">《开发高手》杂志</option>
<option value="66">palmos</option>
<option value="67">2004微软创新杯软件开发大赛</option>
<option value="68">《MSDN开发精选》杂志</option>
</select>
<select id="smallclassid">
<option value="301" selected>ASP</option>
<option value="303">PHP</option>
<option value="304">JavaScript</option>
<option value="305">CGI</option>
<option value="306">XML/SOAP</option>
<option value="307">IIS</option>
<option value="308">Apache</option>
<option value="309">应用服务器</option>
<option value="310">HTML(CSS)</option>
<option value="311">其他</option>
<option value="312">非技术区</option>
<option value="313">ColdFusion</option>
</select>
<br/>
<button id="wormClick" type="button" onclick="gogogo();" >开始爬...</button>
<br/> <br/>
说明:<br/>
1,将CSDN帖子分类保存在爬虫身边/*.XML文件;
<br/>
2,也可以选择保留XML文件,进行深加工;
<br/>
3,请根据当前的帖子号码来设定爬行距离;
<br/>
4,欢迎光临<a href="http://superdullwolf.cnzone.net/index.asp" target="_blank">我的站点</a>;
<script language="javascript">
window.resizeTo(600,400);
window.moveTo(window.screen.availWidth/2-300,window.screen.availHeight/2-150);
var xmlURL=new String();
var headURL ="http://community.csdn.net/Expert/topic/";
var tailURL =".xml?temp=";
var xmlhttp = new ActiveXObject("Microsoft.XMLHTTP");
var stream = new ActiveXObject("ADODB.Stream");
var fso = new ActiveXObject("Scripting.FileSystemObject")
//定义本文件绝对路径
var thisFileDir =getthisFileDir();
//定义本文件名
var thisFileName = LastOne(thisFileDir,"\\");
//定义本文件夹路径
var thisFileFolder = getFolderDir(thisFileDir);
InitBigClass();
InitSmallClass();
var xmlID;
var xmlFolder = setFolder("xml");
function gogogo()
{
    // Main routine: walks every (i, j) pair in the half-open ranges
    // [numStart1, numEnd1) x [numStart2, numEnd2), builds the topic URL
    // headURL + i + "/" + j + tailURL + <random suffix>, and hands it to
    // saveToFile(). Failures for individual topics are skipped on purpose
    // (best-effort crawl).
    wormClick.innerText ="正在爬...";

    // Radix 10 so that values with a leading zero are never read as octal.
    // Bounds are parsed once; the inputs cannot change while this loop runs.
    var firstDir   = parseInt(numStart1.value, 10);
    var lastDir    = parseInt(numEnd1.value, 10);
    var firstTopic = parseInt(numStart2.value, 10);
    var lastTopic  = parseInt(numEnd2.value, 10);

    // i and j are declared locally; the original leaked them as globals.
    for (var i = firstDir; i < lastDir; i++)
    {
        for (var j = firstTopic; j < lastTopic; j++)
        {
            xmlURL = headURL + i + "/" + j + tailURL + Math.random().toString().substr(1,8);
            // saveToFile() names the output file from the global xmlID, so it
            // must be set BEFORE the call. Setting it afterwards saved the
            // first file as "undefined.xml" and every later one under the
            // previous topic number.
            xmlID = j;
            try
            {
                saveToFile(xmlURL,smallclassid.value);
            }
            catch(e)
            {
                // Deliberately ignored: one failed topic must not stop the crawl.
            }
        }
    }
    alert("爬行完毕!!");
    wormClick.innerText = "开始爬...";
}
function saveToFile(URL,classID)
{
    // Downloads URL synchronously and, on HTTP 200, writes the raw response
    // body to xmlFolder + xmlID + ".xml" through the shared ADODB stream.
    //
    // URL     - address to fetch with a synchronous GET.
    // classID - accepted for interface compatibility; not used by this function.
    // Returns true when a file was written, false when the request failed or
    // the status was not 200. Errors raised while writing the file still
    // propagate to the caller, as before.
    var result;
    try
    {
        xmlhttp.Open("GET",URL, false);
        xmlhttp.Send();
        result = xmlhttp.status;
    }
    catch(e)
    {
        return(false);
    }
    if (xmlhttp.readyState != 4 || result != 200)
    {
        return(false);
    }
    try
    {
        stream.type=1; // 1 = binary
        stream.Open();
        stream.Write(xmlhttp.responseBody);
        stream.SaveToFile(xmlFolder + xmlID + ".xml");
    }
    finally
    {
        // The stream object is shared across calls. If Write/SaveToFile throws
        // (e.g. the target file already exists) and the stream stays open,
        // every later Open() fails. State 0 means closed.
        if (stream.State != 0)
        {
            stream.Close();
        }
    }
    return(true);
}
function setFolder(str)
{
    // Makes sure the sub-folder named `str` exists next to this file and
    // returns its path with a trailing backslash.
    var target = thisFileFolder + str;
    var result = target + "\\";
    if (fso.FolderExists(target))
    {
        return result;
    }
    fso.CreateFolder(target);
    return result;
}
function getthisFileDir()
{
    // Returns the full Windows path of this file, derived from
    // window.location.href: the URL prefix is removed, %XX escapes are
    // decoded and forward slashes become backslashes.
    //
    // "file:///C:/dir/a.hta"      -> "C:\dir\a.hta"
    // "file://server/share/a.hta" -> "\\server\share\a.hta"
    var href = window.location.href;
    // A UNC location has only two slashes before the host name. The fixed
    // 8-character cut used for "file:///" would eat the first letter of the
    // host, so that form keeps the host and gets a leading "//".
    var isUnc = /^file:\/\/[^\/]/i.test(href);
    var path = isUnc ? "//" + href.substr(7) : href.substr(8);
    path = unescape(path);
    return path.replace(/\//g, "\\");
}
function LastOne(Str,splitStr)
{
    // Splits Str on splitStr and returns the final piece
    // (the whole string when the separator does not occur).
    var pieces = Str.split(splitStr);
    return pieces.pop();
}
function getFolderDir(fullDir)
{
    // Takes a full path and returns it with the last backslash-separated
    // segment removed; the trailing backslash is kept.
    var tailLength = LastOne(fullDir,"\\").length;
    var keep = fullDir.length - tailLength;
    return fullDir.slice(0, keep);
}
function InitBigClass()
{
    // Fills the global `bigclass` table. Each row is [id, display name].
    bigclass = [
        ["0",  "全部社区"],
        ["3",  "Web 开发"],
        ["4",  "软件工程/管理"],
        ["5",  "企业开发"],
        ["6",  "专题开发"],
        ["7",  "多媒体/设计"],
        ["8",  "Linux/Unix社区"],
        ["9",  "硬件/嵌入开发"],
        ["10", "Windows专区"],
        ["11", "硬件使用"],
        ["13", "软件培训/认证/考试"],
        ["14", "扩充话题"],
        ["15", "社区支持"],
        ["16", "产品/厂家"],
        ["17", "其他开发语言"],
        ["18", "其他数据库开发"],
        ["20", "计算机图书"],
        ["50", "VC/MFC"],
        ["51", "VB"],
        ["52", ".NET技术"],
        ["53", "Delphi"],
        ["54", "Java"],
        ["55", "C++ Builder"],
        ["56", "C/C++"],
        ["57", "MS-SQL Server"],
        ["58", "PowerBuilder"],
        ["59", "WebSphere"],
        ["60", "DB2"],
        ["61", "Oracle"],
        ["62", "程序员杂志"],
        ["63", "移动平台"],
        ["64", "PowerTools控件专区"],
        ["65", "《开发高手》杂志"],
        ["66", "palm os"],
        ["67", "2004微软创新杯软件开发大赛"],
        ["68", "《MSDN开发精选》杂志"]
    ];
}
function InitSmallClass(){
smallclass = new Array();
smallclass[0] = new Array();
smallclass[0][0] = "301";
smallclass[0][1] = "ASP";
smallclass[0][2] = "3";
smallclass[1] = new Array();
smallclass[1][0] = "303";
smallclass[1][1] = "PHP";
smallclass[1][2] = "3";
smallclass[2] = new Array();
smallclass[2][0] = "304";
smallclass[2][1] = "JavaScript";
smallclass[2][2] = "3";
smallclass[3] = new Array();
smallclass[3][0] = "305";
smallclass[3][1] = "CGI";
smallclass[3][2] = "3";
smallclass[4] = new Array();
smallclass[4][0] = "306";
smallclass[4][1] = "XML/SOAP";
smallclass[4][2] = "3";
smallclass[5] = new Array();
smallclass[5][0] = "307";
smallclass[5][1] = "IIS";
smallclass[5][2] = "3";
smallclass[6] = new Array();
smallclass[6][0] = "308";
smallclass[6][1] = "Apache";
smallclass[6][2] = "3";
smallclass[7] = new Array();
smallclass[7][0] = "309";
smallclass[7][1] = "应用服务器";
smallclass[7][2] = "3";
smallclass[8] = new Array();
smallclass[8][0] = "310";
smallclass[8][1] = "HTML(CSS)";
smallclass[8][2] = "3";
smallclass[9] = new Array();
smallclass[9][0] = "311";
smallclass[9][1] = "其他";
smallclass[9][2] = "3";
smallclass[10] = new Array();
smallclass[10][0] = "312";
smallclass[10][1] = "非技术区";
smallclass[10][2] = "3";
smallclass[11] = new Array();
smallclass[11][0] = "313";
smallclass[11][1] = "ColdFusion";
smallclass[11][2] = "3";
smallclass[12] = new Array();
smallclass[12][0] = "401";
smallclass[12][1] = "开发方法版";
smallclass[12][2] = "4";
smallclass[13] = new Array();
smallclass[13][0] = "402";
smallclass[13][1] = "开发过程版";
smallclass[13][2] = "4";
smallclass[14] = new Array();
smallclass[14][0] = "403";
smallclass[14][1] = "质量管理与控制版";
smallclass[14][2] = "4";
smallclass[15] = new Array();
smallclass[15][0] = "404";
smallclass[15][1] = "工程管理版";
smallclass[15][2] = "4";
smallclass[16] = new Array();
smallclass[16][0] = "405";
smallclass[16][1] = "休闲广场";
smallclass[16][2] = "4";
smallclass[17] = new Array();
smallclass[17][0] = "406";
smallclass[17][1] = "微创软件开发管理";
smallclass[17][2] = "4";
smallclass[18] = new Array();
smallclass[18][0] = "407";
smallclass[18][1] = "Rational";
smallclass[18][2] = "4";
smallclass[19] = new Array();
smallclass[19][0] = "408";
smallclass[19][1] = "Power Designer";
smallclass[19][2] = "4";
smallclass[20] = new Array();
smallclass[20][0] = "409";
smallclass[20][1] = "软件规划版";
smallclass[20][2] = "4";
smallclass[21] = new Array();
smallclass[21][0] = "501";
smallclass[21][1] = "地理信息系统";
smallclass[21][2] = "5";
smallclass[22] = new Array();
smallclass[22][0] = "502";
smallclass[22][1] = "企业信息化";
smallclass[22][2] = "5";
smallclass[23] = new Array();
smallclass[23][0] = "503";
smallclass[23][1] = "行业开发";
smallclass[23][2] = "5";
smallclass[24] = new Array();
smallclass[24][0] = "504";
smallclass[24][1] = "WebLogic";
smallclass[24][2] = "5";
smallclass[25] = new Array();
smallclass[25][0] = "505";
smallclass[25][1] = "Lotus";
smallclass[25][2] = "5";
smallclass[26] = new Array();
smallclass[26][0] = "506";
smallclass[26][1] = "Exchange Server";
smallclass[26][2] = "5";
smallclass[27] = new Array();
smallclass[27][0] = "507";
smallclass[27][1] = "其他";
smallclass[27][2] = "5";
smallclass[28] = new Array();
smallclass[28][0] = "508";
smallclass[28][1] = "SAP";
smallclass[28][2] = "5";
smallclass[29] = new Array();
smallclass[29][0] = "509";
smallclass[29][1] = "BizTalk Server";
smallclass[29][2] = "5";
smallclass[30] = new Array();
smallclass[30][0] = "601";
smallclass[30][1] = "多媒体/流媒体开发";
smallclass[30][2] = "6";
smallclass[31] = new Array();
smallclass[31][0] = "602";
smallclass[31][1] = "游戏开发";
smallclass[31][2] = "6";
smallclass[32] = new Array();
smallclass[32][0] = "603";
smallclass[32][1] = "数据结构与算法";
smallclass[32][2] = "6";
smallclass[33] = new Array();
smallclass[33][0] = "604";
smallclass[33][1] = "网络通信";
smallclass[33][2] = "6";
smallclass[34] = new Array();
smallclass[34][0] = "605";
smallclass[34][1] = "图形图象";
smallclass[34][2] = "6";
smallclass[35] = new Array();
smallclass[35][0] = "606";
smallclass[35][1] = "数据库应用/设计版";
smallclass[35][2] = "6";
smallclass[36] = new Array();
smallclass[36][0] = "607";
smallclass[36][1] = "信息/网络安全";
smallclass[36][2] = "6";
smallclass[37] = new Array();
smallclass[37][0] = "608";
smallclass[37][1] = "非技术区";
smallclass[37][2] = "6";
smallclass[38] = new Array();
smallclass[38][0] = "701";
smallclass[38][1] = "网页设计(Dreamweaver等)";
smallclass[38][2] = "7";
smallclass[39] = new Array();
smallclass[39][0] = "702";
smallclass[39][1] = "图象工具使用";
smallclass[39][2] = "7";
smallclass[40] = new Array();
smallclass[40][0] = "703";
smallclass[40][1] = "Flash流媒体";
smallclass[40][2] = "7";
smallclass[41] = new Array();
smallclass[41][0] = "704";
smallclass[41][1] = "多媒体设计(3DMaX, Maya等)";
smallclass[41][2] = "7";
smallclass[42] = new Array();
smallclass[42][0] = "705";
smallclass[42][1] = "交互式设计";
smallclass[42][2] = "7";
smallclass[43] = new Array();
smallclass[43][0] = "706";
smallclass[43][1] = "AutoCAD";
smallclass[43][2] = "7";
smallclass[44] = new Array();
smallclass[44][0] = "801";
smallclass[44][1] = "系统维护与使用区";
smallclass[44][2] = "8";
smallclass[45] = new Array();
smallclass[45][0] = "802";
smallclass[45][1] = "程序开发区";
smallclass[45][2] = "8";
smallclass[46] = new Array();
smallclass[46][0] = "803";
smallclass[46][1] = "内核及驱动程序研究区";
smallclass[46][2] = "8";
smallclass[47] = new Array();
smallclass[47][0] = "804";
smallclass[47][1] = "专题技术讨论区";
smallclass[47][2] = "8";
smallclass[48] = new Array();
smallclass[48][0] = "805";
smallclass[48][1] = "实用资料发布区";
smallclass[48][2] = "8";
smallclass[49] = new Array();
smallclass[49][0] = "806";
smallclass[49][1] = "UNIX文化";
smallclass[49][2] = "8";
smallclass[50] = new Array();
smallclass[50][0] = "901";
smallclass[50][1] = "硬件设计";
smallclass[50][2] = "9";
smallclass[51] = new Array();
smallclass[51][0] = "902";
smallclass[51][1] = "驱动开发/核心开发";
smallclass[51][2] = "9";
smallclass[52] = new Array();
smallclass[52][0] = "903";
smallclass[52][1] = "单片机/工控";
smallclass[52][2] = "9";
smallclass[53] = new Array();
smallclass[53][0] = "904";
smallclass[53][1] = "无线";
smallclass[53][2] = "9";
smallclass[54] = new Array();
smallclass[54][0] = "905";
smallclass[54][1] = "嵌入开发(WinCE)";
smallclass[54][2] = "9";
smallclass[55] = new Array();
smallclass[55][0] = "906";
smallclass[55][1] = "其他硬件开发";
smallclass[55][2] = "9";