`
guoyiqi
  • 浏览: 965173 次
社区版块
存档分类
最新评论

网页抓取例子

    博客分类:
  • jsp
阅读更多

<%@ page contentType="text/html;charset=gb2312"%>
<%
// Page-scraping example.
//
// For each of the pages 99414.shtml .. 99422.shtml under
// http://www.dltcedu.org/index_5/html/, this scriptlet:
//   1. skips lines until the first one containing the start marker and keeps
//      that line from the marker to its end;
//   2. keeps every following line up to and including the end marker
//      (anything after the end marker on that line is dropped);
//   3. rewrites "/UploadFile" to an absolute URL so images and attachments
//      still resolve when the fragment is served from this host.
// The fragments of all pages are concatenated (lines are joined without a
// separator, as readLine() strips line terminators) and printed to the response.
//
// A page with no start marker contributes nothing; a page with a start marker
// but no end marker contributes everything from the start marker to EOF.
// Any I/O failure (e.g. a missing page) propagates out of the scriptlet.

final String startMarker = "<div style=\"overflow:hidden";
final String endMarker = "</P></p></div>";
final String uploadPath = "/UploadFile";
final String uploadUrl = "http://www.dltcedu.org/UploadFile";

// StringBuilder avoids re-copying the accumulated text on every append.
StringBuilder collected = new StringBuilder();

for (int i = 14; i <= 22; i++) {

    java.net.URL l_url = new java.net.URL(
            "http://www.dltcedu.org/index_5/html/994" + i + ".shtml");

    java.net.HttpURLConnection l_connection =
            (java.net.HttpURLConnection) l_url.openConnection();

    java.io.BufferedReader l_reader = null;
    try {
        l_connection.connect();

        // NOTE(review): the stream is decoded with the platform default
        // charset, exactly as before. Presumably the remote pages are
        // GB2312/GBK like this page; if so, pass that charset to
        // InputStreamReader explicitly -- confirm against the remote site.
        l_reader = new java.io.BufferedReader(
                new java.io.InputStreamReader(l_connection.getInputStream()));

        String sCurrentLine;

        // Phase 1: discard everything before the start marker.
        while ((sCurrentLine = l_reader.readLine()) != null) {
            int start = sCurrentLine.indexOf(startMarker);
            if (start != -1) {
                collected.append(
                        sCurrentLine.substring(start).replace(uploadPath, uploadUrl));
                break;
            }
        }

        // Phase 2: copy lines until the end marker. The end marker is only
        // searched for on lines AFTER the start-marker line. If phase 1 hit
        // EOF, readLine() returns null again and this loop is skipped.
        while ((sCurrentLine = l_reader.readLine()) != null) {
            int end = sCurrentLine.indexOf(endMarker);
            if (end != -1) {
                // Keep the marker itself, drop whatever follows it.
                collected.append(
                        sCurrentLine.substring(0, end + endMarker.length())
                                .replace(uploadPath, uploadUrl));
                break;
            }
            collected.append(sCurrentLine.replace(uploadPath, uploadUrl));
        }
    } finally {
        // Release the stream and the connection even when a read fails.
        if (l_reader != null) {
            l_reader.close();
        }
        l_connection.disconnect();
    }

}//for

String sTotalString = collected.toString();
out.println(sTotalString);

%>

 

  • oh.rar (619 Bytes)
  • 下载次数: 9
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics