`
hsyzijvaa
  • 浏览: 106873 次
  • 性别: Icon_minigender_1
  • 来自: 成都
社区版块
存档分类
最新评论

[] javaeye rss抓取

    博客分类:
  • java
阅读更多
   
import java.io.bufferedinputstream;import java.net.malformedurlexception;import java.net.url;import java.net.urlconnection;import java.util.list;import org.junit.ignore;import org.junit.test;import org.xml.sax.inputsource;import com.sun.syndication.feed.wirefeed;import com.sun.syndication.feed.module.module;import com.sun.syndication.feed.rss.channel;import com.sun.syndication.feed.rss.item;import com.sun.syndication.feed.synd.syndentry;import com.sun.syndication.feed.synd.syndfeed;import com.sun.syndication.feed.synd.syndfeedimpl;import com.sun.syndication.io.syndfeedinput;import com.sun.syndication.io.wirefeedinput;import com.sun.syndication.io.xmlreader;//rss(简易资讯聚合)//rss也叫聚合rss是在线共享内容的一种简易方式(也叫聚合内容,really simple syndication)。public class simplerometest {        @ignore    @test     public void testfetchjavaeyeopml()  throws exception {        url javaeyeurl = new url("http://www.iteye.com/rss");        wirefeedinput feedinput = new wirefeedinput();        wirefeed feed = feedinput.build(new xmlreader(javaeyeurl));    }    @test    public void testfetchjavaeyenewsrss() throws exception {        url javaeyeurl = new url("http://www.iteye.com/rss/news/");                stringbuffer sb = new stringbuffer(1024*1024);        bufferedinputstream is = new java.io.bufferedinputstream(javaeyeurl.openconnection().getinputstream());        int ch = is.read();        while(ch != -1) {            sb.append((char)ch);            ch = is.read();        }        system.out.println(new string(sb.tostring().getbytes("iso-8859-1"), "utf-8"));                /**         * <!doctype html public "-//w3c//dtd html 4.01 transitional//en"<!doctype html public "-//w3c//dtd xhtml 1.0 transitional//en" "http://www.w3.org/tr/xhtml1/dtd/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-cn" dir="ltr">  <head>    <meta http-equiv="content-type" content="text/html; charset=utf-8" />    <meta name="keyword" content="javaeye,it,开发,交流,社区,java, ruby, ajax, agile" />    <title>您的访问请求被拒绝 - javaeye技术社区</title>    <style type="text/css">      .clearfix:after {        content: ".";        display: block;        height: 0;        clear: both;        visibility: hidden;      }      .clearfix {        display:block;      }      .left {        float: left;      }      h1 {font-size: 20px;color: #6293bb;}      p  {font-size: 14px;color: #6293bb;}    </style>  </head>  <body>    <div style="padding:50px 0 0 300px">      <h1>您的访问请求被拒绝</h1>    </div>    <div class="clearfix">      <div class="left" style="padding-left:120px">        <img src="/images/filenotfound.jpg" width="128" height="128" />      </div>      <div class="left" style="width:700px;padding:30px 0 0 30px">        <p>您可能使用了网络爬虫抓取javaeye网站页面!</p>        <p>javaeye网站不允许您使用网络爬虫对javaeye进行恶意的网页抓取,请您立刻停止该抓取行为!</p>        <p>如果您的网络爬虫不属于恶意抓取行为,希望javaeye网站允许你进行网页抓取,请和javaeye管理员联系,取得授权: webmaster<img src='/images/email.gif' alt="email" />support.iteye.com</p>        <p>如果您确实使用浏览器访问,但是被错误的识别为网络爬虫,请将您浏览器发送的“user agent”信息告知我们,帮助我们解决错误: webmaster<img src='/images/email.gif' alt="email" />support.iteye.com</p>      </div>    </div>    <div style="padding:20px 0 0 500px">      <a href="http://www.iteye.com"><img src='/images/logo_small.gif' border='0'></a>    </div>  </body></html>         */        //        syndfeedinput feedinput = new syndfeedinput(true);//        syndfeed feed = feedinput.build(new inputsource(javaeyeurl.openstream()));//        // rome中rss的可选标准   //        // rss_0.90, rss_0.91, rss_0.92, rss_0.93, rss_0.94, rss_1.0, rss_2.0, atom_0.3     //        list<syndentry> entries = feed.getentries();//        //        for(syndentry entry : entries) {//            system.out.println("title:" + entry.gettitle());//            system.out.println("desc:" + entry.getdescription());//            system.out.println("link:" + entry.getlink());//            system.out.println("date:" + entry.getpublisheddate());//            system.out.println("==================================");//            system.out.println("==================================");//            system.out.println("==================================");//        }            }            @test    public void testfetchjavaeyenewsrsswithhttpclient() throws exception {        url javaeyeurl = new url("http://www.iteye.com/rss/news/");        urlconnection conn = javaeyeurl.openconnection();        conn.setrequestproperty("user-agent", "mozilla/5.0 (windows; u; windows nt 5.1; zh-cn; rv:1.9.2.3) gecko/20100401 firefox/3.6.3");                syndfeedinput feedinput = new syndfeedinput(true);        syndfeed feed = feedinput.build(new inputsource(conn.getinputstream()));        // rome中rss的可选标准           // rss_0.90, rss_0.91, rss_0.92, rss_0.93, rss_0.94, rss_1.0, rss_2.0, atom_0.3             list<syndentry> entries = feed.getentries();                for(syndentry entry : entries) {            system.out.println("title:" + entry.gettitle());            system.out.println("desc:" + entry.getdescription().getvalue());            system.out.println("link:" + entry.getlink());            system.out.println("date:" + entry.getpublisheddate());            system.out.println("==================================");            system.out.println("==================================");            system.out.println("==================================");        }    }}
 
1
3
分享到:
评论
2 楼 沙舟狼客 2011-10-27  
这太折磨人了, 还有人要看吗?
1 楼 greatwqs 2011-10-26  
代码都成这样了 

相关推荐

Global site tag (gtag.js) - Google Analytics