+doc
parent
fd9ae6d93e
commit
5cb45af3a4
|
@ -8,7 +8,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* <pre class="zh">
|
||||
* Page保存了上一次抓取的结果,并可定义待抓取的链接内容。
|
||||
*
|
||||
* 主要方法:
|
||||
|
@ -19,6 +19,17 @@ import java.util.List;
|
|||
* {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} 添加待抓取的链接
|
||||
*
|
||||
* </pre>
|
||||
* <pre class="en">
|
||||
* Stores the extracted results and the URLs to be crawled.
|
||||
*
|
||||
* Main methods:
|
||||
* {@link #getUrl()} get url of current page
|
||||
* {@link #getHtml()} get content of current page
|
||||
* {@link #putField(String, Object)} save extracted result
|
||||
* {@link #getResultItems()} get the extracted results to be used in {@link us.codecraft.webmagic.pipeline.Pipeline}
|
||||
* {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} add urls to crawl
|
||||
*
|
||||
* </pre>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
*/
|
||||
|
@ -44,7 +55,7 @@ public class Page {
|
|||
}
|
||||
|
||||
/**
|
||||
* 保存抽取的结果
|
||||
*
|
||||
*
|
||||
* @param key 结果的key
|
||||
* @param field 结果的value
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <div class="zh">
|
||||
* Request对象封装了待抓取的url信息。<br/>
|
||||
* 在PageProcessor中,Request对象可以通过{@link us.codecraft.webmagic.Page#getRequest()} 获取。<br/>
|
||||
* <br/>
|
||||
|
@ -22,6 +23,7 @@ import java.util.Map;
|
|||
* String linktext = (String)page.getRequest().getExtra()[0];
|
||||
* }
|
||||
* </pre>
|
||||
* </div>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* Date: 13-4-21
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
<html>
|
||||
<body>
|
||||
<div class="en">
|
||||
Contains the main entry class "Spider" and the model classes used to pass data around.
|
||||
</div>
|
||||
<div class="zh">
|
||||
包括webmagic入口类Spider和一些数据传递的实体类。
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
package us.codecraft.webmagic.model.annotation;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
/**
|
||||
* Marker annotation that declares no elements; it is retained at runtime and
* may be placed on fields and types.
* <p>
* NOTE(review): the name suggests it combines several extractors, but no
* elements or processing logic are visible here; confirm against its usages.
*
* @author code4crafter@gmail.com <br>
|
||||
* Date: 13-8-16 <br>
|
||||
* Time: 11:09 PM <br>
|
||||
*/
|
||||
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
|
||||
@Target({ElementType.FIELD, ElementType.TYPE})
|
||||
public @interface ComboExtract {
|
||||
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue