update email

master
yihua.huang 2013-08-03 14:01:18 +08:00
parent 7c9e9ce869
commit 866ab0a056
17 changed files with 110 additions and 29 deletions

View File

@ -5,7 +5,7 @@ import java.util.Map;
/**
* PageProcessor{@link us.codecraft.webmagic.pipeline.Pipeline}<br>
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-7-25 <br>
* Time: 12:20 <br>
*/

View File

@ -90,10 +90,6 @@ public class Spider implements Runnable, Task {
return new Spider(pageProcessor);
}
/**
 * Creates a Spider whose page processor is built by
 * ObjectPageProcessor.create(site, pageModels).
 *
 * @param site       forwarded unchanged to ObjectPageProcessor.create
 * @param pageModels page model classes, forwarded unchanged to ObjectPageProcessor.create
 * @return a new Spider wrapping the created processor
 */
public static Spider create(Site site,Class... pageModels) {
return new Spider(ObjectPageProcessor.create(site,pageModels));
}
/**
* startUrlsSitestartUrls
*

View File

@ -0,0 +1,15 @@
package us.codecraft.webmagic.annotation;
import us.codecraft.webmagic.Page;
/**
* <br>
*
* @author code4crafter@gmail.com <br>
* @date: 13-8-3 <br>
* Time: 9:42 <br>
*/
public interface AfterExtractor<T> {

    /**
     * Callback that receives the page being processed together with the
     * object extracted from it.
     *
     * @param page the page the object was extracted from
     * @param t    the extracted object
     */
    void afterProcess(Page page, T t);
}

View File

@ -5,7 +5,7 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 8:40 <br>
*/

View File

@ -5,7 +5,7 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 8:40 <br>
*/

View File

@ -6,7 +6,7 @@ import java.lang.reflect.Field;
import java.lang.reflect.Method;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 9:48 <br>
*/

View File

@ -5,7 +5,7 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 8:40 <br>
*/

View File

@ -0,0 +1,29 @@
package us.codecraft.webmagic.annotation;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
/**
* @author code4crafter@gmail.com <br>
* @date: 13-8-3 <br>
* Time: 9:51 <br>
*/
public class OOSpider extends Spider {

    /**
     * Creates a spider that delegates page handling to the given processor.
     *
     * @param pageProcessor processor handed to the Spider constructor
     */
    public OOSpider(PageProcessor pageProcessor) {
        super(pageProcessor);
    }

    /**
     * Builds an OOSpider for the given site and page model classes.
     * The processor comes from ObjectPageProcessor.create(site, pageModels),
     * and a fresh ObjectPipeline is attached before the spider is returned.
     *
     * @param site       site passed to ObjectPageProcessor.create
     * @param pageModels page model classes passed to ObjectPageProcessor.create
     * @return the configured spider
     */
    public static OOSpider create(Site site, Class... pageModels) {
        PageProcessor processor = ObjectPageProcessor.create(site, pageModels);
        OOSpider spider = new OOSpider(processor);
        spider.pipeline(new ObjectPipeline());
        return spider;
    }
}

View File

@ -12,7 +12,7 @@ import java.util.Set;
import java.util.regex.Pattern;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 8:46 <br>
*/

View File

@ -4,18 +4,36 @@ import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-2 <br>
* Time: 10:47 <br>
*/
public class ObjectPipeline implements Pipeline {
// Pipeline that hands result objects to the PageModelPipeline registered for their class.
// Registered handlers, keyed by the class they were registered under.
private Map<Class, PageModelPipeline> pageModelPipelines = new ConcurrentHashMap<Class, PageModelPipeline>();
public ObjectPipeline() {
}
/**
 * Registers (or replaces) the handler stored under the given class.
 *
 * @param clazz             key the handler is stored under
 * @param pageModelPipeline handler to store
 * @return this instance, so calls can be chained
 */
public ObjectPipeline put(Class clazz, PageModelPipeline pageModelPipeline) {
pageModelPipelines.put(clazz, pageModelPipeline);
return this;
}
@Override
public void process(ResultItems resultItems, Task task) {
}
// NOTE(review): the read() stub below and its `return null;` look like lines removed by
// this commit that the diff view shows inline. The statements that follow use
// resultItems and task, which are parameters of process() — confirm against the real file.
public <T> T read() {
return null;
// Nothing is dispatched when the result is flagged as skipped.
if (resultItems.isSkip()) {
return;
}
// For each registered class, look the result up by the class's canonical name
// and pass it to the matching handler when one is present.
for (Map.Entry<Class, PageModelPipeline> classPageModelPipelineEntry : pageModelPipelines.entrySet()) {
Object o = resultItems.get(classPageModelPipelineEntry.getKey().getCanonicalName());
if (o != null) {
classPageModelPipelineEntry.getValue().process(o, task);
}
}
}
}

View File

@ -16,7 +16,7 @@ import java.util.List;
import java.util.regex.Pattern;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 9:33 <br>
*/
@ -30,6 +30,8 @@ class PageModelExtractor {
private List<FieldExtractor> fieldExtractors;
private AfterExtractor afterExtractor;
public static PageModelExtractor create(Class clazz) {
PageModelExtractor pageModelExtractor = new PageModelExtractor();
pageModelExtractor.init(clazz);
@ -40,6 +42,13 @@ class PageModelExtractor {
this.clazz = clazz;
initTargetUrlPatterns();
fieldExtractors = new ArrayList<FieldExtractor>();
// Class.isAssignableFrom is directional: X.isAssignableFrom(Y) asks whether a Y can be
// used where an X is expected. To detect a model class that implements AfterExtractor,
// the interface must be the receiver. The reversed form is true only when clazz is
// AfterExtractor itself or one of its supertypes, so the callback was never installed
// for real model classes.
if (AfterExtractor.class.isAssignableFrom(clazz)){
    try {
        // A separate instance of the model class acts as the callback receiver;
        // this requires an accessible no-argument constructor.
        afterExtractor=(AfterExtractor)clazz.newInstance();
    } catch (Exception e) {
        // Surface instantiation problems as a configuration error, keeping the cause.
        throw new IllegalArgumentException(e);
    }
}
for (Field field : clazz.getDeclaredFields()) {
field.setAccessible(true);
if (!field.getType().isAssignableFrom(String.class)){
@ -147,6 +156,9 @@ class PageModelExtractor {
}
setField(o, fieldExtractor, value);
}
if (afterExtractor!=null){
afterExtractor.afterProcess(page,o);
}
} catch (InstantiationException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {

View File

@ -0,0 +1,14 @@
package us.codecraft.webmagic.annotation;
import us.codecraft.webmagic.Task;
/**
* @author code4crafter@gmail.com <br>
* @date: 13-8-3 <br>
* Time: 9:34 <br>
*/
public interface PageModelPipeline<T> {

    /**
     * Handles one object together with the task it belongs to.
     *
     * @param t    the object to handle
     * @param task the task supplied by the caller
     */
    void process(T t, Task task);
}

View File

@ -5,7 +5,7 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-8-1 <br>
* Time: 8:40 <br>
*/
@ -14,4 +14,5 @@ import java.lang.annotation.Target;
public @interface TargetUrl {
// One or more strings supplied at the annotation site.
// NOTE(review): presumably URL patterns for the pages the annotated model applies to — confirm against PageModelExtractor.
String[] value();
}

View File

@ -2,7 +2,7 @@ package us.codecraft.webmagic.downloader;
/**
* Spiderdestroy()<br>
* @author yihua.huang@dianping.com <br>
* @author code4crafter@gmail.com <br>
* @date: 13-7-26 <br>
* Time: 3:10 <br>
*/

View File

@ -3,7 +3,6 @@ package us.codecraft.webmagic.annotation;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
/**
* @author yihua.huang@dianping.com <br>
@ -16,12 +15,9 @@ public class TestFetcher {
@Test
public void test() {
ObjectPipeline objectPipeline = new ObjectPipeline();
// NOTE(review): the two spider set-ups below look like the before/after sides of this
// commit shown together by the diff view — confirm against the real file.
// Spider built from an ObjectPageProcessor, with objectPipeline attached, started via runAsync().
Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class))
.pipeline(objectPipeline).runAsync();
// OOSpider for the same start URL with objectPipeline attached; no run()/runAsync() call is visible on this chain.
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class)
.pipeline(objectPipeline);
OschinaBlog oschinaBlog = null;
// Prints each object returned by objectPipeline.read() until it returns null.
while ((oschinaBlog = objectPipeline.read()) != null) {
System.out.println(oschinaBlog);
}
}

View File

@ -1,8 +1,8 @@
package us.codecraft.webmagic.annotation.samples;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.annotation.ExtractBy;
import us.codecraft.webmagic.annotation.OOSpider;
import us.codecraft.webmagic.annotation.TargetUrl;
/**
@ -28,7 +28,7 @@ public class IteyeBlog implements Blog{
}
/**
 * Sample entry point: runs a spider starting at the dengminhui.iteye.com blog URL
 * with IteyeBlog as the page model class.
 */
public static void main(String[] args) {
// NOTE(review): the two calls below look like the before/after sides of this commit
// shown together by the diff view — confirm which one the real file contains.
Spider.create(Site.me().addStartUrl("http://dengminhui.iteye.com/blog"),IteyeBlog.class).run();
OOSpider.create(Site.me().addStartUrl("http://dengminhui.iteye.com/blog"), IteyeBlog.class).run();
}
public String getTitle() {

View File

@ -1,8 +1,8 @@
package us.codecraft.webmagic.annotation.samples;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.annotation.ExtractBy;
import us.codecraft.webmagic.annotation.OOSpider;
import us.codecraft.webmagic.annotation.TargetUrl;
/**
@ -28,7 +28,7 @@ public class OschinaBlog implements Blog{
}
/**
 * Sample entry point: runs a spider starting at the my.oschina.net/flashsword blog URL
 * with OschinaBlog as the page model class.
 */
public static void main(String[] args) {
// NOTE(review): the two calls below look like the before/after sides of this commit
// shown together by the diff view — confirm which one the real file contains.
Spider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"),OschinaBlog.class).run();
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).run();
}
public String getTitle() {