Fix NullPointerException in RedisScheduler.poll

master
yihua.huang 2013-08-05 22:06:39 +08:00
parent b0af45f4bb
commit 4eb3d60083
3 changed files with 15 additions and 12 deletions

View File

@ -51,14 +51,14 @@ class ModelPageProcessor implements PageProcessor {
@Override @Override
public void process(Page page) { public void process(Page page) {
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) { for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
Object process = pageModelExtractor.process(page); Object process = pageModelExtractor.process(page);
if (process == null || (process instanceof List && ((List) process).size() == 0)) { if (process == null || (process instanceof List && ((List) process).size() == 0)) {
page.getResultItems().setSkip(true); page.getResultItems().setSkip(true);
} }
postProcessPageModel(pageModelExtractor.getClazz(), process); postProcessPageModel(pageModelExtractor.getClazz(), process);
page.putField(pageModelExtractor.getClazz().getCanonicalName(), process); page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
} }
} }

View File

@ -56,18 +56,20 @@ public class RedisScheduler implements Scheduler {
public synchronized Request poll(Task task) { public synchronized Request poll(Task task) {
Jedis jedis = pool.getResource(); Jedis jedis = pool.getResource();
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID()); String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
byte[] bytes = jedis.get(key.getBytes());
try {
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
return (Request)o;
} catch (Exception e) {
e.printStackTrace();
}
pool.returnResource(jedis);
if (url == null) { if (url == null) {
return null; return null;
} }
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
byte[] bytes = jedis.get(key.getBytes());
if (bytes!=null){
try {
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
return (Request)o;
} catch (Exception e) {
e.printStackTrace();
}
}
pool.returnResource(jedis);
return new Request(url); return new Request(url);
} }
} }

View File

@ -5,6 +5,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.*; import us.codecraft.webmagic.model.*;
import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.PagedPipeline; import us.codecraft.webmagic.pipeline.PagedPipeline;
import us.codecraft.webmagic.scheduler.RedisScheduler;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -71,7 +72,7 @@ public class News163 implements PagedModel {
public static void main(String[] args) { public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class) OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class)
.clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run(); .scheduler(new RedisScheduler("localhost")).clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run();
} }
} }