Fix NullPointerException
parent
b0af45f4bb
commit
4eb3d60083
|
@ -51,14 +51,14 @@ class ModelPageProcessor implements PageProcessor {
|
|||
@Override
|
||||
public void process(Page page) {
|
||||
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
|
||||
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
|
||||
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
|
||||
Object process = pageModelExtractor.process(page);
|
||||
if (process == null || (process instanceof List && ((List) process).size() == 0)) {
|
||||
page.getResultItems().setSkip(true);
|
||||
}
|
||||
postProcessPageModel(pageModelExtractor.getClazz(), process);
|
||||
page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
|
||||
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
|
||||
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -56,18 +56,20 @@ public class RedisScheduler implements Scheduler {
|
|||
public synchronized Request poll(Task task) {
|
||||
Jedis jedis = pool.getResource();
|
||||
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
|
||||
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
|
||||
byte[] bytes = jedis.get(key.getBytes());
|
||||
try {
|
||||
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
|
||||
return (Request)o;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
pool.returnResource(jedis);
|
||||
if (url == null) {
|
||||
return null;
|
||||
}
|
||||
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
|
||||
byte[] bytes = jedis.get(key.getBytes());
|
||||
if (bytes!=null){
|
||||
try {
|
||||
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
|
||||
return (Request)o;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
pool.returnResource(jedis);
|
||||
return new Request(url);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import us.codecraft.webmagic.Site;
|
|||
import us.codecraft.webmagic.model.*;
|
||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||
import us.codecraft.webmagic.pipeline.PagedPipeline;
|
||||
import us.codecraft.webmagic.scheduler.RedisScheduler;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
@ -71,7 +72,7 @@ public class News163 implements PagedModel {
|
|||
|
||||
public static void main(String[] args) {
|
||||
OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class)
|
||||
.clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run();
|
||||
.scheduler(new RedisScheduler("localhost")).clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue