Fix NullPointerException
parent
b0af45f4bb
commit
4eb3d60083
|
@ -51,14 +51,14 @@ class ModelPageProcessor implements PageProcessor {
|
||||||
@Override
|
@Override
|
||||||
public void process(Page page) {
|
public void process(Page page) {
|
||||||
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
|
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
|
||||||
|
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
|
||||||
|
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
|
||||||
Object process = pageModelExtractor.process(page);
|
Object process = pageModelExtractor.process(page);
|
||||||
if (process == null || (process instanceof List && ((List) process).size() == 0)) {
|
if (process == null || (process instanceof List && ((List) process).size() == 0)) {
|
||||||
page.getResultItems().setSkip(true);
|
page.getResultItems().setSkip(true);
|
||||||
}
|
}
|
||||||
postProcessPageModel(pageModelExtractor.getClazz(), process);
|
postProcessPageModel(pageModelExtractor.getClazz(), process);
|
||||||
page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
|
page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
|
||||||
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
|
|
||||||
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,18 +56,20 @@ public class RedisScheduler implements Scheduler {
|
||||||
public synchronized Request poll(Task task) {
|
public synchronized Request poll(Task task) {
|
||||||
Jedis jedis = pool.getResource();
|
Jedis jedis = pool.getResource();
|
||||||
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
|
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
|
||||||
|
if (url == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
|
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
|
||||||
byte[] bytes = jedis.get(key.getBytes());
|
byte[] bytes = jedis.get(key.getBytes());
|
||||||
|
if (bytes!=null){
|
||||||
try {
|
try {
|
||||||
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
|
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
|
||||||
return (Request)o;
|
return (Request)o;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
pool.returnResource(jedis);
|
|
||||||
if (url == null) {
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
pool.returnResource(jedis);
|
||||||
return new Request(url);
|
return new Request(url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.model.*;
|
import us.codecraft.webmagic.model.*;
|
||||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||||
import us.codecraft.webmagic.pipeline.PagedPipeline;
|
import us.codecraft.webmagic.pipeline.PagedPipeline;
|
||||||
|
import us.codecraft.webmagic.scheduler.RedisScheduler;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -71,7 +72,7 @@ public class News163 implements PagedModel {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class)
|
OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class)
|
||||||
.clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run();
|
.scheduler(new RedisScheduler("localhost")).clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue