bugfix: rawhtml does not work
parent
a994b1c9fd
commit
6cc1d62a08
|
@ -4,6 +4,7 @@ import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import us.codecraft.webmagic.*;
|
import us.codecraft.webmagic.*;
|
||||||
|
import us.codecraft.webmagic.model.annotation.Experimental;
|
||||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
import us.codecraft.webmagic.processor.SimplePageProcessor;
|
import us.codecraft.webmagic.processor.SimplePageProcessor;
|
||||||
|
@ -20,6 +21,7 @@ import java.io.*;
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
* @since 0.2.1
|
* @since 0.2.1
|
||||||
*/
|
*/
|
||||||
|
@Experimental
|
||||||
public class FileCache extends FilePersistentBase implements Downloader, Pipeline, PageProcessor {
|
public class FileCache extends FilePersistentBase implements Downloader, Pipeline, PageProcessor {
|
||||||
|
|
||||||
private Downloader downloaderWhenFileMiss;
|
private Downloader downloaderWhenFileMiss;
|
||||||
|
|
|
@ -23,6 +23,12 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
* private List<String> tags;
|
* private List<String> tags;
|
||||||
* }
|
* }
|
||||||
</pre>
|
</pre>
|
||||||
|
* Then start the spider with:
|
||||||
|
* <pre>
|
||||||
|
* OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
|
||||||
|
* ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
|
||||||
|
* }
|
||||||
|
</pre>
|
||||||
* @author code4crafter@gmail.com <br>
|
* @author code4crafter@gmail.com <br>
|
||||||
* @since 0.2.0
|
* @since 0.2.0
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -105,7 +105,8 @@ class PageModelExtractor {
|
||||||
default:
|
default:
|
||||||
selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
|
selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
|
||||||
}
|
}
|
||||||
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, comboExtract.notNull(), comboExtract.multi());
|
fieldExtractor = new FieldExtractor(field, selector, comboExtract.source() == ComboExtract.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
|
||||||
|
comboExtract.notNull(), comboExtract.multi());
|
||||||
Method setterMethod = getSetterMethod(clazz, field);
|
Method setterMethod = getSetterMethod(clazz, field);
|
||||||
if (setterMethod != null) {
|
if (setterMethod != null) {
|
||||||
fieldExtractor.setSetterMethod(setterMethod);
|
fieldExtractor.setSetterMethod(setterMethod);
|
||||||
|
@ -119,7 +120,8 @@ class PageModelExtractor {
|
||||||
ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
|
ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
|
||||||
if (extractBy != null) {
|
if (extractBy != null) {
|
||||||
Selector selector = ExtractorUtils.getSelector(extractBy);
|
Selector selector = ExtractorUtils.getSelector(extractBy);
|
||||||
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, extractBy.notNull(), extractBy.multi());
|
fieldExtractor = new FieldExtractor(field, selector, extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
|
||||||
|
extractBy.notNull(), extractBy.multi());
|
||||||
Method setterMethod = getSetterMethod(clazz, field);
|
Method setterMethod = getSetterMethod(clazz, field);
|
||||||
if (setterMethod != null) {
|
if (setterMethod != null) {
|
||||||
fieldExtractor.setSetterMethod(setterMethod);
|
fieldExtractor.setSetterMethod(setterMethod);
|
||||||
|
|
Loading…
Reference in New Issue