bugfix: rawhtml does not work
parent
a994b1c9fd
commit
6cc1d62a08
|
@ -4,6 +4,7 @@ import org.apache.commons.codec.digest.DigestUtils;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import us.codecraft.webmagic.*;
|
||||
import us.codecraft.webmagic.model.annotation.Experimental;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.processor.SimplePageProcessor;
|
||||
|
@ -20,6 +21,7 @@ import java.io.*;
|
|||
* @author code4crafter@gmail.com
|
||||
* @since 0.2.1
|
||||
*/
|
||||
@Experimental
|
||||
public class FileCache extends FilePersistentBase implements Downloader, Pipeline, PageProcessor {
|
||||
|
||||
private Downloader downloaderWhenFileMiss;
|
||||
|
|
|
@ -23,6 +23,12 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
* private List<String> tags;
|
||||
* }
|
||||
</pre>
|
||||
* And start the spider by:
|
||||
* <pre>
|
||||
* OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
|
||||
* ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
|
||||
* }
|
||||
</pre>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.0
|
||||
*/
|
||||
|
|
|
@ -105,7 +105,8 @@ class PageModelExtractor {
|
|||
default:
|
||||
selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
|
||||
}
|
||||
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, comboExtract.notNull(), comboExtract.multi());
|
||||
fieldExtractor = new FieldExtractor(field, selector, comboExtract.source() == ComboExtract.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
|
||||
comboExtract.notNull(), comboExtract.multi());
|
||||
Method setterMethod = getSetterMethod(clazz, field);
|
||||
if (setterMethod != null) {
|
||||
fieldExtractor.setSetterMethod(setterMethod);
|
||||
|
@ -119,7 +120,8 @@ class PageModelExtractor {
|
|||
ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
|
||||
if (extractBy != null) {
|
||||
Selector selector = ExtractorUtils.getSelector(extractBy);
|
||||
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, extractBy.notNull(), extractBy.multi());
|
||||
fieldExtractor = new FieldExtractor(field, selector, extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
|
||||
extractBy.notNull(), extractBy.multi());
|
||||
Method setterMethod = getSetterMethod(clazz, field);
|
||||
if (setterMethod != null) {
|
||||
fieldExtractor.setSetterMethod(setterMethod);
|
||||
|
|
Loading…
Reference in New Issue