Make PageProcessor#getSite a default method. Closes #1040.

master
Sutra Zhou 2021-10-24 23:20:38 +08:00
parent c5a037a807
commit 34da2fb3a0
2 changed files with 56 additions and 10 deletions

View File

@ -4,13 +4,16 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
/** /**
* Interface to be implemented to customize a crawler.<br> * Interface to be implemented to customize a crawler.
* <br> *
* <p>
* In PageProcessor, you can customize: * In PageProcessor, you can customize:
* <br> * </p>
* start urls and other settings in {@link Site}<br> * <ul>
* how the urls to fetch are detected <br> * <li>start URLs and other settings in {@link Site}</li>
* how the data are extracted and stored <br> * <li>how the URLs to fetch are detected</li>
* <li>how the data are extracted and stored</li>
* </ul>
* *
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* @see Site * @see Site
@ -20,17 +23,20 @@ import us.codecraft.webmagic.Site;
public interface PageProcessor { public interface PageProcessor {
/** /**
* process the page, extract urls to fetch, extract the data and store * Processes the page: extracts URLs to fetch, extracts the data and stores it.
* *
* @param page page * @param page page
*/ */
public void process(Page page); void process(Page page);
/** /**
* get the site settings * Returns the site settings.
* *
* @return site * @return site
* @see Site * @see Site
*/ */
public Site getSite(); default Site getSite() {
return Site.me();
}
} }

View File

@ -0,0 +1,40 @@
package us.codecraft.webmagic.processor;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
/**
 * Tests for {@link PageProcessor#getSite()}: the default implementation and
 * an overriding implementation.
 */
public class PageProcessorTest {

    /**
     * A processor that does not override {@code getSite()} should return a
     * site equal to {@code Site.me()}.
     */
    @Test
    public void testGetSite() {
        Site actualSite = new PageProcessor() {
            @Override
            public void process(Page page) {
                // Intentionally empty: only getSite() is under test.
            }
        }.getSite();

        // NOTE(review): relies on Site providing value-based equals(), since
        // Site.me() is called separately on each side -- confirm.
        assertEquals(Site.me(), actualSite);
    }

    /**
     * A processor that overrides {@code getSite()} should return its own
     * site rather than the default one.
     */
    @Test
    public void testGetSiteOverridden() {
        Site actualSite = new PageProcessor() {
            @Override
            public void process(Page page) {
                // Intentionally empty: only getSite() is under test.
            }

            @Override
            public Site getSite() {
                return Site.me().setTimeOut(123);
            }
        }.getSite();

        assertEquals(Site.me().setTimeOut(123), actualSite);
    }
}