complete javadoc
parent
8b90b91e33
commit
7edfa26f90
|
@ -518,7 +518,7 @@ public class Spider implements Runnable, Task {
|
|||
* Add urls with information to crawl.<br>
|
||||
*
|
||||
* @param requests requests
|
||||
* @return
|
||||
* @return this
|
||||
*/
|
||||
public Spider addRequest(Request... requests) {
|
||||
for (Request request : requests) {
|
||||
|
@ -730,7 +730,7 @@ public class Spider implements Runnable, Task {
|
|||
}
|
||||
|
||||
/**
|
||||
* Set wait time when no url is polled.<br></br>
|
||||
* Set wait time when no url is polled.<br><br>
|
||||
*
|
||||
* @param emptySleepTime In MILLISECONDS.
|
||||
*/
|
||||
|
|
|
@ -8,7 +8,7 @@ import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
|
|||
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
|
||||
|
||||
/**
|
||||
* Remove duplicate urls and only push urls which are not duplicate.<br></br>
|
||||
* Remove duplicate urls and only push urls which are not duplicate.<br><br>
|
||||
*
|
||||
* @author code4crafer@gmail.com
|
||||
* @since 0.5.0
|
||||
|
|
|
@ -69,7 +69,7 @@ public class Html extends HtmlNode {
|
|||
|
||||
/**
|
||||
* @param selector selector
|
||||
* @return
|
||||
* @return result
|
||||
*/
|
||||
public String selectDocument(Selector selector) {
|
||||
if (selector instanceof ElementSelector) {
|
||||
|
|
|
@ -60,7 +60,7 @@ public class HtmlNode extends AbstractSelectable {
|
|||
* select elements
|
||||
*
|
||||
* @param elementSelector elementSelector
|
||||
* @return
|
||||
* @return result
|
||||
*/
|
||||
protected Selectable selectElements(BaseElementSelector elementSelector) {
|
||||
ListIterator<Element> elementIterator = getElements().listIterator();
|
||||
|
|
|
@ -35,6 +35,7 @@ public abstract class Selectors {
|
|||
/**
|
||||
* @deprecated use {@link #xpath(String)} instead
|
||||
* @see #xpath(String)
|
||||
* @param expr expr
|
||||
* @return new selector
|
||||
*/
|
||||
public static XpathSelector xsoup(String expr) {
|
||||
|
|
|
@ -7,10 +7,10 @@ import java.util.concurrent.locks.Condition;
|
|||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
* Thread pool for workers.<br></br>
|
||||
* Use {@link java.util.concurrent.ExecutorService} as inner implement. <br></br>
|
||||
* New feature: <br></br>
|
||||
* 1. Block when thread pool is full to avoid poll many urls without process. <br></br>
|
||||
* Thread pool for workers.<br><br>
|
||||
* Use {@link java.util.concurrent.ExecutorService} as the inner implementation. <br><br>
|
||||
* New feature: <br><br>
|
||||
* 1. Block when the thread pool is full to avoid polling many urls without processing them. <br><br>
|
||||
* 2. Count of alive threads for monitoring.
|
||||
*
|
||||
* @author code4crafer@gmail.com
|
||||
|
|
|
@ -52,7 +52,7 @@ public class UrlUtils {
|
|||
/**
|
||||
*
|
||||
* @param url url
|
||||
* @return
|
||||
* @return new url
|
||||
*/
|
||||
public static String encodeIllegalCharacterInUrl(String url) {
|
||||
//TODO more character support
|
||||
|
|
|
@ -9,7 +9,7 @@ import java.util.regex.Pattern;
|
|||
* User: Sebastian MA
|
||||
* Date: April 03, 2014
|
||||
* Time: 10:00
|
||||
* <p></p>
|
||||
* <p>
|
||||
* A PatternHandler is in charge of both page extraction and data processing by implementing
|
||||
* its two abstract methods.
|
||||
*/
|
||||
|
|
|
@ -9,12 +9,12 @@ import us.codecraft.webmagic.Request;
|
|||
public interface RequestMatcher {
|
||||
|
||||
/**
|
||||
* Check whether to process the page.<br></br>
|
||||
* Check whether to process the page.<br><br>
|
||||
* Please DO NOT change page status in this method.
|
||||
*
|
||||
* @param page page
|
||||
*
|
||||
* @return
|
||||
* @return whether matches
|
||||
*/
|
||||
public boolean match(Request page);
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ public interface SubPipeline extends RequestMatcher {
|
|||
/**
|
||||
* process the page, extract urls to fetch, extract the data and store
|
||||
*
|
||||
* @param page page
|
||||
* @param resultItems resultItems
|
||||
* @param task task
|
||||
* @return whether continue to match
|
||||
*/
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.util.List;
|
|||
* private String content;
|
||||
*
|
||||
* {@literal @}ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
|
||||
* private List<String> tags;
|
||||
* private List&lt;String&gt; tags;
|
||||
* }
|
||||
* </pre>
|
||||
* And start the spider by:
|
||||
|
|
|
@ -43,7 +43,7 @@ public class SpiderMonitor {
|
|||
* Register spider for monitor.
|
||||
*
|
||||
* @param spiders spiders
|
||||
* @return
|
||||
* @return this
|
||||
*/
|
||||
public synchronized SpiderMonitor register(Spider... spiders) throws JMException {
|
||||
for (Spider spider : spiders) {
|
||||
|
|
|
@ -30,6 +30,7 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
|
|||
/**
|
||||
* init map with protoMapClass
|
||||
*
|
||||
* @param map the original map to contain the DoubleKeyMap
|
||||
* @param protoMapClass protoMapClass
|
||||
*/
|
||||
@SuppressWarnings("rawtypes")
|
||||
|
|
Loading…
Reference in New Issue