[refactor]move monitor to webmagic-extension #98
parent
d61f65cef8
commit
11ba5beb42
|
@ -111,7 +111,7 @@ public class Request implements Serializable {
|
|||
/**
|
||||
* The HTTP method of the request. GET by default.
|
||||
* @return httpMethod
|
||||
* @see us.codecraft.webmagic.constant.HttpConstant.Method
|
||||
* @see us.codecraft.webmagic.utils.HttpConstant.Method
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public String getMethod() {
|
||||
|
|
|
@ -50,7 +50,7 @@ public class Site {
|
|||
private boolean useGzip = true;
|
||||
|
||||
/**
|
||||
* @see us.codecraft.webmagic.constant.HttpConstant.Header
|
||||
* @see us.codecraft.webmagic.utils.HttpConstant.Header
|
||||
* @deprecated
|
||||
*/
|
||||
public static interface HeaderConst {
|
||||
|
|
|
@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import us.codecraft.webmagic.downloader.Downloader;
|
||||
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
||||
import us.codecraft.webmagic.monitor.SpiderListener;
|
||||
import us.codecraft.webmagic.pipeline.CollectorPipeline;
|
||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
package us.codecraft.webmagic.monitor;
|
||||
|
||||
import us.codecraft.webmagic.Request;
|
||||
package us.codecraft.webmagic;
|
||||
|
||||
/**
|
||||
* Listener of Spider on page processing. Used for monitoring and so on.
|
||||
*
|
||||
* @author code4crafer@gmail.com
|
||||
* @since 0.5.0
|
||||
*/
|
|
@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
|
|||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.constant.HttpConstant;
|
||||
import us.codecraft.webmagic.utils.HttpConstant;
|
||||
import us.codecraft.webmagic.selector.PlainText;
|
||||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.monitor.MonitorableScheduler;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package us.codecraft.webmagic.monitor;
|
||||
package us.codecraft.webmagic.scheduler;
|
||||
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.scheduler.Scheduler;
|
||||
|
||||
/**
|
||||
* The scheduler whose requests can be counted for monitor.
|
|
@ -1,4 +1,4 @@
|
|||
package us.codecraft.webmagic.constant;
|
||||
package us.codecraft.webmagic.utils;
|
||||
|
||||
/**
|
||||
* Some constants of Http protocol.
|
|
@ -0,0 +1,31 @@
|
|||
package us.codecraft.webmagic.example;
|
||||
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.monitor.SpiderMonitor;
|
||||
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
|
||||
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
|
||||
|
||||
/**
|
||||
* @author code4crafer@gmail.com
|
||||
*/
|
||||
public class MonitorExample {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor())
|
||||
.addUrl("http://my.oschina.net/flashsword/blog").thread(2);
|
||||
Spider githubSpider = Spider.create(new GithubRepoPageProcessor())
|
||||
.addUrl("https://github.com/code4craft");
|
||||
|
||||
SpiderMonitor spiderMonitor = new SpiderMonitor();
|
||||
spiderMonitor.register(oschinaSpider, githubSpider);
|
||||
//If you want to connect it from remote, use spiderMonitor.server().jmxStart();
|
||||
//ONLY ONE server can start for a machine.
|
||||
//Others will be registered
|
||||
spiderMonitor.server().server();
|
||||
spiderMonitor.jmxStart();
|
||||
oschinaSpider.start();
|
||||
githubSpider.start();
|
||||
|
||||
}
|
||||
}
|
|
@ -1,9 +1,13 @@
|
|||
package us.codecraft.webmagic.monitor;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.SpiderListener;
|
||||
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
|
||||
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
|
||||
import us.codecraft.webmagic.utils.IPUtils;
|
||||
|
||||
import javax.management.JMException;
|
||||
import javax.management.MBeanServer;
|
||||
|
@ -15,6 +19,7 @@ import java.io.IOException;
|
|||
import java.lang.management.ManagementFactory;
|
||||
import java.rmi.registry.LocateRegistry;
|
||||
import java.rmi.registry.Registry;
|
||||
import java.rmi.server.ExportException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -30,6 +35,8 @@ public class SpiderMonitor {
|
|||
Server, Client, Local;
|
||||
}
|
||||
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private static final int DEFAULT_SERVER_PORT = 14721;
|
||||
|
||||
private static final String DEFAULT_SERVER_HOST = "localhost";
|
||||
|
@ -52,6 +59,7 @@ public class SpiderMonitor {
|
|||
|
||||
/**
|
||||
* Register spider for monitor.
|
||||
*
|
||||
* @param spiders
|
||||
* @return
|
||||
*/
|
||||
|
@ -113,13 +121,18 @@ public class SpiderMonitor {
|
|||
|
||||
/**
|
||||
* Start monitor as server mode.
|
||||
*
|
||||
* @param port
|
||||
* @return
|
||||
* @throws IOException
|
||||
* @throws JMException
|
||||
*/
|
||||
public SpiderMonitor server(int port) throws IOException, JMException {
|
||||
try {
|
||||
Registry registry = LocateRegistry.createRegistry(port);
|
||||
} catch (ExportException e) {
|
||||
logger.warn("Start server fail, maybe the address is in using.", e);
|
||||
}
|
||||
serverPort = port;
|
||||
serverHost = "localhost";
|
||||
type = Type.Server;
|
||||
|
@ -128,6 +141,7 @@ public class SpiderMonitor {
|
|||
|
||||
/**
|
||||
* Start monitor as server mode.
|
||||
*
|
||||
* @return
|
||||
* @throws IOException
|
||||
* @throws JMException
|
||||
|
@ -139,6 +153,7 @@ public class SpiderMonitor {
|
|||
|
||||
/**
|
||||
* Start monitor as client mode.
|
||||
*
|
||||
* @param serverHost
|
||||
* @param serverPort
|
||||
* @return
|
||||
|
@ -154,6 +169,7 @@ public class SpiderMonitor {
|
|||
|
||||
/**
|
||||
* Start monitor as client mode.
|
||||
*
|
||||
* @return
|
||||
* @throws IOException
|
||||
* @throws JMException
|
||||
|
@ -167,7 +183,7 @@ public class SpiderMonitor {
|
|||
}
|
||||
|
||||
public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException {
|
||||
String jmxServerName = "WebMagic";
|
||||
String jmxServerName = "WebMagic-"+ IPUtils.getFirstNoLoopbackIPAddresses();
|
||||
|
||||
// start JNDI
|
||||
MBeanServer localServer = ManagementFactory.getPlatformMBeanServer();
|
||||
|
@ -199,7 +215,10 @@ public class SpiderMonitor {
|
|||
|
||||
SpiderMonitor spiderMonitor = new SpiderMonitor();
|
||||
spiderMonitor.register(oschinaSpider, githubSpider);
|
||||
//
|
||||
//If you want to connect it from remote, use spiderMonitor.server().jmxStart();
|
||||
//ONLY ONE server can start for a machine.
|
||||
//Others will be registered
|
||||
spiderMonitor.server().server();
|
||||
spiderMonitor.jmxStart();
|
||||
oschinaSpider.start();
|
||||
githubSpider.start();
|
|
@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.scheduler.MonitorableScheduler;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
|
|||
import redis.clients.jedis.JedisPoolConfig;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.monitor.MonitorableScheduler;
|
||||
|
||||
/**
|
||||
* Use Redis as url scheduler for distributed crawlers.<br>
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
package us.codecraft.webmagic.utils;
|
||||
|
||||
import java.net.Inet6Address;
|
||||
import java.net.InetAddress;
|
||||
import java.net.NetworkInterface;
|
||||
import java.net.SocketException;
|
||||
import java.util.Enumeration;
|
||||
|
||||
/**
|
||||
* @author code4crafer@gmail.com
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public abstract class IPUtils {

    /**
     * Finds an IP address of this machine that is not a loopback address.
     * <p>
     * The first non-loopback address that is not an IPv6 address is returned
     * immediately. If only non-loopback IPv6 addresses exist, the last one
     * encountered while enumerating the interfaces is returned.
     *
     * @return the textual form of the selected address, never {@code null}
     * @throws SocketException if the interfaces cannot be enumerated, or if
     *                         this machine has no non-loopback address at all
     */
    public static String getFirstNoLoopbackIPAddresses() throws SocketException {

        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
        if (networkInterfaces == null) {
            // getNetworkInterfaces() may return null when no interface is found.
            throw new SocketException("No network interface found on this machine");
        }

        // Remembers a non-loopback IPv6 address in case no other kind shows up.
        InetAddress ipv6Fallback = null;
        while (networkInterfaces.hasMoreElements()) {
            NetworkInterface networkInterface = networkInterfaces.nextElement();
            Enumeration<InetAddress> inetAddresses = networkInterface.getInetAddresses();
            while (inetAddresses.hasMoreElements()) {
                InetAddress address = inetAddresses.nextElement();
                if (address.isLoopbackAddress()) {
                    continue;
                }
                if (!(address instanceof Inet6Address)) {
                    return address.getHostAddress();
                }
                ipv6Fallback = address;
            }
        }

        if (ipv6Fallback == null) {
            // Previously this case ended in a NullPointerException.
            throw new SocketException("No non-loopback IP address found on this machine");
        }
        return ipv6Fallback.getHostAddress();
    }

}
|
|
@ -0,0 +1,14 @@
|
|||
package us.codecraft.webmagic.utils;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* @author code4crafer@gmail.com
|
||||
*/
|
||||
public class IPUtilsTest {
|
||||
|
||||
@Test
|
||||
public void testGetFirstNoLoopbackIPAddresses() throws Exception {
|
||||
System.out.println(IPUtils.getFirstNoLoopbackIPAddresses());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue