From 31fb0048a12f17a696d3ac6dacdc3af6e7b181e7 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 7 Dec 2013 00:37:07 +0800 Subject: [PATCH] add worker --- pom.xml | 4 ++ webmagic-panel/README.md | 20 ++++++ webmagic-panel/pom.xml | 25 ++++++++ webmagic-worker/pom.xml | 45 +++++++++++++ .../codecraft/webmagic/worker/Bootstrap.java | 19 ++++++ .../webmagic/worker/SpiderManager.java | 64 +++++++++++++++++++ .../applicationContext-webmagic-worker.xml | 19 ++++++ 7 files changed, 196 insertions(+) create mode 100644 webmagic-panel/README.md create mode 100644 webmagic-panel/pom.xml create mode 100644 webmagic-worker/pom.xml create mode 100644 webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Bootstrap.java create mode 100644 webmagic-worker/src/main/java/us/codecraft/webmagic/worker/SpiderManager.java create mode 100755 webmagic-worker/src/main/resources/spring/applicationContext-webmagic-worker.xml diff --git a/pom.xml b/pom.xml index 0c158e9..ae9fb26 100644 --- a/pom.xml +++ b/pom.xml @@ -12,6 +12,8 @@ UTF-8 UTF-8 + 3.1.1.RELEASE + webmagic-parent webmagic-parent @@ -49,6 +51,8 @@ webmagic-core webmagic-extension/ webmagic-scripts/ + webmagic-panel + webmagic-worker diff --git a/webmagic-panel/README.md b/webmagic-panel/README.md new file mode 100644 index 0000000..30ddd13 --- /dev/null +++ b/webmagic-panel/README.md @@ -0,0 +1,20 @@ +Worker: + +任务执行者,提供Http接口,监控运行状态,终止和开始job + +队列: + +仍然使用redis + +Panel: + +提供Web管理后台,管理 + + + +1. 新建任务 + 1. 通过脚本 + 2. 配置 + 3. 分配机器 +2. 已有任务 +3. 
任务查看 \ No newline at end of file diff --git a/webmagic-panel/pom.xml b/webmagic-panel/pom.xml new file mode 100644 index 0000000..c0e6693 --- /dev/null +++ b/webmagic-panel/pom.xml @@ -0,0 +1,25 @@ + + + + webmagic-parent + us.codecraft + 0.4.3-SNAPSHOT + + 4.0.0 + + us.codecraft + webmagic-panel + 0.4.3-SNAPSHOT + + + + us.codecraft + webmagic-scripts + 0.4.3-SNAPSHOT + + + + + \ No newline at end of file diff --git a/webmagic-worker/pom.xml b/webmagic-worker/pom.xml new file mode 100644 index 0000000..4bb7c90 --- /dev/null +++ b/webmagic-worker/pom.xml @@ -0,0 +1,45 @@ + + + + webmagic-parent + us.codecraft + 0.4.3-SNAPSHOT + + 4.0.0 + + us.codecraft + webmagic-worker + 0.4.3-SNAPSHOT + + + + us.codecraft + webmagic-scripts + 0.4.3-SNAPSHOT + + + us.codecraft + express.java + 0.1.0 + + + org.springframework + spring-core + ${spring-version} + + + org.springframework + spring-asm + ${spring-version} + + + org.springframework + spring-context + ${spring-version} + + + + + \ No newline at end of file diff --git a/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Bootstrap.java b/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Bootstrap.java new file mode 100644 index 0000000..155588e --- /dev/null +++ b/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Bootstrap.java @@ -0,0 +1,19 @@ +package us.codecraft.webmagic.worker; + +import org.springframework.context.support.ClassPathXmlApplicationContext; +import us.codecraft.express.WebServer; + +/** + * Entry point of the webmagic worker process: loads the Spring XML application contexts found on the classpath, looks up the WebServer bean and starts it on port 11111. + * + * @author code4crafter@gmail.com + */ +public class Bootstrap { + + /** + * Boots the worker. The args parameter is not read. + * + * @throws Exception whatever is thrown while building the context or starting the server + */ + public static void main(String[] args) throws Exception { + ClassPathXmlApplicationContext classPathXmlApplicationContext = new ClassPathXmlApplicationContext( + new String[]{"classpath*:/spring/applicationContext-*.xml"} + ); + WebServer webServer = classPathXmlApplicationContext.getBean(WebServer.class); + webServer.port(11111).start(); + } + +} diff --git
a/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/SpiderManager.java b/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/SpiderManager.java new file mode 100644 index 0000000..99d7826 --- /dev/null +++ b/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/SpiderManager.java @@ -0,0 +1,64 @@ +package us.codecraft.webmagic.worker; + +import org.springframework.beans.factory.InitializingBean; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; +import us.codecraft.express.WebServer; +import us.codecraft.express.controller.AjaxController; +import us.codecraft.express.controller.ParamMap; +import us.codecraft.express.controller.ResultMap; +import us.codecraft.webmagic.Spider; +import us.codecraft.webmagic.scripts.Language; +import us.codecraft.webmagic.scripts.ScriptProcessor; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * @author code4crafter@gmail.com + */ +@Component +public class SpiderManager implements InitializingBean { + + @Autowired + private WebServer webServer; + + private Map spiderMap = new ConcurrentHashMap(); + + public Spider newSpider(ParamMap params) { + Spider spider = Spider + .create(new ScriptProcessor(Language.JavaScript, params.get("script"), params.getInt("thread"))) + .thread(params.getInt("thread")).addUrl(params.get("url")); + spider.start(); + return spider; + } + + @Override + public void afterPropertiesSet() throws Exception { + AjaxController newController = new AjaxController() { + @Override + public Object ajax(ParamMap params) { + try { + Spider spider = newSpider(params); + spiderMap.put(params.get("uuid"), spider); + return ResultMap.create().put("code", 200).put("msg", "success"); + } catch (Exception e) { + // If you provide worker to user, DO NOT return + // e.getMessage()! 
+ return ResultMap.create().put("code", 500).put("msg", e.getMessage()); + } + } + }; + webServer.post("/new/${uuid}", newController); + webServer.get("/new/${uuid}", newController); + webServer.get("/status/${uuid}", new AjaxController() { + @Override + public Object ajax(ParamMap params) { + Spider spider = spiderMap.get(params.get("uuid")); + ResultMap put = ResultMap.create().put("pageCount", spider.getPageCount()) + .put("status", spider.getStatus().name()).put("thread", spider.getThreadAlive()); + return put; + } + }); + } +} diff --git a/webmagic-worker/src/main/resources/spring/applicationContext-webmagic-worker.xml b/webmagic-worker/src/main/resources/spring/applicationContext-webmagic-worker.xml new file mode 100755 index 0000000..41f40ba --- /dev/null +++ b/webmagic-worker/src/main/resources/spring/applicationContext-webmagic-worker.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + +