From 44794282770572bf091f2e420571962036706ac4 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Tue, 12 Nov 2013 13:11:31 +0800 Subject: [PATCH] add multithread support --- webmagic-scripts/deploy.sh | 2 +- .../webmagic/scripts/ScriptConsole.java | 34 +++++++++++++--- .../webmagic/scripts/ScriptEnginePool.java | 39 +++++++++++++++++++ .../webmagic/scripts/ScriptProcessor.java | 25 ++++++------ .../scripts/ScriptProcessorBuilder.java | 9 ++++- webmagic-scripts/src/main/resources/log4j.xml | 2 +- 6 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java diff --git a/webmagic-scripts/deploy.sh b/webmagic-scripts/deploy.sh index 11ff2bf..e4a121b 100644 --- a/webmagic-scripts/deploy.sh +++ b/webmagic-scripts/deploy.sh @@ -1,5 +1,5 @@ #!/bin/sh -VERSION="0.4.1-SNAPTHOS" +VERSION="0.4.1-SNAPSHOT" mvn clean package cp target/webmagic-scripts-${VERSION}.jar /usr/local/webmagic/webmagic-console.jar rsync -avz --delete target/lib/ /usr/local/webmagic/lib/ diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java index bceab4c..f4e6fb6 100644 --- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java @@ -2,6 +2,8 @@ package us.codecraft.webmagic.scripts; import com.google.common.collect.Sets; import org.apache.commons.cli.*; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; import us.codecraft.webmagic.Spider; import java.util.HashMap; @@ -85,7 +87,7 @@ public class ScriptConsole { private static void startSpider(Params params) { ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom() - .language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).build(); + 
.language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build(); pageProcessor.getSite().setSleepTime(params.getSleepTime()); pageProcessor.getSite().setAcceptStatCode(Sets.newHashSet(200, 404, 500)); Spider spider = Spider.create(pageProcessor).thread(params.getThread()); @@ -100,13 +102,15 @@ public class ScriptConsole { spider.run(); } + private static Params parseCommand(String[] args) { try { Options options = new Options(); - options.addOption(new Option("l", true, "language")); - options.addOption(new Option("t", true, "thread")); - options.addOption(new Option("f", true, "script file")); - options.addOption(new Option("s", true, "sleep time")); + options.addOption(new Option("l", "language", true, "language")); + options.addOption(new Option("t", "thread", true, "thread")); + options.addOption(new Option("f", "file", true, "script file")); + options.addOption(new Option("s", "sleep", true, "sleep time")); + options.addOption(new Option("g", "logger", true, "log level")); CommandLineParser commandLineParser = new PosixParser(); CommandLine commandLine = commandLineParser.parse(options, args); return readOptions(commandLine); @@ -143,7 +147,27 @@ public class ScriptConsole { Integer thread = Integer.parseInt(commandLine.getOptionValue("t")); params.setThread(thread); } + if (commandLine.hasOption("g")) { + configLogger(commandLine.getOptionValue("g")); + } params.setUrls(commandLine.getArgList()); return params; } + + private static void configLogger(String value) { + Logger rootLogger = Logger.getRootLogger(); + if ("debug".equalsIgnoreCase(value)) { + rootLogger.setLevel(Level.DEBUG); + } else if ("info".equalsIgnoreCase(value)) { + rootLogger.setLevel(Level.INFO); + } else if ("warn".equalsIgnoreCase(value)) { + rootLogger.setLevel(Level.WARN); + } else if ("trace".equalsIgnoreCase(value)) { + rootLogger.setLevel(Level.TRACE); + } else if ("off".equalsIgnoreCase(value)) { + rootLogger.setLevel(Level.OFF); + } 
else if ("error".equalsIgnoreCase(value)) { + rootLogger.setLevel(Level.ERROR); + } + } } diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java new file mode 100644 index 0000000..9dc7413 --- /dev/null +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java @@ -0,0 +1,39 @@ +package us.codecraft.webmagic.scripts; + +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * @author code4crafter@gmail.com + * @since 0.4.1 + */ +public class ScriptEnginePool { + + private final int size; + + private final AtomicInteger availableCount; + + private final LinkedBlockingQueue scriptEngines = new LinkedBlockingQueue(); + + public ScriptEnginePool(Language language,int size) { + this.size = size; + this.availableCount = new AtomicInteger(size); + for (int i=0;i - +