add multithread support
parent
b5f2498c99
commit
4479428277
|
@ -1,5 +1,5 @@
|
|||
#!/bin/sh
# Builds the webmagic-scripts jar with Maven and installs the jar and its
# dependency libraries under /usr/local/webmagic.

# Stop at the first failing command so a broken build is never installed.
set -e

VERSION="0.4.1-SNAPSHOT"

mvn clean package
cp "target/webmagic-scripts-${VERSION}.jar" /usr/local/webmagic/webmagic-console.jar
# --delete removes files from the installed lib directory that no longer exist in target/lib.
rsync -avz --delete target/lib/ /usr/local/webmagic/lib/
|
||||
|
|
|
@ -2,6 +2,8 @@ package us.codecraft.webmagic.scripts;
|
|||
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.commons.cli.*;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
@ -85,7 +87,7 @@ public class ScriptConsole {
|
|||
|
||||
private static void startSpider(Params params) {
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
|
||||
.language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).build();
|
||||
.language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
|
||||
pageProcessor.getSite().setSleepTime(params.getSleepTime());
|
||||
pageProcessor.getSite().setAcceptStatCode(Sets.<Integer>newHashSet(200, 404, 500));
|
||||
Spider spider = Spider.create(pageProcessor).thread(params.getThread());
|
||||
|
@ -100,13 +102,15 @@ public class ScriptConsole {
|
|||
spider.run();
|
||||
}
|
||||
|
||||
|
||||
private static Params parseCommand(String[] args) {
|
||||
try {
|
||||
Options options = new Options();
|
||||
options.addOption(new Option("l", true, "language"));
|
||||
options.addOption(new Option("t", true, "thread"));
|
||||
options.addOption(new Option("f", true, "script file"));
|
||||
options.addOption(new Option("s", true, "sleep time"));
|
||||
options.addOption(new Option("l", "language", true, "language"));
|
||||
options.addOption(new Option("t", "thread", true, "thread"));
|
||||
options.addOption(new Option("f", "file", true, "script file"));
|
||||
options.addOption(new Option("s", "sleep", true, "sleep time"));
|
||||
options.addOption(new Option("g", "logger", true, "sleep time"));
|
||||
CommandLineParser commandLineParser = new PosixParser();
|
||||
CommandLine commandLine = commandLineParser.parse(options, args);
|
||||
return readOptions(commandLine);
|
||||
|
@ -143,7 +147,27 @@ public class ScriptConsole {
|
|||
Integer thread = Integer.parseInt(commandLine.getOptionValue("t"));
|
||||
params.setThread(thread);
|
||||
}
|
||||
if (commandLine.hasOption("g")) {
|
||||
configLogger(commandLine.getOptionValue("g"));
|
||||
}
|
||||
params.setUrls(commandLine.getArgList());
|
||||
return params;
|
||||
}
|
||||
|
||||
private static void configLogger(String value) {
|
||||
Logger rootLogger = Logger.getRootLogger();
|
||||
if ("debug".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.DEBUG);
|
||||
} else if ("info".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.INFO);
|
||||
} else if ("warn".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.WARN);
|
||||
} else if ("trace".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.TRACE);
|
||||
} else if ("off".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.OFF);
|
||||
} else if ("error".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
package us.codecraft.webmagic.scripts;
|
||||
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptEngineManager;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
* @since 0.4.1
|
||||
*/
|
||||
public class ScriptEnginePool {
|
||||
|
||||
private final int size;
|
||||
|
||||
private final AtomicInteger availableCount;
|
||||
|
||||
private final LinkedBlockingQueue<ScriptEngine> scriptEngines = new LinkedBlockingQueue<ScriptEngine>();
|
||||
|
||||
public ScriptEnginePool(Language language,int size) {
|
||||
this.size = size;
|
||||
this.availableCount = new AtomicInteger(size);
|
||||
for (int i=0;i<size;i++){
|
||||
ScriptEngineManager manager = new ScriptEngineManager();
|
||||
ScriptEngine engine = manager.getEngineByName(language.getEngineName());
|
||||
scriptEngines.add(engine);
|
||||
}
|
||||
}
|
||||
|
||||
public ScriptEngine getEngine() {
|
||||
availableCount.decrementAndGet();
|
||||
return scriptEngines.poll();
|
||||
}
|
||||
|
||||
public void release(ScriptEngine scriptEngine){
|
||||
scriptEngines.add(scriptEngine);
|
||||
}
|
||||
|
||||
}
|
|
@ -7,7 +7,6 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
|
||||
import javax.script.ScriptContext;
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptEngineManager;
|
||||
import javax.script.ScriptException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -18,7 +17,7 @@ import java.io.InputStream;
|
|||
*/
|
||||
public class ScriptProcessor implements PageProcessor {
|
||||
|
||||
private ScriptEngine engine;
|
||||
private ScriptEnginePool enginePool;
|
||||
|
||||
private String defines;
|
||||
|
||||
|
@ -28,13 +27,12 @@ public class ScriptProcessor implements PageProcessor {
|
|||
|
||||
private Site site = Site.me();
|
||||
|
||||
public ScriptProcessor(Language language, String script) {
|
||||
public ScriptProcessor(Language language, String script, int threadNum) {
|
||||
if (language == null || script == null) {
|
||||
throw new IllegalArgumentException("language and script must not be null!");
|
||||
}
|
||||
this.language = language;
|
||||
ScriptEngineManager manager = new ScriptEngineManager();
|
||||
engine = manager.getEngineByName(language.getEngineName());
|
||||
enginePool = new ScriptEnginePool(language, threadNum);
|
||||
InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(language.getDefineFile());
|
||||
try {
|
||||
defines = IOUtils.toString(resourceAsStream);
|
||||
|
@ -46,11 +44,13 @@ public class ScriptProcessor implements PageProcessor {
|
|||
|
||||
@Override
|
||||
public void process(Page page) {
|
||||
ScriptContext context = engine.getContext();
|
||||
context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
|
||||
context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE);
|
||||
ScriptEngine engine = enginePool.getEngine();
|
||||
try {
|
||||
engine.eval(defines + "\n" + script, context);
|
||||
ScriptContext context = engine.getContext();
|
||||
context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
|
||||
context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE);
|
||||
try {
|
||||
engine.eval(defines + "\n" + script, context);
|
||||
// switch (language) {
|
||||
// case JavaScript:
|
||||
// NativeObject o = (NativeObject) engine.get("result");
|
||||
|
@ -64,8 +64,11 @@ public class ScriptProcessor implements PageProcessor {
|
|||
// Object o1 = engine.get("result");
|
||||
// break;
|
||||
// }
|
||||
} catch (ScriptException e) {
|
||||
e.printStackTrace();
|
||||
} catch (ScriptException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
} finally {
|
||||
enginePool.release(engine);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@ public class ScriptProcessorBuilder {
|
|||
|
||||
private String script;
|
||||
|
||||
private int threadNum = 1;
|
||||
|
||||
private ScriptProcessorBuilder() {
|
||||
}
|
||||
|
||||
|
@ -57,8 +59,13 @@ public class ScriptProcessorBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public ScriptProcessorBuilder thread(int threadNum) {
|
||||
this.threadNum = threadNum;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ScriptProcessor build(){
|
||||
return new ScriptProcessor(language,script);
|
||||
return new ScriptProcessor(language,script,threadNum);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
</appender>
|
||||
|
||||
<logger name="org.apache" additivity="false">
|
||||
<level value="warn" />
|
||||
<level value="error" />
|
||||
<appender-ref ref="stdout" />
|
||||
</logger>
|
||||
|
||||
|
|
Loading…
Reference in New Issue