Refactor and implement a template method pattern for logger config in webmagic-scripts (#1158)
* Refactor of processSingle in PageModelExtractor
* Changed my refactor of processSingle, this one is a lot better
* Changed my refactor of processSingle, this one is a lot better
* Add lombok for getters and setters
* Refactor and implement a template method pattern for logger config
master
parent
2df7dca871
commit
d8321baf56
|
@ -53,6 +53,12 @@
|
|||
<artifactId>webmagic-extension</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>1.18.32</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
package us.codecraft.webmagic.scripts;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import us.codecraft.webmagic.scripts.languages.JRuby;
|
||||
import us.codecraft.webmagic.scripts.languages.Javascript;
|
||||
import us.codecraft.webmagic.scripts.languages.Language;
|
||||
import us.codecraft.webmagic.utils.WMCollections;
|
||||
|
||||
public class Params {
|
||||
@Getter
|
||||
Language language = new Javascript();
|
||||
|
||||
@Getter @Setter
|
||||
String scriptFileName;
|
||||
|
||||
@Getter @Setter
|
||||
List<String> urls;
|
||||
|
||||
@Getter @Setter
|
||||
int thread = 1;
|
||||
|
||||
@Getter @Setter
|
||||
int sleepTime = 1000;
|
||||
|
||||
private static Map<Language, Set<String>> alias;
|
||||
|
||||
public Params() {
|
||||
alias = new HashMap<Language, Set<String>>();
|
||||
alias.put(new Javascript(), WMCollections.<String>newHashSet("js", "javascript", "JavaScript", "JS"));
|
||||
alias.put(new JRuby(), WMCollections.<String>newHashSet("ruby", "jruby", "Ruby", "JRuby"));
|
||||
}
|
||||
|
||||
public void setLanguagefromArg(String arg) {
|
||||
for (Map.Entry<Language, Set<String>> languageSetEntry : alias.entrySet()) {
|
||||
if (languageSetEntry.getValue().contains(arg)) {
|
||||
this.language = languageSetEntry.getKey();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,90 +1,21 @@
|
|||
package us.codecraft.webmagic.scripts;
|
||||
|
||||
import org.apache.commons.cli.*;
|
||||
import org.apache.logging.log4j.Level;
|
||||
import org.apache.logging.log4j.core.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import us.codecraft.webmagic.ResultItems;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
import us.codecraft.webmagic.scripts.config.CommandLineOption;
|
||||
import us.codecraft.webmagic.utils.WMCollections;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
* @author code4crafter@gmail.com / FrancoisGib
|
||||
* @since 0.4.1
|
||||
*/
|
||||
public class ScriptConsole {
|
||||
|
||||
private static class Params {
|
||||
Language language = Language.JavaScript;
|
||||
String scriptFileName;
|
||||
List<String> urls;
|
||||
int thread = 1;
|
||||
int sleepTime = 1000;
|
||||
private static Map<Language, Set<String>> alias = new HashMap<Language, Set<String>>();
|
||||
|
||||
static {
|
||||
alias.put(Language.JavaScript, WMCollections.<String>newHashSet("js", "javascript", "JavaScript", "JS"));
|
||||
alias.put(Language.JRuby, WMCollections.<String>newHashSet("ruby", "jruby", "Ruby", "JRuby"));
|
||||
}
|
||||
|
||||
public void setLanguagefromArg(String arg) {
|
||||
for (Map.Entry<Language, Set<String>> languageSetEntry : alias.entrySet()) {
|
||||
if (languageSetEntry.getValue().contains(arg)) {
|
||||
this.language = languageSetEntry.getKey();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Language getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
private void setLanguage(Language language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
private String getScriptFileName() {
|
||||
return scriptFileName;
|
||||
}
|
||||
|
||||
private void setScriptFileName(String scriptFileName) {
|
||||
this.scriptFileName = scriptFileName;
|
||||
}
|
||||
|
||||
private List<String> getUrls() {
|
||||
return urls;
|
||||
}
|
||||
|
||||
private void setUrls(List<String> urls) {
|
||||
this.urls = urls;
|
||||
}
|
||||
|
||||
private int getThread() {
|
||||
return thread;
|
||||
}
|
||||
|
||||
private void setThread(int thread) {
|
||||
this.thread = thread;
|
||||
}
|
||||
|
||||
private int getSleepTime() {
|
||||
return sleepTime;
|
||||
}
|
||||
|
||||
private void setSleepTime(int sleepTime) {
|
||||
this.sleepTime = sleepTime;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
Params params = parseCommand(args);
|
||||
startSpider(params);
|
||||
|
@ -142,45 +73,9 @@ public class ScriptConsole {
|
|||
|
||||
private static Params readOptions(CommandLine commandLine) {
|
||||
Params params = new Params();
|
||||
if (commandLine.hasOption("l")) {
|
||||
String language = commandLine.getOptionValue("l");
|
||||
params.setLanguagefromArg(language);
|
||||
}
|
||||
if (commandLine.hasOption("f")) {
|
||||
String scriptFilename = commandLine.getOptionValue("f");
|
||||
params.setScriptFileName(scriptFilename);
|
||||
} else {
|
||||
exit();
|
||||
}
|
||||
if (commandLine.hasOption("s")) {
|
||||
Integer sleepTime = Integer.parseInt(commandLine.getOptionValue("s"));
|
||||
params.setSleepTime(sleepTime);
|
||||
}
|
||||
if (commandLine.hasOption("t")) {
|
||||
Integer thread = Integer.parseInt(commandLine.getOptionValue("t"));
|
||||
params.setThread(thread);
|
||||
}
|
||||
if (commandLine.hasOption("g")) {
|
||||
configLogger(commandLine.getOptionValue("g"));
|
||||
}
|
||||
params.setUrls(commandLine.getArgList());
|
||||
List<CommandLineOption> options = CommandLineOption.getAllOptions();
|
||||
for (CommandLineOption option : options)
|
||||
option.addParamOptionIfInCommandLine(params, commandLine);
|
||||
return params;
|
||||
}
|
||||
|
||||
private static void configLogger(String value) {
|
||||
Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
|
||||
if ("debug".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.DEBUG);
|
||||
} else if ("info".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.INFO);
|
||||
} else if ("warn".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.WARN);
|
||||
} else if ("trace".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.TRACE);
|
||||
} else if ("off".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.OFF);
|
||||
} else if ("error".equalsIgnoreCase(value)) {
|
||||
rootLogger.setLevel(Level.ERROR);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -2,6 +2,9 @@ package us.codecraft.webmagic.scripts;
|
|||
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptEngineManager;
|
||||
|
||||
import us.codecraft.webmagic.scripts.languages.Language;
|
||||
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
|
@ -11,14 +14,11 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
*/
|
||||
public class ScriptEnginePool {
|
||||
|
||||
private final int size;
|
||||
|
||||
private final AtomicInteger availableCount;
|
||||
|
||||
private final LinkedBlockingQueue<ScriptEngine> scriptEngines = new LinkedBlockingQueue<ScriptEngine>();
|
||||
|
||||
public ScriptEnginePool(Language language,int size) {
|
||||
this.size = size;
|
||||
this.availableCount = new AtomicInteger(size);
|
||||
for (int i=0;i<size;i++){
|
||||
ScriptEngineManager manager = new ScriptEngineManager();
|
||||
|
|
|
@ -4,17 +4,14 @@ package us.codecraft.webmagic.scripts;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import javax.script.ScriptContext;
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.jruby.RubyHash;
|
||||
import org.python.core.PyDictionary;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.scripts.languages.Language;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
|
@ -55,35 +52,7 @@ public class ScriptProcessor implements PageProcessor {
|
|||
context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
|
||||
context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE);
|
||||
try {
|
||||
switch (language) {
|
||||
case JavaScript:
|
||||
engine.eval(defines + "\n" + script, context);
|
||||
// NativeObject o = (NativeObject) engine.get("result");
|
||||
// if (o != null) {
|
||||
// for (Object o1 : o.getIds()) {
|
||||
// String key = String.valueOf(o1);
|
||||
// page.getResultItems().put(key, NativeObject.getProperty(o, key));
|
||||
// }
|
||||
// }
|
||||
break;
|
||||
case JRuby:
|
||||
RubyHash oRuby = (RubyHash) engine.eval(defines + "\n" + script, context);
|
||||
Iterator itruby = oRuby.entrySet().iterator();
|
||||
while (itruby.hasNext()) {
|
||||
Map.Entry pairs = (Map.Entry) itruby.next();
|
||||
page.getResultItems().put(pairs.getKey().toString(), pairs.getValue());
|
||||
}
|
||||
break;
|
||||
case Jython:
|
||||
engine.eval(defines + "\n" + script, context);
|
||||
PyDictionary oJython = (PyDictionary) engine.get("result");
|
||||
Iterator it = oJython.entrySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Map.Entry pairs = (Map.Entry) it.next();
|
||||
page.getResultItems().put(pairs.getKey().toString(), pairs.getValue());
|
||||
}
|
||||
break;
|
||||
}
|
||||
this.language.process(engine, defines, script, page);
|
||||
} catch (ScriptException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
|
|
@ -7,6 +7,9 @@ import java.io.InputStream;
|
|||
import java.nio.charset.Charset;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import us.codecraft.webmagic.scripts.languages.Javascript;
|
||||
import us.codecraft.webmagic.scripts.languages.Language;
|
||||
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
|
@ -14,7 +17,7 @@ import org.apache.commons.io.IOUtils;
|
|||
*/
|
||||
public class ScriptProcessorBuilder {
|
||||
|
||||
private static final Language DefaultLanguage = Language.JavaScript;
|
||||
private static final Language DefaultLanguage = new Javascript();
|
||||
|
||||
private Language language = DefaultLanguage;
|
||||
|
||||
|
@ -39,7 +42,6 @@ public class ScriptProcessorBuilder {
|
|||
InputStream resourceAsStream = new FileInputStream(fileName);
|
||||
this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
|
||||
} catch (IOException e) {
|
||||
//wrap IOException because I prefer a runtime exception...
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
return this;
|
||||
|
@ -50,7 +52,6 @@ public class ScriptProcessorBuilder {
|
|||
InputStream resourceAsStream = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
|
||||
this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
|
||||
} catch (IOException e) {
|
||||
//wrap IOException because I prefer a runtime exception...
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
return this;
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
package us.codecraft.webmagic.scripts.config;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
|
||||
import lombok.Getter;
|
||||
import us.codecraft.webmagic.scripts.Params;
|
||||
|
||||
public abstract class CommandLineOption {
|
||||
@Getter
|
||||
char option;
|
||||
|
||||
public CommandLineOption(char option) {
|
||||
this.option = option;
|
||||
}
|
||||
|
||||
protected abstract void addParamOption(Params params, CommandLine commandLine);
|
||||
|
||||
public void addParamOptionIfInCommandLine(Params params, CommandLine commandLine) {
|
||||
if (commandLine.hasOption(this.option))
|
||||
this.addParamOption(params, commandLine);
|
||||
}
|
||||
|
||||
public static List<CommandLineOption> getAllOptions() {
|
||||
return List.of(new OptionL(), new OptionF(), new OptionS(), new OptionT(), new OptionG());
|
||||
}
|
||||
}
|
||||
|
||||
class OptionL extends CommandLineOption {
|
||||
public OptionL() {
|
||||
super('l');
|
||||
}
|
||||
|
||||
protected void addParamOption(Params params, CommandLine commandLine) {
|
||||
String language = commandLine.getOptionValue("l");
|
||||
params.setLanguagefromArg(language);
|
||||
}
|
||||
}
|
||||
|
||||
class OptionF extends CommandLineOption {
|
||||
public OptionF() {
|
||||
super('f');
|
||||
}
|
||||
|
||||
protected void addParamOption(Params params, CommandLine commandLine) {
|
||||
String scriptFilename = commandLine.getOptionValue("f");
|
||||
params.setScriptFileName(scriptFilename);
|
||||
}
|
||||
}
|
||||
|
||||
class OptionS extends CommandLineOption {
|
||||
public OptionS() {
|
||||
super('s');
|
||||
}
|
||||
|
||||
protected void addParamOption(Params params, CommandLine commandLine) {
|
||||
Integer sleepTime = Integer.parseInt(commandLine.getOptionValue("s"));
|
||||
params.setSleepTime(sleepTime);
|
||||
}
|
||||
}
|
||||
|
||||
class OptionT extends CommandLineOption {
|
||||
public OptionT() {
|
||||
super('t');
|
||||
}
|
||||
|
||||
protected void addParamOption(Params params, CommandLine commandLine) {
|
||||
Integer thread = Integer.parseInt(commandLine.getOptionValue("t"));
|
||||
params.setThread(thread);
|
||||
}
|
||||
}
|
||||
|
||||
class OptionG extends CommandLineOption {
|
||||
public OptionG() {
|
||||
super('g');
|
||||
}
|
||||
|
||||
protected void addParamOption(Params params, CommandLine commandLine) {
|
||||
ConfigLogger.configLogger(commandLine.getOptionValue("g"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package us.codecraft.webmagic.scripts.config;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.logging.log4j.Level;
|
||||
import org.apache.logging.log4j.core.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class ConfigLogger {
|
||||
/**
|
||||
* Log the config parameter. If the counter is less than the number of available
|
||||
* options then it means that the user entered an option
|
||||
*
|
||||
* @param value The config string
|
||||
*/
|
||||
public static void configLogger(String value) {
|
||||
List<Pair<String, Level>> options = List.of(
|
||||
Pair.of("debug", Level.DEBUG),
|
||||
Pair.of("info", Level.INFO),
|
||||
Pair.of("warn", Level.WARN),
|
||||
Pair.of("trace", Level.TRACE),
|
||||
Pair.of("off", Level.OFF),
|
||||
Pair.of("error", Level.ERROR));
|
||||
Pair<String, Level> option = options.get(0);
|
||||
int i = 1;
|
||||
while (i < options.size() && !option.getLeft().equalsIgnoreCase(value))
|
||||
option = options.get(i++);
|
||||
if (i < options.size()) {
|
||||
Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
|
||||
rootLogger.setLevel(option.getRight());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
package us.codecraft.webmagic.scripts.languages;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptException;
|
||||
|
||||
import org.jruby.RubyHash;
|
||||
|
||||
import us.codecraft.webmagic.Page;
|
||||
|
||||
public class JRuby extends Language {
|
||||
public JRuby() {
|
||||
super("jruby","ruby/defines.rb","");
|
||||
}
|
||||
|
||||
public void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException {
|
||||
RubyHash oRuby = (RubyHash) engine.eval(defines + "\n" + script, engine.getContext());
|
||||
Iterator itruby = oRuby.entrySet().iterator();
|
||||
while (itruby.hasNext()) {
|
||||
Map.Entry pairs = (Map.Entry) itruby.next();
|
||||
page.getResultItems().put(pairs.getKey().toString(), pairs.getValue());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
package us.codecraft.webmagic.scripts.languages;
|
||||
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptException;
|
||||
|
||||
import us.codecraft.webmagic.Page;
|
||||
|
||||
public class Javascript extends Language {
|
||||
public Javascript() {
|
||||
super("javascript","js/defines.js","");
|
||||
}
|
||||
|
||||
public void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException {
|
||||
engine.eval(defines + "\n" + script, engine.getContext());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package us.codecraft.webmagic.scripts.languages;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptException;
|
||||
|
||||
import org.python.core.PyDictionary;
|
||||
|
||||
import us.codecraft.webmagic.Page;
|
||||
|
||||
public class Jython extends Language {
|
||||
public Jython() {
|
||||
super("jython","python/defines.py","");
|
||||
}
|
||||
|
||||
public void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException {
|
||||
engine.eval(defines + "\n" + script, engine.getContext());
|
||||
PyDictionary oJython = (PyDictionary) engine.get("result");
|
||||
Iterator it = oJython.entrySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Map.Entry pairs = (Map.Entry) it.next();
|
||||
page.getResultItems().put(pairs.getKey().toString(), pairs.getValue());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,15 +1,18 @@
|
|||
package us.codecraft.webmagic.scripts;
|
||||
package us.codecraft.webmagic.scripts.languages;
|
||||
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptException;
|
||||
import us.codecraft.webmagic.Page;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
* @author FrancoisGib
|
||||
*/
|
||||
public enum Language {
|
||||
|
||||
JavaScript("javascript","js/defines.js",""),
|
||||
|
||||
JRuby("jruby","ruby/defines.rb",""),
|
||||
|
||||
Jython("jython","python/defines.py","");
|
||||
public abstract class Language {
|
||||
public Language(String engineName, String defineFile, String gatherFile) {
|
||||
this.engineName = engineName;
|
||||
this.defineFile = defineFile;
|
||||
this.gatherFile = gatherFile;
|
||||
}
|
||||
|
||||
private String engineName;
|
||||
|
||||
|
@ -17,12 +20,6 @@ public enum Language {
|
|||
|
||||
private String gatherFile;
|
||||
|
||||
Language(String engineName, String defineFile, String gatherFile) {
|
||||
this.engineName = engineName;
|
||||
this.defineFile = defineFile;
|
||||
this.gatherFile = gatherFile;
|
||||
}
|
||||
|
||||
public String getEngineName() {
|
||||
return engineName;
|
||||
}
|
||||
|
@ -34,4 +31,6 @@ public enum Language {
|
|||
public String getGatherFile() {
|
||||
return gatherFile;
|
||||
}
|
||||
|
||||
public abstract void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException;
|
||||
}
|
|
@ -2,7 +2,11 @@ package us.codecraft.webmagic.scripts;
|
|||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.scripts.languages.JRuby;
|
||||
import us.codecraft.webmagic.scripts.languages.Javascript;
|
||||
import us.codecraft.webmagic.scripts.languages.Jython;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
|
@ -13,14 +17,14 @@ public class ScriptProcessorTest {
|
|||
|
||||
@Test
|
||||
public void testJavaScriptProcessor() {
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(Language.JavaScript).scriptFromClassPathFile("js/oschina.js").build();
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(new Javascript()).scriptFromClassPathFile("js/oschina.js").build();
|
||||
pageProcessor.getSite().setSleepTime(0);
|
||||
Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRubyProcessor() {
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(Language.JRuby).scriptFromClassPathFile("ruby/oschina.rb").build();
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(new JRuby()).scriptFromClassPathFile("ruby/oschina.rb").build();
|
||||
pageProcessor.getSite().setSleepTime(0);
|
||||
Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
|
||||
}
|
||||
|
@ -28,7 +32,7 @@ public class ScriptProcessorTest {
|
|||
|
||||
@Test
|
||||
public void testPythonProcessor() {
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(Language.Jython).scriptFromClassPathFile("python/oschina.py").build();
|
||||
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(new Jython()).scriptFromClassPathFile("python/oschina.py").build();
|
||||
pageProcessor.getSite().setSleepTime(0);
|
||||
Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue