diff --git a/pom.xml b/pom.xml
index 2aa3df7..bc3d03a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@
webmagic-core
webmagic-extension/
+ webmagic-scripts/
diff --git a/webmagic-core/src/main/resources/log4j.xml b/webmagic-core/src/main/resources/log4j.xml
index 9084694..c2b5a2f 100644
--- a/webmagic-core/src/main/resources/log4j.xml
+++ b/webmagic-core/src/main/resources/log4j.xml
@@ -8,21 +8,11 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/javascript/JsScriptProcessor.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/javascript/JsScriptProcessor.java
new file mode 100644
index 0000000..3c5982a
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/javascript/JsScriptProcessor.java
@@ -0,0 +1,153 @@
+package us.codecraft.webmagic.javascript;
+
+import org.apache.commons.io.IOUtils;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.scripts.ScriptProcessor;
+import us.codecraft.webmagic.scripts.ScriptProcessorBuilder;
+
+import javax.script.ScriptContext;
+import javax.script.ScriptEngine;
+import javax.script.ScriptEngineManager;
+import javax.script.ScriptException;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * {@link PageProcessor} that evaluates a JavaScript snippet against every page.
+ * <p>
+ * The prelude {@code js/defines.js} is read from the class path once, when the
+ * processor is constructed, and is prepended to the user script on every
+ * evaluation. The current {@link Page} is bound to the script variable {@code page}.
+ *
+ * @author code4crafter@gmail.com
+ * @since 0.4.1
+ */
+public class JsScriptProcessor implements PageProcessor {
+
+    /** Class path location of the prelude evaluated before the user script. */
+    private static final String DEFINES_RESOURCE = "js/defines.js";
+
+    private final ScriptEngine engine;
+
+    private final String defines;
+
+    private final String script;
+
+    /** Held in a field so that settings applied through {@link #getSite()} are retained. */
+    private final Site site = Site.me();
+
+    /**
+     * Creates a processor for the given script source.
+     *
+     * @param script JavaScript source to run for each page, must not be null
+     * @throws IOException if the prelude is missing from the class path or cannot be read
+     * @throws IllegalArgumentException if {@code script} is null
+     * @throws IllegalStateException if no script engine is registered as "javascript"
+     */
+    JsScriptProcessor(String script) throws IOException {
+        if (script == null) {
+            throw new IllegalArgumentException("script must not be null!");
+        }
+        engine = new ScriptEngineManager().getEngineByName("javascript");
+        if (engine == null) {
+            // getEngineByName reports an unknown engine with null; fail here, not later in process().
+            throw new IllegalStateException("no script engine available for name: javascript");
+        }
+        InputStream definesStream = this.getClass().getClassLoader().getResourceAsStream(DEFINES_RESOURCE);
+        defines = readAndClose(definesStream, DEFINES_RESOURCE);
+        this.script = script;
+    }
+
+    /**
+     * Builds a processor from a script stored on the file system.
+     *
+     * @param fileName path of the script file
+     * @return a processor running that script
+     * @throws IllegalArgumentException if the script or the prelude cannot be read
+     */
+    public static JsScriptProcessor fromFile(String fileName) {
+        try {
+            return new JsScriptProcessor(readAndClose(new FileInputStream(fileName), fileName));
+        } catch (IOException e) {
+            //wrap IOException because I prefer a runtime exception...
+            throw new IllegalArgumentException(e);
+        }
+    }
+
+    /**
+     * Builds a processor from a script located on the class path.
+     *
+     * @param fileName resource name, resolved through this class's class loader
+     * @return a processor running that script
+     * @throws IllegalArgumentException if the script or the prelude is missing or cannot be read
+     */
+    public static JsScriptProcessor fromClassPathFile(String fileName) {
+        try {
+            InputStream resource = JsScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
+            return new JsScriptProcessor(readAndClose(resource, fileName));
+        } catch (IOException e) {
+            //wrap IOException because I prefer a runtime exception...
+            throw new IllegalArgumentException(e);
+        }
+    }
+
+    /**
+     * Reads the whole stream as text and always closes it.
+     *
+     * @param in stream to drain; null is reported as a missing resource
+     * @param name resource name used in the error message
+     * @return the stream content
+     * @throws IOException if {@code in} is null or reading fails
+     */
+    private static String readAndClose(InputStream in, String name) throws IOException {
+        if (in == null) {
+            // ClassLoader.getResourceAsStream returns null when the resource does not exist.
+            throw new IOException("resource not found: " + name);
+        }
+        try {
+            return IOUtils.toString(in);
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+    /**
+     * Binds the page to the engine-scope variable {@code page}, then evaluates the
+     * prelude followed by the user script.
+     *
+     * @param page page handed to the script
+     */
+    @Override
+    public void process(Page page) {
+        ScriptContext context = engine.getContext();
+        context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
+        try {
+            engine.eval(defines + script, context);
+        } catch (ScriptException e) {
+            // Best-effort: a script failure is printed and this call returns normally.
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * @return the site settings of this processor; the same instance on every call
+     */
+    @Override
+    public Site getSite() {
+        return site;
+    }
+
+    /**
+     * Demo entry point: crawls one blog page with the class path script {@code js/oschina.js}.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().scriptFromClassPathFile("js/oschina.js").build();
+        Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").run();
+    }
+}
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/RubyScriptProcessor.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/jruby/RubyScriptProcessor.java
similarity index 91%
rename from webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/RubyScriptProcessor.java
rename to webmagic-scripts/src/main/java/us/codecraft/webmagic/jruby/RubyScriptProcessor.java
index cf6801c..409374a 100644
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/RubyScriptProcessor.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/jruby/RubyScriptProcessor.java
@@ -1,9 +1,10 @@
-package us.codecraft.webmagic.processor;
+package us.codecraft.webmagic.jruby;
import org.apache.commons.io.IOUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
@@ -15,7 +16,7 @@ import java.io.InputStream;
/**
* @author code4crafter@gmail.com
*/
-public class RubyScriptProcessor implements PageProcessor{
+public class RubyScriptProcessor implements PageProcessor {
private ScriptEngine rubyEngine;
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java
new file mode 100644
index 0000000..c7ddcda
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java
@@ -0,0 +1,50 @@
+package us.codecraft.webmagic.scripts;
+
+/**
+ * Script languages supported by the script processor.
+ * <p>
+ * Each constant carries the script engine name, the class path location of the
+ * defines file for that language, and a gather file entry that is the empty
+ * string for every constant declared here.
+ *
+ * @author code4crafter@gmail.com
+ */
+public enum Language {
+
+    JavaScript("javascript", "js/defines.js", ""),
+
+    JRuby("jruby", "ruby/defines.rb", "");
+
+    private final String engineName;
+
+    private final String defineFile;
+
+    private final String gatherFile;
+
+    Language(String engineName, String defineFile, String gatherFile) {
+        this.engineName = engineName;
+        this.defineFile = defineFile;
+        this.gatherFile = gatherFile;
+    }
+
+    /**
+     * @return the name used to look up the script engine for this language
+     */
+    public String getEngineName() {
+        return engineName;
+    }
+
+    /**
+     * @return the class path location of the defines file for this language
+     */
+    public String getDefineFile() {
+        return defineFile;
+    }
+
+    /**
+     * @return the gather file entry for this language
+     */
+    public String getGatherFile() {
+        return gatherFile;
+    }
+}
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/JsScriptProcessor.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
similarity index 50%
rename from webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/JsScriptProcessor.java
rename to webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
index 51ec04e..c1ec74a 100644
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/JsScriptProcessor.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
@@ -1,9 +1,9 @@
-package us.codecraft.webmagic.processor;
+package us.codecraft.webmagic.scripts;
import org.apache.commons.io.IOUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
@@ -14,48 +14,51 @@ import java.io.InputStream;
/**
* @author code4crafter@gmail.com
+ * @since 0.4.1
*/
-public class JsScriptProcessor implements PageProcessor{
+public class ScriptProcessor implements PageProcessor { // PageProcessor that evaluates the language's defines file followed by a user script for each page
- private ScriptEngine rubyEngine;
+ private ScriptEngine engine;
private String defines;
private String script;
- public JsScriptProcessor(String filename){
+ private final Language language; // supplies the engine name and the defines file location
+
+ private Site site = Site.me(); // single instance: returned by getSite() and bound for scripts as "config"
+
+ public ScriptProcessor(Language language, String script) {
+ if (language == null || script == null) { // reject missing arguments before any engine lookup
+ throw new IllegalArgumentException("language and script must not be null!");
+ }
+ this.language = language;
ScriptEngineManager manager = new ScriptEngineManager();
- rubyEngine = manager.getEngineByName("javascript");
- InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream("js/defines.js");
+ engine = manager.getEngineByName(language.getEngineName()); // engine is selected by the language's engine name
+ InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(language.getDefineFile()); // defines file is read from the class path
try {
defines = IOUtils.toString(resourceAsStream);
- resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(filename);
- script = IOUtils.toString(resourceAsStream);
} catch (IOException e) {
- e.printStackTrace();
+ throw new IllegalArgumentException(e); // a read failure is reported to the caller instead of being printed
}
-
-
+ this.script = script; // script text is supplied by the caller; it is no longer loaded here
}
@Override
public void process(Page page) {
- ScriptContext context = rubyEngine.getContext();
+ ScriptContext context = engine.getContext(); // the engine's own context is used for the bindings below
context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
+ context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE); // exposes the site to the script as "config"
try {
- rubyEngine.eval(defines+script, context);
+ engine.eval(defines + script, context); // defines text first, then the user script, as one source string
} catch (ScriptException e) {
e.printStackTrace();
}
-
}
@Override
public Site getSite() {
- return Site.me();
+ return site; // same instance on every call
}
- public static void main(String[] args) {
- Spider.create(new JsScriptProcessor("js/oschina.js")).addUrl("http://my.oschina.net/flashsword/blog").run();
- }
}
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
new file mode 100644
index 0000000..2958729
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
@@ -0,0 +1,120 @@
+package us.codecraft.webmagic.scripts;
+
+import org.apache.commons.io.IOUtils;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Fluent builder for {@link ScriptProcessor}.
+ * <p>
+ * The language defaults to {@link Language#JavaScript}. The script can be given
+ * directly as text, read from a file, or read from a class path resource.
+ *
+ * @author code4crafter@gmail.com
+ * @since 0.4.1
+ */
+public class ScriptProcessorBuilder {
+
+    private static final Language DEFAULT_LANGUAGE = Language.JavaScript;
+
+    private Language language = DEFAULT_LANGUAGE;
+
+    private String script;
+
+    private ScriptProcessorBuilder() {
+    }
+
+    /**
+     * @return a new builder preset to the default language
+     */
+    public static ScriptProcessorBuilder custom() {
+        return new ScriptProcessorBuilder();
+    }
+
+    /**
+     * Selects the script language.
+     *
+     * @param language language of the script
+     * @return this builder
+     */
+    public ScriptProcessorBuilder language(Language language) {
+        this.language = language;
+        return this;
+    }
+
+    /**
+     * Reads the script from the file system.
+     *
+     * @param fileName path of the script file
+     * @return this builder
+     * @throws IllegalArgumentException if the file cannot be read
+     */
+    public ScriptProcessorBuilder scriptFromFile(String fileName) {
+        try {
+            this.script = readAndClose(new FileInputStream(fileName), fileName);
+        } catch (IOException e) {
+            //wrap IOException because I prefer a runtime exception...
+            throw new IllegalArgumentException(e);
+        }
+        return this;
+    }
+
+    /**
+     * Reads the script from the class path.
+     *
+     * @param fileName resource name, resolved through the class loader of {@link ScriptProcessor}
+     * @return this builder
+     * @throws IllegalArgumentException if the resource is missing or cannot be read
+     */
+    public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) {
+        try {
+            InputStream resource = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
+            this.script = readAndClose(resource, fileName);
+        } catch (IOException e) {
+            //wrap IOException because I prefer a runtime exception...
+            throw new IllegalArgumentException(e);
+        }
+        return this;
+    }
+
+    /**
+     * Uses the given text as the script.
+     *
+     * @param script script source
+     * @return this builder
+     */
+    public ScriptProcessorBuilder script(String script) {
+        this.script = script;
+        return this;
+    }
+
+    /**
+     * @return a processor created from the configured language and script
+     */
+    public ScriptProcessor build() {
+        return new ScriptProcessor(language, script);
+    }
+
+    /**
+     * Reads the whole stream as text and always closes it.
+     *
+     * @param in stream to drain; null is reported as a missing resource
+     * @param name resource name used in the error message
+     * @return the stream content
+     * @throws IOException if {@code in} is null or reading fails
+     */
+    private static String readAndClose(InputStream in, String name) throws IOException {
+        if (in == null) {
+            // ClassLoader.getResourceAsStream returns null when the resource does not exist.
+            throw new IOException("resource not found: " + name);
+        }
+        try {
+            return IOUtils.toString(in);
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+}
diff --git a/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java b/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
new file mode 100644
index 0000000..ec3f674
--- /dev/null
+++ b/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
@@ -0,0 +1,39 @@
+package us.codecraft.webmagic.scripts;
+
+import org.junit.Test;
+import us.codecraft.webmagic.Spider;
+
+/**
+ * Runs the sample script of each language through a spider on one blog URL.
+ *
+ * @author code4crafter@gmail.com
+ * @since 0.4.1
+ */
+public class ScriptProcessorTest {
+
+    @Test
+    public void testJavaScriptProcessor() {
+        crawlBlogWith(Language.JavaScript, "js/oschina.js");
+    }
+
+    @Test
+    public void testRubyProcessor() {
+        crawlBlogWith(Language.JRuby, "ruby/oschina.rb");
+    }
+
+    /**
+     * Builds a processor from the class path script, sets the site sleep time to 0
+     * and runs a spider on the blog URL with URL spawning switched off.
+     *
+     * @param language language of the script
+     * @param scriptResource class path location of the script
+     */
+    private static void crawlBlogWith(Language language, String scriptResource) {
+        ScriptProcessorBuilder builder = ScriptProcessorBuilder.custom();
+        builder.language(language);
+        builder.scriptFromClassPathFile(scriptResource);
+        ScriptProcessor pageProcessor = builder.build();
+        pageProcessor.getSite().setSleepTime(0);
+        Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
+    }
+}
diff --git a/webmagic-scripts/src/test/resouces/log4j.xml b/webmagic-scripts/src/test/resouces/log4j.xml
new file mode 100644
index 0000000..1f64d8d
--- /dev/null
+++ b/webmagic-scripts/src/test/resouces/log4j.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+