add scripts
parent
4cd3e1d871
commit
f1d5e297bf
|
@ -1,6 +1,7 @@
|
||||||
package us.codecraft.webmagic.scripts;
|
package us.codecraft.webmagic.scripts;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import sun.org.mozilla.javascript.internal.NativeObject;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
@ -11,6 +12,7 @@ import javax.script.ScriptEngineManager;
|
||||||
import javax.script.ScriptException;
|
import javax.script.ScriptException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
|
@ -51,6 +53,19 @@ public class ScriptProcessor implements PageProcessor {
|
||||||
context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE);
|
context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE);
|
||||||
try {
|
try {
|
||||||
engine.eval(defines + "\n" + script, context);
|
engine.eval(defines + "\n" + script, context);
|
||||||
|
switch (language) {
|
||||||
|
case JavaScript:
|
||||||
|
NativeObject o = (NativeObject) engine.get("result");
|
||||||
|
if (o != null) {
|
||||||
|
for (Map.Entry<Object, Object> objectObjectEntry : o.entrySet()) {
|
||||||
|
page.getResultItems().put(objectObjectEntry.getKey().toString(), objectObjectEntry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case JRuby:
|
||||||
|
Object o1 = engine.get("result");
|
||||||
|
break;
|
||||||
|
}
|
||||||
} catch (ScriptException e) {
|
} catch (ScriptException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
title = css "div.BlogTitle h1"
|
title = css "div.BlogTitle h1"
|
||||||
content = css "div.BlogContent"
|
content = css "div.BlogContent"
|
||||||
urls "http://my\\.oschina\\.net/flashsword/blog/\\d+"
|
urls "http://my\\.oschina\\.net/flashsword/blog/\\d+"
|
||||||
puts title
|
|
||||||
puts content
|
|
Loading…
Reference in New Issue