diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/Fetcher.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/Fetcher.java
new file mode 100644
index 0000000..86f78db
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/Fetcher.java
@@ -0,0 +1,40 @@
+package us.codecraft.webmagic.annotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+/**
+ * Field-level annotation carrying the selector expression for a page-model field.
+ * <p>
+ * {@link #value()} holds the expression text and {@link #type()} names the kind of
+ * selector it is written for; {@link Type#XPath} is used when no type is given.
+ * The annotation is retained at runtime and may only be placed on fields.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 8:40 PM
+ */
+@Target(ElementType.FIELD)
+@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
+public @interface Fetcher {
+
+    /**
+     * Kinds of selector expression a {@link Fetcher} can carry.
+     */
+    enum Type {
+        XPath,
+        Regex,
+        Css
+    }
+
+    /**
+     * @return the selector expression text
+     */
+    String value();
+
+    /**
+     * @return the kind of expression held in {@link #value()}; {@link Type#XPath} by default
+     */
+    Type type() default Type.XPath;
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/FieldFetcher.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/FieldFetcher.java
new file mode 100644
index 0000000..ee9962b
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/FieldFetcher.java
@@ -0,0 +1,43 @@
+package us.codecraft.webmagic.annotation;
+
+import us.codecraft.webmagic.selector.Selector;
+
+import java.lang.reflect.Field;
+
+/**
+ * Immutable pairing of a reflected {@link Field} with the {@link Selector}
+ * supplied alongside it at construction time.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 9:48 PM
+ */
+class FieldFetcher {
+
+    private final Selector selector;
+
+    private final Field field;
+
+    /**
+     * @param field    the field half of the pair
+     * @param selector the selector half of the pair
+     */
+    FieldFetcher(Field field, Selector selector) {
+        this.selector = selector;
+        this.field = field;
+    }
+
+    /**
+     * @return the selector passed to the constructor
+     */
+    Selector getSelector() {
+        return this.selector;
+    }
+
+    /**
+     * @return the field passed to the constructor
+     */
+    Field getField() {
+        return this.field;
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java
new file mode 100644
index 0000000..98c969e
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java
@@ -0,0 +1,104 @@
+package us.codecraft.webmagic.annotation;
+
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * {@link PageProcessor} that fills annotated model classes from crawled pages.
+ * <p>
+ * One {@link PageModelFetcher} is created per model class. For each page, every
+ * fetcher is run and its non-null result is stored on the page under the model
+ * class's canonical name. Afterwards, each link found on the page that fully
+ * matches one of the fetchers' target URL patterns is added as a new request.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 8:46 PM
+ */
+public class ObjectPageProcessor implements PageProcessor {
+
+    private final List<PageModelFetcher> pageModelFetcherList;
+
+    private final Site site;
+
+    /**
+     * Target URL patterns collected from all fetchers. {@link Pattern} does not
+     * override {@code equals}, so identical regexes declared by two model classes
+     * are kept as two entries.
+     */
+    private final Set<Pattern> targetUrlPatterns;
+
+    /**
+     * Creates a processor for the given model classes.
+     *
+     * @param site   the site returned by {@link #getSite()}
+     * @param clazzs the model classes; one {@link PageModelFetcher} is built for each
+     * @return a processor holding one fetcher per class, in argument order
+     */
+    public static ObjectPageProcessor create(Site site, Class<?>... clazzs) {
+        List<PageModelFetcher> fetchers = new ArrayList<PageModelFetcher>(clazzs.length);
+        for (Class<?> clazz : clazzs) {
+            fetchers.add(PageModelFetcher.create(clazz));
+        }
+        return new ObjectPageProcessor(site, fetchers);
+    }
+
+    private ObjectPageProcessor(Site site, List<PageModelFetcher> pageModelFetcherList) {
+        this.site = site;
+        this.pageModelFetcherList = pageModelFetcherList;
+        this.targetUrlPatterns = new HashSet<Pattern>();
+        for (PageModelFetcher pageModelFetcher : pageModelFetcherList) {
+            targetUrlPatterns.addAll(pageModelFetcher.getTargetUrlPatterns());
+        }
+    }
+
+    /**
+     * Extracts every model from the page and queues the matching links.
+     *
+     * @param page the page to process
+     */
+    @Override
+    public void process(Page page) {
+        for (PageModelFetcher pageModelFetcher : pageModelFetcherList) {
+            Object model = pageModelFetcher.process(page);
+            // A fetcher yields null when it built no model for this page; storing
+            // that null under the class name would only add an empty result.
+            if (model != null) {
+                page.putField(pageModelFetcher.getClazz().getCanonicalName(), model);
+            }
+        }
+        for (String link : page.getHtml().links().all()) {
+            if (matchesAnyTargetUrl(link)) {
+                page.addTargetRequest(new Request(link));
+            }
+        }
+    }
+
+    /**
+     * Tells whether the link fully matches at least one target URL pattern.
+     * Returning on the first hit ensures a link accepted by several patterns is
+     * queued only once.
+     */
+    private boolean matchesAnyTargetUrl(String link) {
+        for (Pattern targetUrlPattern : targetUrlPatterns) {
+            if (targetUrlPattern.matcher(link).matches()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns the site given at creation. */
+    @Override
+    public Site getSite() {
+        return site;
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelFetcher.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelFetcher.java
new file mode 100644
index 0000000..097f1af
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelFetcher.java
@@ -0,0 +1,138 @@
+package us.codecraft.webmagic.annotation;
+
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.selector.CssSelector;
+import us.codecraft.webmagic.selector.RegexSelector;
+import us.codecraft.webmagic.selector.Selector;
+import us.codecraft.webmagic.selector.XpathSelector;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * Builds instances of one model class from pages, driven by its annotations.
+ * <p>
+ * The class-level {@link TargetUrl} decides which page URLs the model applies
+ * to, and each field annotated with {@link Fetcher} is filled with whatever its
+ * selector extracts from the page.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 9:33 PM
+ */
+class PageModelFetcher {
+
+    private List<Pattern> targetUrlPatterns;
+
+    private Class<?> clazz;
+
+    private List<FieldFetcher> fieldFetchers;
+
+    /**
+     * Creates a fetcher for the given model class.
+     *
+     * @param clazz the model class to inspect
+     * @return a fetcher initialised from the class's annotations
+     */
+    public static PageModelFetcher create(Class<?> clazz) {
+        PageModelFetcher pageModelFetcher = new PageModelFetcher();
+        pageModelFetcher.init(clazz);
+        return pageModelFetcher;
+    }
+
+    /** Reads the URL patterns and the annotated fields of the model class. */
+    private void init(Class<?> clazz) {
+        this.clazz = clazz;
+        initTargetUrlPatterns();
+        fieldFetchers = new ArrayList<FieldFetcher>();
+        for (Field field : clazz.getDeclaredFields()) {
+            Fetcher fetcher = field.getAnnotation(Fetcher.class);
+            if (fetcher == null) {
+                // Fields without @Fetcher are not part of the extraction.
+                continue;
+            }
+            field.setAccessible(true);
+            fieldFetchers.add(new FieldFetcher(field, createSelector(fetcher)));
+        }
+    }
+
+    /** Builds the selector named by the annotation; XPath is the fallback. */
+    private static Selector createSelector(Fetcher fetcher) {
+        String value = fetcher.value();
+        switch (fetcher.type()) {
+            case Css:
+                return new CssSelector(value);
+            case Regex:
+                return new RegexSelector(value);
+            case XPath:
+            default:
+                return new XpathSelector(value);
+        }
+    }
+
+    /**
+     * Compiles the {@link TargetUrl} values into patterns. In each value every
+     * {@code .} is escaped and every {@code *} becomes {@code [^"'#]*}; a class
+     * without the annotation matches any URL.
+     */
+    private void initTargetUrlPatterns() {
+        targetUrlPatterns = new ArrayList<Pattern>();
+        TargetUrl targetUrl = clazz.getAnnotation(TargetUrl.class);
+        if (targetUrl == null) {
+            targetUrlPatterns.add(Pattern.compile(".*"));
+            return;
+        }
+        for (String s : targetUrl.value()) {
+            targetUrlPatterns.add(Pattern.compile(s.replace(".","\\.").replace("*","[^\"'#]*")));
+        }
+    }
+
+    /**
+     * Builds a model instance from the page.
+     *
+     * @param page the page to extract from
+     * @return the populated model, or {@code null} when the page URL matches no
+     *         target pattern or the model could not be instantiated or populated
+     */
+    public Object process(Page page) {
+        if (!matchesTargetUrl(page.getUrl().toString())) {
+            return null;
+        }
+        try {
+            Object model = clazz.newInstance();
+            // Read the page text once instead of once per field.
+            String html = page.getHtml().toString();
+            for (FieldFetcher fieldFetcher : fieldFetchers) {
+                fieldFetcher.getField().set(model, fieldFetcher.getSelector().select(html));
+            }
+            return model;
+        } catch (InstantiationException e) {
+            e.printStackTrace();
+        } catch (IllegalAccessException e) {
+            e.printStackTrace();
+        }
+        // Never hand back a half-populated model.
+        return null;
+    }
+
+    /** Tells whether the URL fully matches at least one target pattern. */
+    private boolean matchesTargetUrl(String url) {
+        for (Pattern targetPattern : targetUrlPatterns) {
+            if (targetPattern.matcher(url).matches()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    Class<?> getClazz() {
+        return clazz;
+    }
+
+    List<Pattern> getTargetUrlPatterns() {
+        return targetUrlPatterns;
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/TargetUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/TargetUrl.java
new file mode 100644
index 0000000..f4f58ed
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/TargetUrl.java
@@ -0,0 +1,23 @@
+package us.codecraft.webmagic.annotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+/**
+ * Type-level annotation listing the URL patterns of the pages a model class
+ * applies to. It is retained at runtime so it can be read reflectively.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 8:40 PM
+ */
+@Target(ElementType.TYPE)
+@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
+public @interface TargetUrl {
+
+    /**
+     * @return the URL patterns, one entry per pattern
+     */
+    String[] value();
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java
index 845c0b6..4af2b44 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java
@@ -8,7 +8,7 @@ import java.util.List;
* Date: 13-4-20
* Time: 下午8:02
*/
-interface Selector {
+public interface Selector {
public String select(String text);
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/Blog.java b/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/Blog.java
new file mode 100644
index 0000000..6c6e88c
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/Blog.java
@@ -0,0 +1,33 @@
+package us.codecraft.webmagic.annotation;
+
+/**
+ * Sample page model for the annotation-driven extraction: applies to blog
+ * post URLs on djjchobits.iteye.com and captures a title and a content field.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 10:18 PM
+ */
+@TargetUrl("http://djjchobits.iteye.com/blog/\\d+")
+public class Blog {
+
+    /** Extracted with the default (XPath) selector type. */
+    @Fetcher("//title")
+    private String title;
+
+    /** Extracted with a CSS selector. */
+    @Fetcher(value = "div#main",type = Fetcher.Type.Css)
+    private String content;
+
+    /**
+     * @return both fields rendered as {@code Blog{title='...', content='...'}}
+     */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder("Blog{");
+        text.append("title='").append(title).append('\'');
+        text.append(", content='").append(content).append('\'');
+        text.append('}');
+        return text.toString();
+    }
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java b/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java
new file mode 100644
index 0000000..5318703
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java
@@ -0,0 +1,24 @@
+package us.codecraft.webmagic.annotation;
+
+import org.junit.Test;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+
+/**
+ * Runs a spider built from the {@link Blog} model against a single start URL.
+ * The test makes no assertions; it only checks that the run completes.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-1
+ * Time: 8:42 PM
+ */
+public class TestFetcher {
+
+    @Test
+    public void test() {
+        Site startSite = Site.me().addStartUrl("http://djjchobits.iteye.com/blog/569000");
+        ObjectPageProcessor processor = ObjectPageProcessor.create(startSite, Blog.class);
+        Spider.create(processor).run();
+    }
+
+}