Serialize request URL only in FileCacheQueueScheduler.
parent
7945c0612d
commit
5d14efc50f
|
@ -1,14 +1,13 @@
|
||||||
package us.codecraft.webmagic.scheduler;
|
package us.codecraft.webmagic.scheduler;
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
import java.io.BufferedReader;
|
||||||
import org.apache.commons.io.IOUtils;
|
import java.io.Closeable;
|
||||||
import org.apache.commons.lang3.SerializationUtils;
|
import java.io.File;
|
||||||
import org.apache.commons.lang3.math.NumberUtils;
|
import java.io.FileNotFoundException;
|
||||||
import us.codecraft.webmagic.Request;
|
import java.io.FileReader;
|
||||||
import us.codecraft.webmagic.Task;
|
import java.io.FileWriter;
|
||||||
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
import java.io.*;
|
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
@ -19,6 +18,13 @@ import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.Request;
|
||||||
|
import us.codecraft.webmagic.Task;
|
||||||
|
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store urls and cursor in files so that a Spider can resume its state after a shutdown.<br>
|
* Store urls and cursor in files so that a Spider can resume its state after a shutdown.<br>
|
||||||
|
@ -208,20 +214,11 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String serializeRequest(Request request) {
|
protected String serializeRequest(Request request) {
|
||||||
String line = String.format("%1$s\t%2$s", request.getUrl(),
|
return request.getUrl();
|
||||||
Base64.encodeBase64String(SerializationUtils.serialize(request)));
|
|
||||||
return line;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Request deserializeRequest(String line) {
|
protected Request deserializeRequest(String line) {
|
||||||
Request request;
|
return new Request(line);
|
||||||
String[] sections = line.split("\t");
|
|
||||||
if (sections.length >= 2) {
|
|
||||||
request = (Request) SerializationUtils.deserialize(Base64.decodeBase64(sections[1]));
|
|
||||||
} else {
|
|
||||||
request = new Request(sections[0]);
|
|
||||||
}
|
|
||||||
return request;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue