Merge branch 'master' of github.com:code4craft/webmagic
commit
362fdd0662
|
@ -1,4 +1,5 @@
|
||||||
target/*
|
target
|
||||||
*.iml
|
*.iml
|
||||||
out/
|
out/
|
||||||
.idea
|
.idea
|
||||||
|
|
||||||
|
|
|
@ -127,3 +127,7 @@ To write webmagic, I referred to the projects below:
|
||||||
### Mail-list:
|
### Mail-list:
|
||||||
|
|
||||||
[https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java)
|
[https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java)
|
||||||
|
|
||||||
|
|
||||||
|
[Bitdeli Badge](https://bitdeli.com/free "Bitdeli Badge")
|
||||||
|
|
||||||
|
|
|
@ -126,7 +126,7 @@ public class FileCacheQueueScheduler implements Scheduler {
|
||||||
private void readCursorFile() throws IOException {
|
private void readCursorFile() throws IOException {
|
||||||
BufferedReader fileCursorReader = null;
|
BufferedReader fileCursorReader = null;
|
||||||
try {
|
try {
|
||||||
new BufferedReader(new FileReader(getFileName(fileCursor)));
|
fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor)));
|
||||||
String line;
|
String line;
|
||||||
//read the last number
|
//read the last number
|
||||||
while ((line = fileCursorReader.readLine()) != null) {
|
while ((line = fileCursorReader.readLine()) != null) {
|
||||||
|
|
|
@ -84,6 +84,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
|
||||||
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
||||||
String content = webElement.getAttribute("outerHTML");
|
String content = webElement.getAttribute("outerHTML");
|
||||||
Page page = new Page();
|
Page page = new Page();
|
||||||
|
page.setRawText(content);
|
||||||
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
||||||
page.setUrl(new PlainText(request.getUrl()));
|
page.setUrl(new PlainText(request.getUrl()));
|
||||||
page.setRequest(request);
|
page.setRequest(request);
|
||||||
|
|
Loading…
Reference in New Issue