extension point of geturl #118
parent
ec1c2e8cbc
commit
01aec7e1ab
|
@ -46,14 +46,18 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isDuplicate(Request request, Task task) {
|
public boolean isDuplicate(Request request, Task task) {
|
||||||
boolean isDuplicate = bloomFilter.mightContain(request.getUrl());
|
boolean isDuplicate = bloomFilter.mightContain(getUrl(request));
|
||||||
if (!isDuplicate) {
|
if (!isDuplicate) {
|
||||||
bloomFilter.put(request.getUrl());
|
bloomFilter.put(getUrl(request));
|
||||||
counter.incrementAndGet();
|
counter.incrementAndGet();
|
||||||
}
|
}
|
||||||
return isDuplicate;
|
return isDuplicate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected String getUrl(Request request) {
|
||||||
|
return request.getUrl();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void resetDuplicateCheck(Task task) {
|
public void resetDuplicateCheck(Task task) {
|
||||||
rebuildBloomFilter();
|
rebuildBloomFilter();
|
||||||
|
|
|
@ -16,7 +16,11 @@ public class HashSetDuplicateRemover implements DuplicateRemover {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isDuplicate(Request request, Task task) {
|
public boolean isDuplicate(Request request, Task task) {
|
||||||
return !urls.add(request.getUrl());
|
return !urls.add(getUrl(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String getUrl(Request request) {
|
||||||
|
return request.getUrl();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue