test and so on
parent
4f22f1210e
commit
ec1c2e8cbc
|
@ -23,9 +23,14 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
|
||||||
private AtomicInteger counter;
|
private AtomicInteger counter;
|
||||||
|
|
||||||
public BloomFilterDuplicateRemover(int expectedInsertions) {
|
public BloomFilterDuplicateRemover(int expectedInsertions) {
|
||||||
this(expectedInsertions, 0.03);
|
this(expectedInsertions, 0.01);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param expectedInsertions the number of expected insertions to the constructed Bloom filter
|
||||||
|
* @param fpp the desired false positive probability (must be positive and less than 1.0)
|
||||||
|
*/
|
||||||
public BloomFilterDuplicateRemover(int expectedInsertions, double fpp) {
|
public BloomFilterDuplicateRemover(int expectedInsertions, double fpp) {
|
||||||
this.expectedInsertions = expectedInsertions;
|
this.expectedInsertions = expectedInsertions;
|
||||||
this.fpp = fpp;
|
this.fpp = fpp;
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
package us.codecraft.webmagic.scheduler;
|
package us.codecraft.webmagic.scheduler;
|
||||||
|
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.scheduler.component.BloomFilterDuplicateRemover;
|
import us.codecraft.webmagic.scheduler.component.BloomFilterDuplicateRemover;
|
||||||
|
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
|
||||||
|
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
@ -24,4 +27,54 @@ public class BloomFilterDuplicateRemoverTest {
|
||||||
assertThat(isDuplicate).isTrue();
|
assertThat(isDuplicate).isTrue();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ignore("long time")
|
||||||
|
@Test
|
||||||
|
public void testMemory() throws Exception {
|
||||||
|
int times = 5000000;
|
||||||
|
DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times,0.005);
|
||||||
|
long freeMemory = Runtime.getRuntime().freeMemory();
|
||||||
|
long time = System.currentTimeMillis();
|
||||||
|
for (int i = 0; i < times; i++) {
|
||||||
|
duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
|
||||||
|
}
|
||||||
|
System.out.println("Time used by bloomfilter:" + (System.currentTimeMillis() - time));
|
||||||
|
System.out.println("Memory used by bloomfilter:" + (freeMemory - Runtime.getRuntime().freeMemory()));
|
||||||
|
|
||||||
|
duplicateRemover = new HashSetDuplicateRemover();
|
||||||
|
System.gc();
|
||||||
|
freeMemory = Runtime.getRuntime().freeMemory();
|
||||||
|
time = System.currentTimeMillis();
|
||||||
|
for (int i = 0; i < times; i++) {
|
||||||
|
duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
|
||||||
|
}
|
||||||
|
System.out.println("Time used by hashset:" + (System.currentTimeMillis() - time));
|
||||||
|
System.out.println("Memory used by hashset:" + (freeMemory - Runtime.getRuntime().freeMemory()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ignore("long time")
|
||||||
|
@Test
|
||||||
|
public void testMissHit() throws Exception {
|
||||||
|
int times = 5000000;
|
||||||
|
DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times, 0.01);
|
||||||
|
int right = 0;
|
||||||
|
int wrong = 0;
|
||||||
|
int missCheck = 0;
|
||||||
|
for (int i = 0; i < times; i++) {
|
||||||
|
boolean duplicate = duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
|
||||||
|
if (duplicate) {
|
||||||
|
wrong++;
|
||||||
|
} else {
|
||||||
|
right++;
|
||||||
|
}
|
||||||
|
duplicate = duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
|
||||||
|
if (!duplicate) {
|
||||||
|
missCheck++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("Right count: " + right + " Wrong count: " + wrong + " Miss check: " + missCheck);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue