diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java index 5806602..32d8354 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java @@ -36,51 +36,61 @@ public class MultiPagePipeline implements Pipeline { private void handleObject(Iterator> iterator) { Map.Entry objectEntry = iterator.next(); Object o = objectEntry.getValue(); + //需要拼凑 if (o instanceof MultiPageModel) { MultiPageModel multiPageModel = (MultiPageModel) o; - pageMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), Boolean.TRUE); - if (multiPageModel.getOtherPages() != null) { - for (String otherPage : multiPageModel.getOtherPages()) { - Boolean aBoolean = pageMap.get(multiPageModel.getPageKey(), otherPage); - if (aBoolean == null) { - pageMap.put(multiPageModel.getPageKey(), otherPage, Boolean.FALSE); - } - } - } - //check if all pages are processed - Map booleanMap = pageMap.get(multiPageModel.getPageKey()); - objectMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), multiPageModel); - if (booleanMap == null) { - return; - } - for (Map.Entry stringBooleanEntry : booleanMap.entrySet()) { - if (!stringBooleanEntry.getValue()) { - iterator.remove(); - return; - } - } - List> entryList = new ArrayList>(); - entryList.addAll(objectMap.get(multiPageModel.getPageKey()).entrySet()); - if (entryList.size() != 0) { - Collections.sort(entryList, new Comparator>() { - @Override - public int compare(Map.Entry o1, Map.Entry o2) { - try { - int i1 = Integer.parseInt(o1.getKey()); - int i2 = Integer.parseInt(o2.getKey()); - return i1 - i2; - } catch (NumberFormatException e) { - return o1.getKey().compareTo(o2.getKey()); + //这次处理的部分,设置为完成 + pageMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), Boolean.FALSE); + //每个key单独加锁 + synchronized (pageMap.get(multiPageModel.getPageKey())) { + pageMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), Boolean.TRUE); + //其他需要拼凑的部分 + if (multiPageModel.getOtherPages() != null) { + for (String otherPage : multiPageModel.getOtherPages()) { + Boolean aBoolean = pageMap.get(multiPageModel.getPageKey(), otherPage); + if (aBoolean == null) { + pageMap.put(multiPageModel.getPageKey(), otherPage, Boolean.FALSE); } } - }); - MultiPageModel value = entryList.get(0).getValue(); - for (int i = 1; i < entryList.size(); i++) { - value = value.combine(entryList.get(i).getValue()); } - objectEntry.setValue(value); + //check if all pages are processed + Map booleanMap = pageMap.get(multiPageModel.getPageKey()); + objectMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), multiPageModel); + if (booleanMap == null) { + return; + } + // /过滤,这次完成的page item中,还未拼凑完整的item,不进入下一个pipeline + for (Map.Entry stringBooleanEntry : booleanMap.entrySet()) { + if (!stringBooleanEntry.getValue()) { + iterator.remove(); + return; + } + } + List> entryList = new ArrayList>(); + entryList.addAll(objectMap.get(multiPageModel.getPageKey()).entrySet()); + if (entryList.size() != 0) { + Collections.sort(entryList, new Comparator>() { + @Override + public int compare(Map.Entry o1, Map.Entry o2) { + try { + int i1 = Integer.parseInt(o1.getKey()); + int i2 = Integer.parseInt(o2.getKey()); + return i1 - i2; + } catch (NumberFormatException e) { + return o1.getKey().compareTo(o2.getKey()); + } + } + }); + // 合并 + MultiPageModel value = entryList.get(0).getValue(); + for (int i = 1; i < entryList.size(); i++) { + value = value.combine(entryList.get(i).getValue()); + } + objectEntry.setValue(value); + } } } + } } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java index 92c05c8..70e2109 100755 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java @@ -75,8 +75,9 @@ public class DoubleKeyMap extends MultiKeyMapBase { * @param value * @return value */ - public V put(K1 key1, K2 key2, V value) { + public synchronized V put(K1 key1, K2 key2, V value) { if (map.get(key1) == null) { + //不加锁的话,多个线程有可能都会执行到这里 map.put(key1, this.newMap()); } return get(key1).put(key2, value); @@ -87,7 +88,7 @@ public class DoubleKeyMap extends MultiKeyMapBase { * @param key2 * @return value */ - public V remove(K1 key1, K2 key2) { + public synchronized V remove(K1 key1, K2 key2) { if (get(key1) == null) { return null; }