From 52bb8a57737ad0bf373344c4c9ea47e1b905042d Mon Sep 17 00:00:00 2001
From: mcilwain
Date: Wed, 24 Aug 2016 08:36:18 -0700
Subject: [PATCH] Reduce default [] shards to 100

We've continuously had concurrent modification exceptions for our
regularly occurring []s that run on thousands of shards, perhaps
unnecessarily so. These exceptions started after the last major []
framework refactoring, which changed the default number of shards from
100 to essentially infinite. I don't think infinite is the way to go,
and 100 shards should be more than sufficient for anything we're
currently running.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=131175353
---
 .../google/registry/mapreduce/MapreduceRunner.java | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/java/google/registry/mapreduce/MapreduceRunner.java b/java/google/registry/mapreduce/MapreduceRunner.java
index ea387c098..4ac8ac242 100644
--- a/java/google/registry/mapreduce/MapreduceRunner.java
+++ b/java/google/registry/mapreduce/MapreduceRunner.java
@@ -67,8 +67,18 @@ public class MapreduceRunner {
   private String jobName;
   private String moduleName;
 
-  // Defaults for number of mappers/reducers if not specified in HTTP params.
-  private int defaultMapShards = Integer.MAX_VALUE;
+  // Defaults for number of mappers/reducers if not specified in HTTP params. The max allowable
+  // count for both (which is specified in the App Engine mapreduce framework) is 1000. We use 100
+  // mapper shards because there's a bottleneck in the App Engine mapreduce framework caused by
+  // updating the mapreduce status on a single Datastore entity (which only supports so many writes
+  // per second). The existing mapreduces don't actually do that much work for TLDs that aren't
+  // .com-sized, so the shards finish so quickly that contention becomes a problem. This number can
+  // always be tuned up for large registry systems with on the order of hundreds of thousands of
+  // entities on up.
+  // The default reducer shard count is one because most mapreduces use it to collate and output
+  // results. The ones that actually perform a substantial amount of work in a reduce step use a
+  // higher non-default number of reducer shards.
+  private int defaultMapShards = 100;
   private int defaultReduceShards = 1;
 
   /**
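
For context, here is a minimal sketch (not part of the patch itself) of the behavior the new comment describes: use a shard count from an HTTP parameter when one is supplied, otherwise fall back to the default this change sets, and clamp to the 1000-shard framework cap mentioned in the comment. The class and method names, and the idea of a clamping helper, are assumptions for illustration; only the constants 100, 1, and 1000 come from the patch.

import java.util.Optional;

/** Hypothetical sketch of resolving a shard count from an optional HTTP param. */
public class ShardCountResolver {

  // Hard cap imposed by the App Engine mapreduce framework, per the patch comment.
  private static final int MAX_SHARDS = 1000;

  // Defaults set by this change: 100 mapper shards, 1 reducer shard.
  private static final int DEFAULT_MAP_SHARDS = 100;
  private static final int DEFAULT_REDUCE_SHARDS = 1;

  /**
   * Returns the shard count to use: the HTTP param value if present, otherwise the
   * default, clamped to the framework maximum in either case.
   */
  static int resolveShards(Optional<Integer> httpParam, int defaultShards) {
    return Math.min(httpParam.orElse(defaultShards), MAX_SHARDS);
  }

  public static void main(String[] args) {
    // No param supplied: falls back to the new default of 100 (was Integer.MAX_VALUE).
    System.out.println(resolveShards(Optional.empty(), DEFAULT_MAP_SHARDS)); // 100
    // Param supplied above the cap: clamped to the framework maximum of 1000.
    System.out.println(resolveShards(Optional.of(5000), DEFAULT_MAP_SHARDS)); // 1000
    // Reducers keep a default of 1, since most mapreduces only collate output there.
    System.out.println(resolveShards(Optional.empty(), DEFAULT_REDUCE_SHARDS)); // 1
  }
}

Clamping at the framework maximum would also keep an operator-supplied override from exceeding what the App Engine mapreduce library allows, which is the same reason an effectively infinite default was problematic.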