Add a cron job to run the RDE Beam pipeline in parallel with MapReduce (#1500)

This commit is contained in:
Lai Jiang 2022-01-21 23:36:13 -05:00 committed by GitHub
parent 7a14f23c79
commit 4892b03ffb
2 changed files with 16 additions and 0 deletions

View file

@ -248,6 +248,9 @@ public class RdeIO {
// Now that we're done, output roll the cursor forward.
if (key.manual()) {
logger.atInfo().log("Manual operation; not advancing cursor or enqueuing upload task.");
// Temporary measure to run RDE in beam in parallel with the daily MapReduce based RDE runs.
} else if (tm().isOfy()) {
logger.atInfo().log("Ofy is primary TM; not advancing cursor or enqueuing upload task.");
} else {
outputReceiver.output(KV.of(key, revision));
}

View file

@ -36,6 +36,19 @@
<target>backend</target>
</cron>
<cron>
<url>/_dr/task/rdeStaging?beam=true</url>
<description>
This job generates a full RDE escrow deposit as a single gigantic XML
document using the Beam pipeline regardless of the current TM
configuration and streams it to cloud storage. It does not trigger the
subsequent upload tasks and is meant to run parallel with the main cron
job in order to compare the results from both runs.
</description>
<schedule>every 8 hours from 00:07 to 20:00</schedule>
<target>backend</target>
</cron>
<cron>
<url><![CDATA[/_dr/cron/fanout?queue=rde-upload&endpoint=/_dr/task/rdeUpload&forEachRealTld]]></url>
<description>