Add preliminary spec11 monthly pipeline

This adds the scaffolding for a basic Spec11 pipeline: it gathers all domains from all time for a given project and counts how many there are. I've factored out a few common utilities for Beam pipelines to avoid excessive duplication.

Future CLs will:
- Actually process domains via the SafeBrowsing API
- Generate a real Spec11 report
- Template queries based on the input YearMonth
- Abstract more commonalities across Beam pipelines to reduce boilerplate when adding new pipelines.

TESTED: FOSS test passed, and ran successfully on alpha

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=205997741
This commit is contained in:
larryruili 2018-07-25 08:34:58 -07:00 committed by jianglai
parent ded40851d3
commit d199b383e5
14 changed files with 252 additions and 38 deletions

View file

@ -14,6 +14,9 @@
package google.registry.beam.invoicing;
import static google.registry.beam.BeamUtils.checkFieldsNotNull;
import static google.registry.beam.BeamUtils.extractField;
import com.google.auto.value.AutoValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
@ -108,7 +111,7 @@ public abstract class BillingEvent implements Serializable {
* Apache AVRO GenericRecord</a>
*/
static BillingEvent parseFromRecord(SchemaAndRecord schemaAndRecord) {
checkFieldsNotNull(schemaAndRecord);
checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
GenericRecord record = schemaAndRecord.getRecord();
String flags = extractField(record, "flags");
double amount = getDiscountedAmount(Double.parseDouble(extractField(record, "amount")), flags);
@ -337,30 +340,4 @@ public abstract class BillingEvent implements Serializable {
}
}
}
/**
 * Returns the {@code String.valueOf} rendering of the value stored under {@code fieldName} in the
 * given {@code GenericRecord}.
 */
private static String extractField(GenericRecord record, String fieldName) {
  Object fieldValue = record.get(fieldName);
  return String.valueOf(fieldValue);
}
/**
 * Verifies that every field named in {@code FIELD_NAMES} has a non-null value in the record.
 *
 * <p>This is only a null check; a field that passes here may still generate a parse error in
 * {@code parseFromRecord}.
 *
 * @throws IllegalStateException if at least one expected field is null; the offending field names
 *     and the record are logged at severe level first
 */
private static void checkFieldsNotNull(SchemaAndRecord schemaAndRecord) {
  GenericRecord record = schemaAndRecord.getRecord();
  // Collect the names of all null fields so they can be reported together.
  ImmutableList.Builder<String> missing = ImmutableList.builder();
  for (String fieldName : FIELD_NAMES) {
    if (record.get(fieldName) == null) {
      missing.add(fieldName);
    }
  }
  ImmutableList<String> nullFields = missing.build();
  if (nullFields.isEmpty()) {
    return;
  }
  logger.atSevere().log(
      "Found unexpected null value(s) in field(s) %s for record %s",
      Joiner.on(", ").join(nullFields), record);
  throw new IllegalStateException("Read null value from Bigquery query");
}
}