mirror of
https://github.com/google/nomulus.git
synced 2025-05-19 02:39:34 +02:00
Fix open source build
It broke because I forgot to add the new spec11 packages to gtld. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=206021827
This commit is contained in:
parent
d199b383e5
commit
c87fde605c
10 changed files with 976 additions and 0 deletions
26
java/google/registry/beam/spec11/BUILD
Normal file
26
java/google/registry/beam/spec11/BUILD
Normal file
|
@ -0,0 +1,26 @@
|
|||
package(
|
||||
default_visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
# Java library for the spec11 Beam pipeline: every *.java file in this package,
# with the files under sql/ bundled as resources.
java_library(
|
||||
name = "spec11",
|
||||
srcs = glob(["*.java"]),
|
||||
resources = glob(["sql/*"]),
|
||||
deps = [
|
||||
"//java/google/registry/beam",
|
||||
"//java/google/registry/config",
|
||||
"@com_google_auto_value",
|
||||
"@com_google_dagger",
|
||||
"@com_google_flogger",
|
||||
"@com_google_flogger_system_backend",
|
||||
"@com_google_guava",
|
||||
"@javax_inject",
|
||||
"@org_apache_avro",
|
||||
"@org_apache_beam_runners_direct_java",
|
||||
"@org_apache_beam_runners_google_cloud_dataflow_java",
|
||||
"@org_apache_beam_sdks_java_core",
|
||||
"@org_apache_beam_sdks_java_io_google_cloud_platform",
|
||||
],
|
||||
)
|
118
java/google/registry/beam/spec11/Spec11Pipeline.java
Normal file
118
java/google/registry/beam/spec11/Spec11Pipeline.java
Normal file
|
@ -0,0 +1,118 @@
|
|||
// Copyright 2018 The Nomulus Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package google.registry.beam.spec11;
|
||||
|
||||
import google.registry.config.RegistryConfig.Config;
|
||||
import java.io.Serializable;
|
||||
import javax.inject.Inject;
|
||||
import org.apache.beam.runners.dataflow.DataflowRunner;
|
||||
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
|
||||
import org.apache.beam.sdk.Pipeline;
|
||||
import org.apache.beam.sdk.coders.SerializableCoder;
|
||||
import org.apache.beam.sdk.io.TextIO;
|
||||
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
|
||||
import org.apache.beam.sdk.options.Description;
|
||||
import org.apache.beam.sdk.options.PipelineOptionsFactory;
|
||||
import org.apache.beam.sdk.options.ValueProvider;
|
||||
import org.apache.beam.sdk.transforms.Count;
|
||||
import org.apache.beam.sdk.transforms.ToString;
|
||||
import org.apache.beam.sdk.values.PCollection;
|
||||
|
||||
/**
|
||||
* Definition of a Dataflow pipeline template, which generates a given month's spec11 report.
|
||||
*
|
||||
* <p>To stage this template on GCS, run the {@link
|
||||
* google.registry.tools.DeploySpec11PipelineCommand} Nomulus command.
|
||||
*
|
||||
* <p>Then, you can run the staged template via the API client library, gCloud or a raw REST call.
|
||||
*
|
||||
* @see <a href="https://cloud.google.com/dataflow/docs/templates/overview">Dataflow Templates</a>
|
||||
*/
|
||||
public class Spec11Pipeline implements Serializable {
|
||||
|
||||
/** Project ID set on the pipeline options in {@link #deploy}. */
@Inject
|
||||
@Config("projectId")
|
||||
String projectId;
|
||||
|
||||
/** Staging location set on the pipeline options in {@link #deploy}. */
@Inject
|
||||
@Config("beamStagingUrl")
|
||||
String beamStagingUrl;
|
||||
|
||||
/** Template location set on the pipeline options in {@link #deploy}. */
@Inject
|
||||
@Config("spec11TemplateUrl")
|
||||
String spec11TemplateUrl;
|
||||
|
||||
/** Destination the text output is written to in {@link #countDomainsAndOutputResults}. */
@Inject
|
||||
@Config("spec11BucketUrl")
|
||||
String spec11BucketUrl;
|
||||
|
||||
@Inject
|
||||
Spec11Pipeline() {}
|
||||
|
||||
/** Custom options for running the spec11 pipeline. */
|
||||
interface Spec11PipelineOptions extends DataflowPipelineOptions {
|
||||
/** Returns the yearMonth we're generating the report for, in yyyy-MM format. */
|
||||
@Description("The yearMonth we generate the report for, in yyyy-MM format.")
|
||||
ValueProvider<String> getYearMonth();
|
||||
|
||||
/**
|
||||
* Sets the yearMonth we generate the report for, in yyyy-MM format.
|
||||
*
|
||||
* <p>This is implicitly set when executing the Dataflow template, by specifying the 'yearMonth'
|
||||
* parameter.
|
||||
*/
|
||||
void setYearMonth(ValueProvider<String> value);
|
||||
}
|
||||
|
||||
/** Deploys the spec11 pipeline as a template on GCS, for a given projectID and GCS bucket. */
|
||||
public void deploy() {
|
||||
// We can't store options as a member variable due to serialization concerns.
|
||||
Spec11PipelineOptions options = PipelineOptionsFactory.as(Spec11PipelineOptions.class);
|
||||
options.setProject(projectId);
|
||||
options.setRunner(DataflowRunner.class);
|
||||
// This causes p.run() to stage the pipeline as a template on GCS, as opposed to running it.
|
||||
options.setTemplateLocation(spec11TemplateUrl);
|
||||
options.setStagingLocation(beamStagingUrl);
|
||||
Pipeline p = Pipeline.create(options);
|
||||
PCollection<Subdomain> domains =
|
||||
p.apply(
|
||||
"Read active domains from BigQuery",
|
||||
BigQueryIO.read(Subdomain::parseFromRecord)
|
||||
.fromQuery(
|
||||
// This query must be customized for your own use.
|
||||
"SELECT * FROM YOUR_TABLE_HERE")
|
||||
.withCoder(SerializableCoder.of(Subdomain.class))
|
||||
.usingStandardSql()
|
||||
.withoutValidation()
|
||||
.withTemplateCompatibility());
|
||||
countDomainsAndOutputResults(domains);
|
||||
p.run();
|
||||
}
|
||||
|
||||
/** Globally count the number of elements and output the results to GCS. */
|
||||
void countDomainsAndOutputResults(PCollection<Subdomain> domains) {
|
||||
// TODO(b/111545355): Actually process each domain with the SafeBrowsing API
|
||||
domains
|
||||
.apply("Count number of subdomains", Count.globally())
|
||||
.apply("Convert global count to string", ToString.elements())
|
||||
.apply(
|
||||
"Output to text file",
|
||||
TextIO.write()
|
||||
// TODO(b/111545355): Replace this with a templated directory based on yearMonth
|
||||
.to(spec11BucketUrl)
|
||||
.withoutSharding()
|
||||
// NOTE(review): this header looks like a placeholder pending the TODOs above; confirm.
.withHeader("HELLO WORLD"));
|
||||
}
|
||||
}
|
84
java/google/registry/beam/spec11/Subdomain.java
Normal file
84
java/google/registry/beam/spec11/Subdomain.java
Normal file
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2018 The Nomulus Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package google.registry.beam.spec11;
|
||||
|
||||
import static google.registry.beam.BeamUtils.checkFieldsNotNull;
|
||||
import static google.registry.beam.BeamUtils.extractField;
|
||||
|
||||
import com.google.auto.value.AutoValue;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.flogger.FluentLogger;
|
||||
import java.io.Serializable;
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;
|
||||
|
||||
/**
|
||||
* A POJO representing a single subdomain, parsed from a {@code SchemaAndRecord}.
|
||||
*
|
||||
* <p>This is a trivially serializable class that allows Beam to transform the results of a BigQuery
|
||||
* query into a standard Java representation, giving us the type guarantees and ease of manipulation
|
||||
* BigQuery lacks, while localizing any BigQuery-side failures to the {@link #parseFromRecord}
|
||||
* function.
|
||||
*/
|
||||
@AutoValue
|
||||
public abstract class Subdomain implements Serializable {
|
||||
|
||||
// NOTE(review): not referenced anywhere else in this class as shown; confirm it is still needed.
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
|
||||
|
||||
/** Record field names passed to {@code checkFieldsNotNull} in {@link #parseFromRecord}. */
private static final ImmutableList<String> FIELD_NAMES =
|
||||
ImmutableList.of("fullyQualifiedDomainName", "statuses", "creationTime");
|
||||
|
||||
/** Returns the fully qualified domain name. */
|
||||
abstract String fullyQualifiedDomainName();
|
||||
/** Returns the UTC DateTime this domain was created. */
|
||||
abstract ZonedDateTime creationTime();
|
||||
/** Returns the space-delimited list of statuses on this domain. */
|
||||
abstract String statuses();
|
||||
|
||||
/**
|
||||
* Constructs a {@link Subdomain} from a Beam {@code SchemaAndRecord} wrapping an Apache Avro
* {@code GenericRecord}.
|
||||
*
|
||||
* @param schemaAndRecord the row to parse; its record supplies the {@code
* fullyQualifiedDomainName}, {@code creationTime} and {@code statuses} fields
* @return a {@link Subdomain} built from those three fields, with the creation time in UTC
* @see <a
|
||||
* href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericData.Record.html">
|
||||
* Apache AVRO GenericRecord</a>
|
||||
*/
|
||||
static Subdomain parseFromRecord(SchemaAndRecord schemaAndRecord) {
|
||||
checkFieldsNotNull(FIELD_NAMES, schemaAndRecord);
|
||||
GenericRecord record = schemaAndRecord.getRecord();
|
||||
return create(
|
||||
extractField(record, "fullyQualifiedDomainName"),
|
||||
// BigQuery provides UNIX timestamps with microsecond precision; dividing by 1000 yields the
// epoch milliseconds that Instant.ofEpochMilli expects (sub-millisecond digits are dropped).
|
||||
Instant.ofEpochMilli(Long.parseLong(extractField(record, "creationTime")) / 1000)
|
||||
.atZone(ZoneId.of("UTC")),
|
||||
extractField(record, "statuses"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a concrete {@link Subdomain}.
|
||||
*
|
||||
* <p>This should only be used outside this class for testing; instances of {@link Subdomain}
|
||||
* should otherwise come from {@link #parseFromRecord}.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static Subdomain create(
|
||||
String fullyQualifiedDomainName, ZonedDateTime creationTime, String statuses) {
|
||||
return new AutoValue_Subdomain(fullyQualifiedDomainName, creationTime, statuses);
|
||||
}
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue