Prepare billing pipeline for production

This makes a few cosmetic changes that prepare the pipeline for production.

Namely:
- Converts file names to include the input yearMonth, mostly mirroring the original invoicing pipeline.
- Factors out the yearMonth logic from the reporting module to the more common backend module. We will likely use the default yearMonth logic in other backend tasks (such as spec11 reporting).
- Adds the "withTemplateCompatibility" flag to the BigQuery read, which allows multiple uses of the same template.
- Adds the 'billing' task queue, which retries up to 5 times at 3-minute intervals; this is about the rate we desire for checking whether the pipeline is complete.
- Adds a shell 'invoicing upload' class, which tests the retry semantics we want for post-generation work (e-mailing the invoice to crr-tech, and publishing detail reports)

While this CL may look big, it's mostly just a refactor plus the boilerplate needed to frame the upload logic.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=179849586
This commit is contained in:
larryruili 2017-12-21 13:13:23 -08:00 committed by Ben McIlwain
parent 53ed6035c4
commit 552ab12314
24 changed files with 436 additions and 115 deletions

View file

@ -17,6 +17,7 @@ java_library(
"//java/google/registry/util",
"@com_google_api_client_appengine",
"@com_google_apis_google_api_services_dataflow",
"@com_google_appengine_api_1_0_sdk",
"@com_google_dagger",
"@com_google_guava",
"@com_google_http_client",

View file

@ -14,6 +14,8 @@
package google.registry.billing;
import static google.registry.request.RequestParameters.extractRequiredParameter;
import com.google.api.client.googleapis.extensions.appengine.auth.oauth2.AppIdentityCredential;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
@ -22,8 +24,10 @@ import com.google.common.collect.ImmutableSet;
import dagger.Module;
import dagger.Provides;
import google.registry.config.RegistryConfig.Config;
import google.registry.request.Parameter;
import java.util.Set;
import java.util.function.Function;
import javax.servlet.http.HttpServletRequest;
/** Module for dependencies required by monthly billing actions. */
@Module
@ -31,6 +35,15 @@ public final class BillingModule {
private static final String CLOUD_PLATFORM_SCOPE =
"https://www.googleapis.com/auth/cloud-platform";
/** Name of the task queue on which {@link GenerateInvoicesAction} enqueues the publish task. */
static final String BILLING_QUEUE = "billing";
/** Name of the request parameter that carries the Dataflow job id between billing actions. */
static final String PARAM_JOB_ID = "jobId";
/**
 * Supplies the id of the invoicing Dataflow job for injection.
 *
 * <p>The id is read from the {@code jobId} request parameter, which {@link
 * GenerateInvoicesAction} attaches to the task it enqueues.
 *
 * @param req the incoming servlet request
 * @return the value of the {@code jobId} request parameter
 */
@Provides
@Parameter(PARAM_JOB_ID)
static String provideJobId(HttpServletRequest req) {
  final String dataflowJobId = extractRequiredParameter(req, PARAM_JOB_ID);
  return dataflowJobId;
}
/** Constructs a {@link Dataflow} API client with default settings. */
@Provides

View file

@ -22,6 +22,9 @@ import com.google.api.services.dataflow.Dataflow;
import com.google.api.services.dataflow.model.LaunchTemplateParameters;
import com.google.api.services.dataflow.model.LaunchTemplateResponse;
import com.google.api.services.dataflow.model.RuntimeEnvironment;
import com.google.appengine.api.taskqueue.QueueFactory;
import com.google.appengine.api.taskqueue.TaskOptions;
import com.google.common.collect.ImmutableMap;
import com.google.common.net.MediaType;
import google.registry.config.RegistryConfig.Config;
import google.registry.request.Action;
@ -30,9 +33,12 @@ import google.registry.request.auth.Auth;
import google.registry.util.FormattingLogger;
import java.io.IOException;
import javax.inject.Inject;
import org.joda.time.Duration;
import org.joda.time.YearMonth;
/**
* Invokes the {@code InvoicingPipeline} beam template via the REST api.
* Invokes the {@code InvoicingPipeline} beam template via the REST api, and enqueues the {@link
* PublishInvoicesAction} to publish the subsequent output.
*
* <p>This action runs the {@link google.registry.beam.InvoicingPipeline} beam template, staged at
* gs://<projectId>-beam/templates/invoicing. The pipeline then generates invoices for the month and
@ -43,25 +49,35 @@ public class GenerateInvoicesAction implements Runnable {
private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
@Inject @Config("projectId") String projectId;
@Inject @Config("apacheBeamBucketUrl") String beamBucketUrl;
@Inject
@Config("projectId")
String projectId;
@Inject
@Config("apacheBeamBucketUrl")
String beamBucketUrl;
@Inject YearMonth yearMonth;
@Inject Dataflow dataflow;
@Inject Response response;
@Inject GenerateInvoicesAction() {}
@Inject
GenerateInvoicesAction() {}
static final String PATH = "/_dr/task/generateInvoices";
@Override
public void run() {
logger.info("Launching dataflow job");
logger.infofmt("Launching invoicing pipeline for %s", yearMonth);
try {
LaunchTemplateParameters params =
new LaunchTemplateParameters()
.setJobName("test-invoicing")
.setJobName(String.format("invoicing-%s", yearMonth))
.setEnvironment(
new RuntimeEnvironment()
.setZone("us-east1-c")
.setTempLocation(beamBucketUrl + "/temp"));
.setTempLocation(beamBucketUrl + "/temporary"))
.setParameters(ImmutableMap.of("yearMonth", yearMonth.toString("yyyy-MM")));
LaunchTemplateResponse launchResponse =
dataflow
.projects()
@ -70,6 +86,14 @@ public class GenerateInvoicesAction implements Runnable {
.setGcsPath(beamBucketUrl + "/templates/invoicing")
.execute();
logger.infofmt("Got response: %s", launchResponse.getJob().toPrettyString());
String jobId = launchResponse.getJob().getId();
TaskOptions uploadTask =
TaskOptions.Builder.withUrl(PublishInvoicesAction.PATH)
.method(TaskOptions.Method.POST)
// Dataflow jobs tend to take about 10 minutes to complete.
.countdownMillis(Duration.standardMinutes(10).getMillis())
.param(BillingModule.PARAM_JOB_ID, jobId);
QueueFactory.getQueue(BillingModule.BILLING_QUEUE).add(uploadTask);
} catch (IOException e) {
logger.warningfmt("Template Launch failed due to: %s", e.getMessage());
response.setStatus(SC_INTERNAL_SERVER_ERROR);

View file

@ -0,0 +1,89 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.billing;
import static google.registry.request.Action.Method.POST;
import static javax.servlet.http.HttpServletResponse.SC_INTERNAL_SERVER_ERROR;
import static javax.servlet.http.HttpServletResponse.SC_NOT_MODIFIED;
import static javax.servlet.http.HttpServletResponse.SC_NO_CONTENT;
import static javax.servlet.http.HttpServletResponse.SC_OK;
import com.google.api.services.dataflow.Dataflow;
import com.google.api.services.dataflow.model.Job;
import com.google.common.net.MediaType;
import google.registry.config.RegistryConfig.Config;
import google.registry.request.Action;
import google.registry.request.Parameter;
import google.registry.request.Response;
import google.registry.request.auth.Auth;
import google.registry.util.FormattingLogger;
import java.io.IOException;
import javax.inject.Inject;
/**
 * Uploads the results of the {@link google.registry.beam.InvoicingPipeline}.
 *
 * <p>The action looks up the Dataflow job identified by the {@code jobId} request parameter and
 * answers according to the job's current state:
 *
 * <ul>
 *   <li>{@code JOB_STATE_DONE}: responds with 200 (the upload itself is not implemented yet).
 *   <li>{@code JOB_STATE_FAILED}: responds with 204, a success code, so the task is not retried.
 *   <li>any other state: responds with 304 so that the task queue retries the task later.
 * </ul>
 *
 * <p>This relies on the retry semantics in {@code queue.xml} to ensure proper upload, in spite of
 * fluctuations in generation timing.
 *
 * @see <a
 *     href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState">
 *     Job States</a>
 */
@Action(path = PublishInvoicesAction.PATH, method = POST, auth = Auth.AUTH_INTERNAL_OR_ADMIN)
public class PublishInvoicesAction implements Runnable {

  private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();

  /** Dataflow job state reported once the job has finished successfully. */
  private static final String JOB_DONE = "JOB_STATE_DONE";

  /** Dataflow job state reported once the job has failed. */
  private static final String JOB_FAILED = "JOB_STATE_FAILED";

  @Inject @Config("projectId") String projectId;
  @Inject @Parameter(BillingModule.PARAM_JOB_ID) String jobId;
  @Inject Dataflow dataflow;
  @Inject Response response;
  @Inject PublishInvoicesAction() {}

  static final String PATH = "/_dr/task/publishInvoices";

  /**
   * Fetches the current state of the Dataflow job and sets the response status accordingly.
   *
   * <p>An {@link IOException} raised while fetching the job is reported as a 500 with a plain-text
   * payload describing the failure.
   */
  @Override
  public void run() {
    logger.info("Starting publish job.");
    try {
      Job job = dataflow.projects().jobs().get(projectId, jobId).execute();
      String state = job.getCurrentState();
      // Constant-first equals() keeps the comparison null-safe: a job that reports no state is
      // treated like any other unfinished job instead of throwing a NullPointerException.
      if (JOB_DONE.equals(state)) {
        logger.infofmt("Dataflow job %s finished successfully.", jobId);
        response.setStatus(SC_OK);
        // TODO(larryruili): Implement upload logic.
      } else if (JOB_FAILED.equals(state)) {
        logger.severefmt("Dataflow job %s finished unsuccessfully.", jobId);
        // Return a 'success' code to stop task queue retry.
        response.setStatus(SC_NO_CONTENT);
        // TODO(larryruili): Implement failure response.
      } else {
        // NOTE(review): other terminal Dataflow states (e.g. JOB_STATE_CANCELLED) also end up
        // here and are retried until the queue's retry limit is reached -- confirm this is
        // intended.
        logger.infofmt("Job in non-terminal state %s, retrying:", state);
        // A non-2xx status makes the task queue schedule another attempt.
        response.setStatus(SC_NOT_MODIFIED);
      }
    } catch (IOException e) {
      // The failure here is the job status lookup, not a template launch.
      logger.warningfmt("Publish action failed due to: %s", e.getMessage());
      response.setStatus(SC_INTERNAL_SERVER_ERROR);
      response.setContentType(MediaType.PLAIN_TEXT_UTF_8);
      response.setPayload(String.format("Publish action failed: %s", e.getMessage()));
    }
  }
}