mv com/google/domain/registry google/registry

This change renames directories in preparation for the great package
rename. The repository is now in a broken state because the code
itself hasn't been updated. However, this should ensure that git
correctly preserves history for each file.
Justine Tunney 2016-05-13 18:55:08 -04:00
parent a41677aea1
commit 5012893c1d
2396 changed files with 0 additions and 0 deletions

@@ -0,0 +1,32 @@
package(
default_visibility = ["//java/com/google/domain/registry:registry_project"],
)
java_library(
name = "bigquery",
srcs = glob(["*.java"]),
visibility = ["//visibility:public"],
deps = [
"//apiserving/discoverydata/bigquery:bigqueryv2",
"//java/com/google/api/client/extensions/appengine/http",
"//java/com/google/api/client/googleapis/auth/oauth2",
"//java/com/google/api/client/googleapis/extensions/appengine/auth/oauth2",
"//java/com/google/api/client/googleapis/json",
"//java/com/google/api/client/http",
"//java/com/google/api/client/json",
"//java/com/google/api/client/json/jackson2",
"//java/com/google/common/annotations",
"//java/com/google/common/base",
"//java/com/google/common/collect",
"//java/com/google/common/io",
"//java/com/google/common/util/concurrent",
"//java/com/google/domain/registry/config",
"//java/com/google/domain/registry/request",
"//java/com/google/domain/registry/util",
"//third_party/java/dagger",
"//third_party/java/joda_time",
"//third_party/java/jsr305_annotations",
"//third_party/java/jsr330_inject",
],
)

@@ -0,0 +1,775 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.base.Verify.verify;
import static com.google.domain.registry.bigquery.BigqueryUtils.toJobReferenceString;
import static org.joda.time.DateTimeZone.UTC;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.http.AbstractInputStreamContent;
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.DatasetReference;
import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationExtract;
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.JobStatus;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.ViewDefinition;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableTable;
import com.google.common.io.BaseEncoding;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.domain.registry.bigquery.BigqueryUtils.DestinationFormat;
import com.google.domain.registry.bigquery.BigqueryUtils.SourceFormat;
import com.google.domain.registry.bigquery.BigqueryUtils.TableType;
import com.google.domain.registry.bigquery.BigqueryUtils.WriteDisposition;
import com.google.domain.registry.config.RegistryEnvironment;
import com.google.domain.registry.util.FormattingLogger;
import com.google.domain.registry.util.NonFinalForTesting;
import com.google.domain.registry.util.Sleeper;
import com.google.domain.registry.util.SqlTemplate;
import com.google.domain.registry.util.SystemSleeper;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import javax.annotation.Nullable;
/** Class encapsulating parameters and state for accessing the Bigquery API. */
public class BigqueryConnection implements AutoCloseable {
private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
private static final Duration MIN_POLL_INTERVAL = Duration.millis(500);
@NonFinalForTesting
private static Sleeper sleeper = new SystemSleeper();
/** Default name of the default dataset to use for requests to the API. */
public static final String DEFAULT_DATASET_NAME = "testing";
/** Default dataset to use for storing temporary tables. */
private static final String TEMP_DATASET_NAME = "__temp__";
/** Default time to live for temporary tables. */
private static final Duration TEMP_TABLE_TTL = Duration.standardHours(24);
/** Bigquery client instance wrapped by this class. */
private Bigquery bigquery;
/** Executor service for bigquery jobs. */
private ListeningExecutorService service;
/** Credential object to use for initializing HTTP requests to the bigquery API. */
private HttpRequestInitializer credential;
/** HTTP transport object to use for accessing bigquery API. */
private HttpTransport httpTransport;
/** JSON factory object to use for accessing bigquery API. */
private JsonFactory jsonFactory;
/** Pseudo-randomness source to use for creating random table names. */
private Random random = new Random();
/** Name of the default dataset to use for inserting tables. */
private String datasetId = DEFAULT_DATASET_NAME;
/** Whether to automatically overwrite existing tables and views. */
private boolean overwrite = false;
/** Duration to wait between polls for job status. */
private Duration pollInterval = Duration.millis(1000);
/** Builder for a {@link BigqueryConnection}, since the latter is immutable once created. */
public static class Builder {
private BigqueryConnection instance;
public Builder() {
instance = new BigqueryConnection();
}
/**
* The BigqueryConnection takes ownership of this {@link ExecutorService} and will
* shut it down when the BigqueryConnection is closed.
*/
public Builder setExecutorService(ExecutorService executorService) {
instance.service = MoreExecutors.listeningDecorator(executorService);
return this;
}
public Builder setCredential(GoogleCredential credential) {
instance.credential = checkNotNull(credential);
instance.httpTransport = credential.getTransport();
instance.jsonFactory = credential.getJsonFactory();
return this;
}
public Builder setDatasetId(String datasetId) {
instance.datasetId = checkNotNull(datasetId);
return this;
}
public Builder setOverwrite(boolean overwrite) {
instance.overwrite = overwrite;
return this;
}
public Builder setPollInterval(Duration pollInterval) {
checkArgument(
!pollInterval.isShorterThan(MIN_POLL_INTERVAL),
"poll interval must be at least %ldms", MIN_POLL_INTERVAL.getMillis());
instance.pollInterval = pollInterval;
return this;
}
public BigqueryConnection build() {
try {
checkNotNull(instance.service, "Must provide executor service");
return instance;
} finally {
// Clear the internal instance so you can't accidentally mutate it through this builder.
instance = null;
}
}
}
/**
* Class that wraps a normal Bigquery API Table object to make it immutable from the client side
* and give it additional semantics as a "destination" for load or query jobs, with an overwrite
* flag set by the client upon creation.
* <p>
* Additionally provides encapsulation so that clients of BigqueryConnection don't need to take
* any direct dependencies on Bigquery API classes and can instead use DestinationTable.
*/
public static class DestinationTable {
/** The wrapped Bigquery API Table object. */
private final Table table;
/** The type of this table. */
private final TableType type;
/** The write disposition for jobs writing to this destination table. */
private final WriteDisposition writeDisposition;
/**
* A query to package with this table if the type is VIEW; not immutable but also not visible
* to clients.
*/
private String query;
/** A builder for DestinationTable. */
public static final class Builder {
private final Table table = new Table();
private final TableReference tableRef = new TableReference();
private TableType type = TableType.TABLE;
private WriteDisposition writeDisposition = WriteDisposition.WRITE_EMPTY;
public Builder datasetId(String datasetId) {
tableRef.setDatasetId(datasetId);
return this;
}
public Builder name(String name) {
tableRef.setTableId(name);
return this;
}
public Builder description(String description) {
table.setDescription(description);
return this;
}
public Builder type(TableType type) {
this.type = type;
return this;
}
public Builder timeToLive(Duration duration) {
this.table.setExpirationTime(new DateTime(UTC).plus(duration).getMillis());
return this;
}
public Builder overwrite(boolean overwrite) {
if (overwrite) {
this.writeDisposition = WriteDisposition.WRITE_TRUNCATE;
}
return this;
}
public Builder append(boolean append) {
if (append) {
this.writeDisposition = WriteDisposition.WRITE_APPEND;
}
return this;
}
public DestinationTable build() {
tableRef.setProjectId(getEnvironmentProjectId());
table.setTableReference(tableRef);
checkState(!isNullOrEmpty(table.getTableReference().getDatasetId()));
checkState(!isNullOrEmpty(table.getTableReference().getTableId()));
return new DestinationTable(this);
}
}
/** Constructs a new DestinationTable from its Builder. */
private DestinationTable(Builder b) {
table = b.table.clone();
type = b.type;
writeDisposition = b.writeDisposition;
}
/**
* Stores the provided query with this DestinationTable and returns it; used for packaging
* a query along with the DestinationTable before sending it to the table update logic.
*/
private DestinationTable withQuery(String query) {
checkState(type == TableType.VIEW);
this.query = query;
return this;
}
/** Returns a new copy of the Bigquery API Table object wrapped by this DestinationTable. */
private Table getTable() {
Table tableCopy = table.clone();
if (type == TableType.VIEW) {
tableCopy.setView(new ViewDefinition().setQuery(query));
}
return tableCopy;
}
/** Returns the write disposition that should be used for jobs writing to this table. */
private WriteDisposition getWriteDisposition() {
return writeDisposition;
}
/** Returns a new copy of the TableReference for the Table wrapped by this DestinationTable. */
private TableReference getTableReference() {
return table.getTableReference().clone();
}
/** Returns a string representation of the TableReference for the wrapped table. */
public String getStringReference() {
return tableReferenceToString(table.getTableReference());
}
/** Returns a string representation of the given TableReference. */
private static String tableReferenceToString(TableReference tableRef) {
return String.format(
"%s:%s.%s",
tableRef.getProjectId(),
tableRef.getDatasetId(),
tableRef.getTableId());
}
}
/**
   * Initializes the BigqueryConnection object by setting up the API client and creating the
   * default and temporary datasets if they don't exist.
*/
public BigqueryConnection initialize() throws Exception {
bigquery = new Bigquery.Builder(httpTransport, jsonFactory, credential)
.setApplicationName(getClass().getSimpleName())
.build();
createDatasetIfNeeded(datasetId);
createDatasetIfNeeded(TEMP_DATASET_NAME);
return this;
}
/**
* Closes the BigqueryConnection object by shutting down the executor service. Clients
* should only call this after all ListenableFutures obtained from BigqueryConnection methods
* have resolved; this method does not block on their completion.
*/
@Override
public void close() {
service.shutdown();
}
/** Returns a partially built DestinationTable with the default dataset and overwrite behavior. */
public DestinationTable.Builder buildDestinationTable(String tableName) {
return new DestinationTable.Builder()
.datasetId(datasetId)
.type(TableType.TABLE)
.name(tableName)
.overwrite(overwrite);
}
/**
* Returns a partially built DestinationTable with a randomly generated name under the default
* temporary table dataset, with the default TTL and overwrite behavior.
*/
public DestinationTable.Builder buildTemporaryTable() {
return new DestinationTable.Builder()
.datasetId(TEMP_DATASET_NAME)
.type(TableType.TABLE)
.name(getRandomTableName())
.timeToLive(TEMP_TABLE_TTL)
.overwrite(overwrite);
}
/** Returns a random table name consisting only of the chars {@code [a-v0-9_]}. */
private String getRandomTableName() {
byte[] randBytes = new byte[8]; // 64 bits of randomness ought to be plenty.
random.nextBytes(randBytes);
return "_" + BaseEncoding.base32Hex().lowerCase().omitPadding().encode(randBytes);
}
/**
* A function that updates the specified Bigquery table to reflect the metadata from the input
* DestinationTable, passing the same DestinationTable through as the output. If the specified
* table does not already exist, it will be inserted into the dataset.
* <p>
* Clients can call this function directly to update a table on demand, or can pass it to
* Futures.transform() to update a table produced as the asynchronous result of a load or query
* job (e.g. to add a description to it).
*/
private class UpdateTableFunction implements Function<DestinationTable, DestinationTable> {
@Override
public DestinationTable apply(final DestinationTable destinationTable) {
Table table = destinationTable.getTable();
TableReference ref = table.getTableReference();
try {
if (checkTableExists(ref.getDatasetId(), ref.getTableId())) {
bigquery.tables()
.update(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), table)
.execute();
} else {
bigquery.tables()
.insert(ref.getProjectId(), ref.getDatasetId(), table)
.execute();
}
return destinationTable;
} catch (IOException e) {
throw BigqueryJobFailureException.create(e);
}
}
}
/**
* Starts an asynchronous load job to populate the specified destination table with the given
* source URIs and source format. Returns a ListenableFuture that holds the same destination
* table object on success.
*/
public ListenableFuture<DestinationTable> load(
DestinationTable dest,
SourceFormat sourceFormat,
Iterable<String> sourceUris) throws Exception {
Job job = new Job()
.setConfiguration(new JobConfiguration()
.setLoad(new JobConfigurationLoad()
.setWriteDisposition(dest.getWriteDisposition().toString())
.setSourceFormat(sourceFormat.toString())
.setSourceUris(ImmutableList.copyOf(sourceUris))
.setDestinationTable(dest.getTableReference())));
return Futures.transform(runJobToCompletion(job, dest), new UpdateTableFunction());
}
/**
* Starts an asynchronous query job to populate the specified destination table with the results
* of the specified query, or if the table is a view, to update the view to reflect that query.
* Returns a ListenableFuture that holds the same destination table object on success.
*/
public ListenableFuture<DestinationTable> query(
String querySql,
DestinationTable dest) {
if (dest.type == TableType.VIEW) {
// Use Futures.transform() rather than calling apply() directly so that any exceptions thrown
// by calling UpdateTableFunction will be propagated on the get() call, not from here.
return Futures.transform(
Futures.immediateFuture(dest.withQuery(querySql)), new UpdateTableFunction());
} else {
Job job = new Job()
.setConfiguration(new JobConfiguration()
.setQuery(new JobConfigurationQuery()
.setQuery(querySql)
.setDefaultDataset(getDataset())
.setWriteDisposition(dest.getWriteDisposition().toString())
.setDestinationTable(dest.getTableReference())));
return Futures.transform(runJobToCompletion(job, dest), new UpdateTableFunction());
}
}
/**
* Starts an asynchronous query job to dump the results of the specified query into a local
* ImmutableTable object, row-keyed by the row number (indexed from 1), column-keyed by the
* TableFieldSchema for that column, and with the value object as the cell value. Note that null
* values will not actually be null, but they can be checked for using Data.isNull().
* <p>
* Returns a ListenableFuture that holds the ImmutableTable on success.
*/
public ListenableFuture<ImmutableTable<Integer, TableFieldSchema, Object>>
queryToLocalTable(String querySql) throws Exception {
Job job = new Job()
.setConfiguration(new JobConfiguration()
.setQuery(new JobConfigurationQuery()
.setQuery(querySql)
.setDefaultDataset(getDataset())));
return Futures.transform(
runJobToCompletion(job),
new Function<Job, ImmutableTable<Integer, TableFieldSchema, Object>>() {
@Override
public ImmutableTable<Integer, TableFieldSchema, Object> apply(Job job) {
return getQueryResults(job);
}});
}
/**
* Returns the query results for the given job as an ImmutableTable, row-keyed by row number
* (indexed from 1), column-keyed by the TableFieldSchema for that field, and with the value
* object as the cell value. Note that null values will not actually be null (since we're using
* ImmutableTable) but they can be checked for using Data.isNull().
* <p>
* This table is fully materialized in memory (not lazily loaded), so it should not be used with
* queries expected to return large results.
*/
private ImmutableTable<Integer, TableFieldSchema, Object> getQueryResults(Job job) {
try {
ImmutableTable.Builder<Integer, TableFieldSchema, Object> builder =
new ImmutableTable.Builder<>();
String pageToken = null;
int rowNumber = 1;
while (true) {
GetQueryResultsResponse queryResults = bigquery.jobs()
.getQueryResults(getProjectId(), job.getJobReference().getJobId())
.setPageToken(pageToken)
.execute();
// If the job isn't complete yet, retry; getQueryResults() waits for up to 10 seconds on
// each invocation so this will effectively poll for completion.
if (queryResults.getJobComplete()) {
List<TableFieldSchema> schemaFields = queryResults.getSchema().getFields();
for (TableRow row : queryResults.getRows()) {
Iterator<TableFieldSchema> fieldIterator = schemaFields.iterator();
Iterator<TableCell> cellIterator = row.getF().iterator();
while (fieldIterator.hasNext() && cellIterator.hasNext()) {
builder.put(rowNumber, fieldIterator.next(), cellIterator.next().getV());
}
rowNumber++;
}
pageToken = queryResults.getPageToken();
if (pageToken == null) {
break;
}
}
}
return builder.build();
} catch (IOException e) {
throw BigqueryJobFailureException.create(e);
}
}
/**
* Starts an asynchronous job to extract the specified source table and output it to the
* given GCS filepath in the specified destination format, optionally printing headers.
* Returns a ListenableFuture that holds the destination GCS URI on success.
*/
private ListenableFuture<String> extractTable(
DestinationTable sourceTable,
String destinationUri,
DestinationFormat destinationFormat,
boolean printHeader) {
checkArgument(sourceTable.type == TableType.TABLE);
Job job = new Job()
.setConfiguration(new JobConfiguration()
.setExtract(new JobConfigurationExtract()
.setSourceTable(sourceTable.getTableReference())
.setDestinationFormat(destinationFormat.toString())
.setDestinationUris(ImmutableList.of(destinationUri))
.setPrintHeader(printHeader)));
return runJobToCompletion(job, destinationUri);
}
/**
* Starts an asynchronous job to extract the specified source table or view and output it to the
* given GCS filepath in the specified destination format, optionally printing headers.
* Returns a ListenableFuture that holds the destination GCS URI on success.
*/
public ListenableFuture<String> extract(
DestinationTable sourceTable,
String destinationUri,
DestinationFormat destinationFormat,
boolean printHeader) {
if (sourceTable.type == TableType.TABLE) {
return extractTable(sourceTable, destinationUri, destinationFormat, printHeader);
} else {
// We can't extract directly from a view, so instead extract from a query dumping that view.
return extractQuery(
SqlTemplate
.create("SELECT * FROM [%DATASET%.%TABLE%]")
.put("DATASET", sourceTable.getTableReference().getDatasetId())
.put("TABLE", sourceTable.getTableReference().getTableId())
.build(),
destinationUri,
destinationFormat,
printHeader);
}
}
/**
* Starts an asynchronous job to run the provided query, store the results in a temporary table,
* and then extract the contents of that table to the given GCS filepath in the specified
* destination format, optionally printing headers.
* <p>
* Returns a ListenableFuture that holds the destination GCS URI on success.
*/
public ListenableFuture<String> extractQuery(
String querySql,
final String destinationUri,
final DestinationFormat destinationFormat,
final boolean printHeader) {
// Note: although BigQuery queries save their results to an auto-generated anonymous table,
// we can't rely on that for running the extract job because it may not be fully replicated.
// Tracking bug for query-to-GCS support is b/13777340.
DestinationTable tempTable = buildTemporaryTable().build();
return Futures.transformAsync(
query(querySql, tempTable), new AsyncFunction<DestinationTable, String>() {
@Override
public ListenableFuture<String> apply(DestinationTable tempTable) {
return extractTable(tempTable, destinationUri, destinationFormat, printHeader);
}
});
}
/** @see #runJob(Job, AbstractInputStreamContent) */
public Job runJob(Job job) {
return runJob(job, null);
}
/**
   * Launches a job, waits for it to complete, and checks the completed job for errors.
*
* @throws BigqueryJobFailureException
*/
public Job runJob(Job job, @Nullable AbstractInputStreamContent data) {
return checkJob(waitForJob(launchJob(job, data)));
}
/**
   * Launches a job, but does not wait for it to complete.
*
* @throws BigqueryJobFailureException
*/
private Job launchJob(Job job, @Nullable AbstractInputStreamContent data) {
verify(job.getStatus() == null);
try {
return data != null
? bigquery.jobs().insert(getProjectId(), job, data).execute()
: bigquery.jobs().insert(getProjectId(), job).execute();
} catch (IOException e) {
throw BigqueryJobFailureException.create(e);
}
}
/**
   * Synchronously waits for an already-launched job to complete.
*
* @throws BigqueryJobFailureException
*/
private Job waitForJob(Job job) {
verify(job.getStatus() != null);
while (!job.getStatus().getState().equals("DONE")) {
sleeper.sleepUninterruptibly(pollInterval);
JobReference ref = job.getJobReference();
try {
job = bigquery.jobs().get(ref.getProjectId(), ref.getJobId()).execute();
} catch (IOException e) {
throw BigqueryJobFailureException.create(e);
}
}
return job;
}
/**
   * Checks a completed job for errors.
*
* @throws BigqueryJobFailureException
*/
private static Job checkJob(Job job) {
verify(job.getStatus() != null);
JobStatus jobStatus = job.getStatus();
if (jobStatus.getErrorResult() != null) {
throw BigqueryJobFailureException.create(jobStatus);
} else {
logger.info(summarizeCompletedJob(job));
if (jobStatus.getErrors() != null) {
for (ErrorProto error : jobStatus.getErrors()) {
logger.warning(String.format("%s: %s", error.getReason(), error.getMessage()));
}
}
return job;
}
}
  /** Returns a summary of a completed job's statistics for logging. */
private static String summarizeCompletedJob(Job job) {
JobStatistics stats = job.getStatistics();
return String.format(
"Job took %,.3f seconds after a %,.3f second delay and processed %,d bytes (%s)",
(stats.getEndTime() - stats.getStartTime()) / 1000.0,
(stats.getStartTime() - stats.getCreationTime()) / 1000.0,
stats.getTotalBytesProcessed(),
toJobReferenceString(job.getJobReference()));
}
private <T> ListenableFuture<T> runJobToCompletion(Job job, T result) {
return runJobToCompletion(job, result, null);
}
/** Runs job and returns a future that yields {@code result} when {@code job} is completed. */
private <T> ListenableFuture<T> runJobToCompletion(
final Job job,
final T result,
@Nullable final AbstractInputStreamContent data) {
return service.submit(new Callable<T>() {
@Override
public T call() {
runJob(job, data);
return result;
}});
}
private ListenableFuture<Job> runJobToCompletion(final Job job) {
return service.submit(new Callable<Job>() {
@Override
public Job call() {
return runJob(job, null);
}});
}
/** Helper that returns true if a dataset with this name exists. */
public boolean checkDatasetExists(String datasetName) throws IOException {
try {
bigquery.datasets().get(getProjectId(), datasetName).execute();
return true;
} catch (GoogleJsonResponseException e) {
if (e.getDetails().getCode() == 404) {
return false;
}
throw e;
}
}
/** Helper that returns true if a table with this name and dataset name exists. */
public boolean checkTableExists(String datasetName, String tableName) throws IOException {
try {
bigquery.tables().get(getProjectId(), datasetName, tableName).execute();
return true;
} catch (GoogleJsonResponseException e) {
if (e.getDetails().getCode() == 404) {
return false;
}
throw e;
}
}
/** Returns the projectId set by the environment, or {@code null} if none is set. */
public static String getEnvironmentProjectId() {
return RegistryEnvironment.get().config().getProjectId();
}
/** Returns the projectId associated with this bigquery connection. */
public String getProjectId() {
return getEnvironmentProjectId();
}
/** Returns the dataset name that this bigquery connection uses by default. */
public String getDatasetId() {
return datasetId;
}
/** Returns dataset reference that can be used to avoid having to specify dataset in SQL code. */
public DatasetReference getDataset() {
return new DatasetReference()
.setProjectId(getProjectId())
.setDatasetId(getDatasetId());
}
/** Returns table reference with the projectId and datasetId filled out for you. */
public TableReference getTable(String tableName) {
return new TableReference()
.setProjectId(getProjectId())
.setDatasetId(getDatasetId())
.setTableId(tableName);
}
/**
* Helper that creates a dataset with this name if it doesn't already exist, and returns true
* if creation took place.
*/
public boolean createDatasetIfNeeded(String datasetName) throws IOException {
if (!checkDatasetExists(datasetName)) {
bigquery.datasets()
.insert(getProjectId(), new Dataset().setDatasetReference(new DatasetReference()
.setProjectId(getProjectId())
.setDatasetId(datasetName)))
.execute();
System.err.printf("Created dataset: %s:%s\n", getProjectId(), datasetName);
return true;
}
return false;
}
  /** Creates a table from a SQL query if the table doesn't already exist. */
public TableReference ensureTable(TableReference table, String sqlQuery) {
try {
runJob(new Job()
.setConfiguration(new JobConfiguration()
.setQuery(new JobConfigurationQuery()
.setQuery(sqlQuery)
.setDefaultDataset(getDataset())
.setDestinationTable(table))));
} catch (BigqueryJobFailureException e) {
if (e.getReason().equals("duplicate")) {
// Table already exists.
} else {
throw e;
}
}
return table;
}
}
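
A minimal usage sketch for this class (not part of this commit; the credential is assumed to be built elsewhere, and the dataset name, query, and GCS path are hypothetical):

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.domain.registry.bigquery.BigqueryConnection;
import com.google.domain.registry.bigquery.BigqueryUtils.DestinationFormat;
import org.joda.time.Duration;
import java.util.concurrent.Executors;

class BigqueryConnectionExample {
  static void runReport(GoogleCredential credential) throws Exception {
    BigqueryConnection connection = new BigqueryConnection.Builder()
        .setExecutorService(Executors.newFixedThreadPool(4))
        .setCredential(credential)
        .setDatasetId("reporting")              // hypothetical dataset name
        .setOverwrite(true)
        .setPollInterval(Duration.standardSeconds(5))
        .build()
        .initialize();                          // creates datasets if missing
    try {
      // Run a query through an auto-named temporary table and dump the
      // results to GCS as CSV; get() blocks until the extract completes.
      connection.extractQuery(
          "SELECT tld, COUNT(*) FROM [reporting.domains] GROUP BY tld",
          "gs://my-bucket/report.csv",          // hypothetical destination
          DestinationFormat.CSV,
          true /* printHeader */).get();
    } finally {
      connection.close();                       // all futures have resolved
    }
  }
}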

@@ -0,0 +1,167 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Sets.newConcurrentHashSet;
import static com.google.domain.registry.util.FormattingLogger.getLoggerForCallerClass;
import com.google.api.client.extensions.appengine.http.UrlFetchTransport;
import com.google.api.client.googleapis.extensions.appengine.auth.oauth2.AppIdentityCredential;
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.BigqueryScopes;
import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.DatasetReference;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableList;
import com.google.domain.registry.util.FormattingLogger;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.inject.Inject;
/** Factory for creating {@link Bigquery} connections. */
public class BigqueryFactory {
private static final FormattingLogger logger = getLoggerForCallerClass();
// Cross-request caches to avoid unnecessary RPCs.
private static Set<String> knownExistingDatasets = newConcurrentHashSet();
private static Set<String> knownExistingTables = newConcurrentHashSet();
@Inject Map<String, ImmutableList<TableFieldSchema>> bigquerySchemas;
@Inject Subfactory subfactory;
@Inject BigqueryFactory() {}
/** This class is broken out solely so that it can be mocked inside of tests. */
static class Subfactory {
@Inject Subfactory() {}
public Bigquery create(
String applicationName,
HttpTransport transport,
JsonFactory jsonFactory,
HttpRequestInitializer httpRequestInitializer) {
return new Bigquery.Builder(transport, jsonFactory, httpRequestInitializer)
.setApplicationName(applicationName)
.build();
}
}
/** Returns a new connection to BigQuery. */
public Bigquery create(
String applicationName,
HttpTransport transport,
JsonFactory jsonFactory,
HttpRequestInitializer httpRequestInitializer) {
return subfactory.create(applicationName, transport, jsonFactory, httpRequestInitializer);
}
/**
* Returns a new connection to Bigquery, first ensuring that the given dataset exists in the
* project with the given id, creating it if required.
*/
public Bigquery create(String projectId, String datasetId) throws IOException {
Bigquery bigquery = create(
getClass().getSimpleName(),
new UrlFetchTransport(),
new JacksonFactory(),
new AppIdentityCredential(BigqueryScopes.all()));
// Note: it's safe for multiple threads to call this as the dataset will only be created once.
if (!knownExistingDatasets.contains(datasetId)) {
ensureDataset(bigquery, projectId, datasetId);
knownExistingDatasets.add(datasetId);
}
return bigquery;
}
/**
* Returns a new connection to Bigquery, first ensuring that the given dataset and table exist in
the project with the given id, creating them if required.
*/
public Bigquery create(String projectId, String datasetId, String tableId)
throws IOException {
Bigquery bigquery = create(projectId, datasetId);
checkArgument(bigquerySchemas.containsKey(tableId), "Unknown table ID: %s", tableId);
if (!knownExistingTables.contains(tableId)) {
ensureTable(
bigquery,
new TableReference()
.setDatasetId(datasetId)
.setProjectId(projectId)
.setTableId(tableId),
bigquerySchemas.get(tableId));
knownExistingTables.add(tableId);
}
return bigquery;
}
/**
* Ensures the dataset exists by trying to create it. Note that it's not appreciably cheaper
* to check for dataset existence than it is to try to create it and check for exceptions.
*/
// Note that these are not static so they can be mocked for testing.
private void ensureDataset(Bigquery bigquery, String projectId, String datasetId)
throws IOException {
try {
bigquery.datasets()
.insert(projectId,
new Dataset().setDatasetReference(
new DatasetReference()
.setProjectId(projectId)
.setDatasetId(datasetId)))
.execute();
} catch (IOException e) {
// Swallow errors about a duplicate dataset, and throw any other ones.
if (!BigqueryJobFailureException.create(e).getReason().equals("duplicate")) {
throw e;
}
}
}
/** Ensures the table exists in Bigquery. */
private void ensureTable(Bigquery bigquery, TableReference table, List<TableFieldSchema> schema)
throws IOException {
try {
bigquery.tables().insert(table.getProjectId(), table.getDatasetId(), new Table()
.setSchema(new TableSchema().setFields(schema))
.setTableReference(table))
.execute();
logger.infofmt("Created BigQuery table %s:%s.%s", table.getProjectId(), table.getDatasetId(),
table.getTableId());
} catch (IOException e) {
// Swallow errors about a table that exists, and throw any other ones.
if (!BigqueryJobFailureException.create(e).getReason().equals("duplicate")) {
throw e;
}
}
}
}
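
A usage sketch (assuming Dagger injection is configured; the project, dataset, and table IDs are hypothetical, and the table ID must be a key of the injected bigquerySchemas map):

import com.google.api.services.bigquery.Bigquery;
import com.google.domain.registry.bigquery.BigqueryFactory;
import java.io.IOException;
import javax.inject.Inject;

class SnapshotLoader {
  @Inject BigqueryFactory bigqueryFactory;

  Bigquery connect() throws IOException {
    // The first call creates the dataset and table as needed; later calls
    // hit the cross-request caches and skip the ensure RPCs.
    return bigqueryFactory.create("my-project", "snapshots", "domain");
  }
}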

@@ -0,0 +1,120 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.JobStatus;
import com.google.common.collect.Iterables;
import java.io.IOException;
import javax.annotation.Nullable;
/** Generic exception to throw if a Bigquery job fails. */
public final class BigqueryJobFailureException extends RuntimeException {
  /** Delegate {@link IOException} errors, checking for {@link GoogleJsonResponseException}. */
public static BigqueryJobFailureException create(IOException cause) {
if (cause instanceof GoogleJsonResponseException) {
return create(((GoogleJsonResponseException) cause).getDetails());
} else {
return new BigqueryJobFailureException(cause.getMessage(), cause, null, null);
}
}
/** Create an error for JSON server response errors. */
public static BigqueryJobFailureException create(GoogleJsonError error) {
return new BigqueryJobFailureException(error.getMessage(), null, null, error);
}
/** Create an error from a failed job. */
public static BigqueryJobFailureException create(JobStatus jobStatus) {
checkArgument(jobStatus.getErrorResult() != null, "this job didn't fail!");
return new BigqueryJobFailureException(
describeError(jobStatus.getErrorResult()), null, jobStatus, null);
}
@Nullable
private final JobStatus jobStatus;
@Nullable
private final GoogleJsonError jsonError;
private BigqueryJobFailureException(
String message,
@Nullable Throwable cause,
@Nullable JobStatus jobStatus,
@Nullable GoogleJsonError jsonError) {
super(message, cause);
this.jobStatus = jobStatus;
this.jsonError = jsonError;
}
/**
* Returns a short error code describing why this job failed.
*
* <h3>Sample Reasons</h3>
*
* <ul>
* <li>{@code "duplicate"}: The table you're trying to create already exists.
* <li>{@code "invalidQuery"}: Query syntax error of some sort.
* <li>{@code "unknown"}: Non-Bigquery errors.
* </ul>
*
* @see "https://cloud.google.com/bigquery/troubleshooting-errors"
*/
public String getReason() {
if (jobStatus != null) {
return jobStatus.getErrorResult().getReason();
} else if (jsonError != null) {
return Iterables.getLast(jsonError.getErrors()).getReason();
} else {
return "unknown";
}
}
@Override
public String toString() {
StringBuilder result = new StringBuilder();
result.append(String.format("%s: %s", getClass().getSimpleName(), getMessage()));
try {
if (jobStatus != null) {
for (ErrorProto error : jobStatus.getErrors()) {
result.append("\n---------------------------------- BEGIN DEBUG INFO\n");
result.append(describeError(error));
result.append('\n');
result.append(error.getDebugInfo());
result.append("\n---------------------------------- END DEBUG INFO");
}
}
if (jsonError != null) {
String extraInfo = jsonError.toPrettyString();
result.append('\n');
result.append(extraInfo);
}
} catch (IOException e) {
result.append(e);
}
return result.toString();
}
private static String describeError(ErrorProto error) {
return String.format("%s: %s", error.getReason(), error.getMessage());
}
}
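
The getReason() codes are what callers are expected to branch on; BigqueryConnection.ensureTable() above uses this exact pattern. A minimal sketch:

import com.google.api.services.bigquery.model.Job;
import com.google.domain.registry.bigquery.BigqueryConnection;

class JobRunner {
  static void runIgnoringDuplicates(BigqueryConnection connection, Job job) {
    try {
      connection.runJob(job);
    } catch (BigqueryJobFailureException e) {
      // "duplicate" means the destination table already exists; anything
      // else ("invalidQuery", "unknown", ...) is a real failure.
      if (!e.getReason().equals("duplicate")) {
        throw e;
      }
    }
  }
}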

@@ -0,0 +1,72 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.bigquery;
import static dagger.Provides.Type.SET_VALUES;
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.BigqueryScopes;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.common.collect.ImmutableList;
import com.google.domain.registry.config.ConfigModule.Config;
import com.google.domain.registry.request.OAuthScopes;
import dagger.Module;
import dagger.Multibindings;
import dagger.Provides;
import java.util.Map;
import java.util.Set;
/**
* Dagger module for Google {@link Bigquery} connection objects.
*
* @see com.google.domain.registry.config.ConfigModule
* @see com.google.domain.registry.request.Modules.UrlFetchTransportModule
* @see com.google.domain.registry.request.Modules.Jackson2Module
* @see com.google.domain.registry.request.Modules.AppIdentityCredentialModule
* @see com.google.domain.registry.request.Modules.UseAppIdentityCredentialForGoogleApisModule
*/
@Module
public final class BigqueryModule {
@Multibindings
interface BigQueryMultibindings {
    /** Provides a map of BigQuery table names to their schema fields. */
Map<String, ImmutableList<TableFieldSchema>> bigquerySchemas();
}
/** Provides OAuth2 scopes for the Bigquery service needed by Domain Registry. */
@Provides(type = SET_VALUES)
@OAuthScopes
static Set<String> provideBigqueryOAuthScopes() {
return BigqueryScopes.all();
}
@Provides
static Bigquery provideBigquery(
HttpTransport transport,
JsonFactory jsonFactory,
HttpRequestInitializer httpRequestInitializer,
@Config("projectId") String projectId) {
return new Bigquery.Builder(transport, jsonFactory, httpRequestInitializer)
.setApplicationName(projectId)
.build();
}
}
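
A wiring sketch (the companion modules are the ones named in the Javadoc above; they are assumed to bind HttpTransport, JsonFactory, HttpRequestInitializer, and @Config("projectId")):

import com.google.api.services.bigquery.Bigquery;
import com.google.domain.registry.config.ConfigModule;
import com.google.domain.registry.request.Modules;
import dagger.Component;

@Component(modules = {
    BigqueryModule.class,
    ConfigModule.class,
    Modules.UrlFetchTransportModule.class,
    Modules.Jackson2Module.class,
    Modules.AppIdentityCredentialModule.class,
    Modules.UseAppIdentityCredentialForGoogleApisModule.class,
})
interface BigqueryComponent {
  // DaggerBigqueryComponent.create().bigquery() then yields a ready client.
  Bigquery bigquery();
}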

@@ -0,0 +1,169 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.domain.registry.bigquery;
import com.google.api.services.bigquery.model.JobReference;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import org.joda.time.format.DateTimeParser;
import org.joda.time.format.ISODateTimeFormat;
import java.util.concurrent.TimeUnit;
/** Utilities related to Bigquery. */
public class BigqueryUtils {
/** Bigquery modes for schema fields. */
public enum FieldMode {
NULLABLE,
REQUIRED,
REPEATED;
    /** Returns the name of the field mode as it should appear in the Bigquery schema. */
public String schemaName() {
return name();
}
}
/** Bigquery schema field types. */
public enum FieldType {
STRING,
INTEGER,
FLOAT,
TIMESTAMP,
RECORD,
BOOLEAN;
    /** Returns the name of the field type as it should appear in the Bigquery schema. */
public String schemaName() {
return name();
}
}
/** Source formats for Bigquery load jobs. */
public enum SourceFormat {
CSV,
NEWLINE_DELIMITED_JSON,
DATASTORE_BACKUP
}
/** Destination formats for Bigquery extract jobs. */
public enum DestinationFormat {
CSV,
NEWLINE_DELIMITED_JSON
}
/** Bigquery table types (i.e. regular table or view). */
public enum TableType {
TABLE,
VIEW
}
/**
* Bigquery write dispositions (i.e. what to do about writing to an existing table).
*
* @see <a href="https://developers.google.com/bigquery/docs/reference/v2/jobs">API docs</a>
*/
public enum WriteDisposition {
/** Only write to the table if there is no existing table or if it is empty. */
WRITE_EMPTY,
/** If the table already exists, overwrite it with the new data. */
WRITE_TRUNCATE,
/** If the table already exists, append the data to the table. */
WRITE_APPEND
}
/**
* A {@code DateTimeFormatter} that defines how to print DateTimes in a string format that
* BigQuery can interpret and how to parse the string formats that BigQuery emits into DateTimes.
* <p>
* The general format definition is "YYYY-MM-DD HH:MM:SS.SSS[ ZZ]", where the fractional seconds
* portion can have 0-6 decimal places (although we restrict it to 0-3 here since Joda DateTime
* only supports up to millisecond precision) and the zone if not specified defaults to UTC.
* <p>
* Although we expect a zone specification of "UTC" when parsing, we don't emit it when printing
* because in some cases BigQuery does not allow any time zone specification (instead it assumes
* UTC for whatever input you provide) for input timestamp strings (see b/16380363).
*
* @see "https://developers.google.com/bigquery/timestamp"
*/
public static final DateTimeFormatter BIGQUERY_TIMESTAMP_FORMAT = new DateTimeFormatterBuilder()
.append(ISODateTimeFormat.date())
.appendLiteral(' ')
.append(
// For printing, always print out the milliseconds.
ISODateTimeFormat.hourMinuteSecondMillis().getPrinter(),
// For parsing, we need a series of parsers to correctly handle the milliseconds.
new DateTimeParser[] {
// Try to parse the time with milliseconds first, which requires at least one
// fractional second digit, and if that fails try to parse without milliseconds.
ISODateTimeFormat.hourMinuteSecondMillis().getParser(),
ISODateTimeFormat.hourMinuteSecond().getParser()})
// Print UTC as the empty string since BigQuery's TIMESTAMP() function does not accept any
// time zone specification, but require "UTC" on parsing. Since we force this formatter to
// always use UTC below, the other arguments do not matter.
//
// TODO(b/26162667): replace this with appendLiteral(" UTC") if b/16380363 gets resolved.
.appendTimeZoneOffset("", " UTC", false, 1, 1)
.toFormatter()
.withZoneUTC();
/**
* Returns the human-readable string version of the given DateTime, suitable for conversion
* within BigQuery from a string literal into a BigQuery timestamp type.
*/
public static String toBigqueryTimestampString(DateTime dateTime) {
return BIGQUERY_TIMESTAMP_FORMAT.print(dateTime);
}
/** Returns the DateTime for a given human-readable string-formatted BigQuery timestamp. */
public static DateTime fromBigqueryTimestampString(String timestampString) {
return BIGQUERY_TIMESTAMP_FORMAT.parseDateTime(timestampString);
}
/**
* Converts a time (in TimeUnits since the epoch) into a numeric string that BigQuery understands
* as a timestamp: the decimal number of seconds since the epoch, precise up to microseconds.
*
* @see "https://developers.google.com/bigquery/timestamp"
*/
public static String toBigqueryTimestamp(long timestamp, TimeUnit unit) {
long seconds = unit.toSeconds(timestamp);
long fractionalSeconds = unit.toMicros(timestamp) % 1000000;
return String.format("%d.%06d", seconds, fractionalSeconds);
}
/**
* Converts a {@link DateTime} into a numeric string that BigQuery understands as a timestamp:
* the decimal number of seconds since the epoch, precise up to microseconds.
*
* <p>Note that since {@code DateTime} only stores milliseconds, the last 3 digits will be zero.
*
* @see "https://developers.google.com/bigquery/timestamp"
*/
public static String toBigqueryTimestamp(DateTime dateTime) {
return toBigqueryTimestamp(dateTime.getMillis(), TimeUnit.MILLISECONDS);
}
/**
* Returns the canonical string format for a JobReference object (the project ID and then job ID,
* delimited by a single colon) since JobReference.toString() is not customized to return it.
*/
public static String toJobReferenceString(JobReference jobRef) {
return jobRef.getProjectId() + ":" + jobRef.getJobId();
}
}
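
For concreteness, the two timestamp conventions round-trip as follows (a sketch; the example instant is arbitrary):

import static org.joda.time.DateTimeZone.UTC;
import com.google.domain.registry.bigquery.BigqueryUtils;
import org.joda.time.DateTime;

class TimestampExample {
  public static void main(String[] args) {
    DateTime t = new DateTime(2016, 1, 1, 0, 0, 0, UTC);
    // Human-readable form; prints "2016-01-01 00:00:00.000" (no zone suffix).
    System.out.println(BigqueryUtils.toBigqueryTimestampString(t));
    // Numeric form; prints "1451606400.000000" (seconds.microseconds).
    System.out.println(BigqueryUtils.toBigqueryTimestamp(t));
    // Parsing accepts the " UTC" suffix that BigQuery emits.
    DateTime parsed =
        BigqueryUtils.fromBigqueryTimestampString("2016-01-01 00:00:00.000 UTC");
    System.out.println(parsed.equals(t));  // true
  }
}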

@@ -0,0 +1,16 @@
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
@javax.annotation.ParametersAreNonnullByDefault
package com.google.domain.registry.bigquery;