// Copyright 2016 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package google.registry.bigquery;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.base.Verify.verify;
import static google.registry.bigquery.BigqueryUtils.toJobReferenceString;
import static org.joda.time.DateTimeZone.UTC;

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.http.AbstractInputStreamContent;
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.DatasetReference;
import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationExtract;
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.JobStatus;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.ViewDefinition;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableTable;
import com.google.common.io.BaseEncoding;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import google.registry.bigquery.BigqueryUtils.DestinationFormat;
import google.registry.bigquery.BigqueryUtils.SourceFormat;
import google.registry.bigquery.BigqueryUtils.TableType;
import google.registry.bigquery.BigqueryUtils.WriteDisposition;
import google.registry.config.RegistryEnvironment;
import google.registry.util.FormattingLogger;
import google.registry.util.NonFinalForTesting;
import google.registry.util.Sleeper;
import google.registry.util.SqlTemplate;
import google.registry.util.SystemSleeper;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import javax.annotation.Nullable;
import org.joda.time.DateTime;
import org.joda.time.Duration;

/** Class encapsulating parameters and state for accessing the Bigquery API. */
public class BigqueryConnection implements AutoCloseable {

  private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();

  private static final Duration MIN_POLL_INTERVAL = Duration.millis(500);

  @NonFinalForTesting
  private static Sleeper sleeper = new SystemSleeper();

  /** Default name of the default dataset to use for requests to the API. */
  public static final String DEFAULT_DATASET_NAME = "testing";

  /** Default dataset to use for storing temporary tables. */
  private static final String TEMP_DATASET_NAME = "__temp__";

  /** Default time to live for temporary tables. */
  private static final Duration TEMP_TABLE_TTL = Duration.standardHours(24);

  /** Bigquery client instance wrapped by this class. */
  private Bigquery bigquery;

  /** Executor service for bigquery jobs. */
  private ListeningExecutorService service;

  /** Credential object to use for initializing HTTP requests to the bigquery API. */
  private HttpRequestInitializer credential;

  /** HTTP transport object to use for accessing bigquery API. */
  private HttpTransport httpTransport;

  /** JSON factory object to use for accessing bigquery API. */
  private JsonFactory jsonFactory;

  /** Pseudo-randomness source to use for creating random table names. */
  private Random random = new Random();

  /** Name of the default dataset to use for inserting tables. */
  private String datasetId = DEFAULT_DATASET_NAME;

  /** Whether to automatically overwrite existing tables and views. */
  private boolean overwrite = false;

  /** Duration to wait between polls for job status. */
  private Duration pollInterval = Duration.millis(1000);

  /** Builder for a {@link BigqueryConnection}, since the latter is immutable once created. */
  public static class Builder {
    private BigqueryConnection instance;

    public Builder() {
      instance = new BigqueryConnection();
    }

    /**
     * The BigqueryConnection takes ownership of this {@link ExecutorService} and will
     * shut it down when the BigqueryConnection is closed.
     */
    public Builder setExecutorService(ExecutorService executorService) {
      instance.service = MoreExecutors.listeningDecorator(executorService);
      return this;
    }

    public Builder setCredential(GoogleCredential credential) {
      instance.credential = checkNotNull(credential);
      instance.httpTransport = credential.getTransport();
      instance.jsonFactory = credential.getJsonFactory();
      return this;
    }

    public Builder setDatasetId(String datasetId) {
      instance.datasetId = checkNotNull(datasetId);
      return this;
    }

    public Builder setOverwrite(boolean overwrite) {
      instance.overwrite = overwrite;
      return this;
    }

    public Builder setPollInterval(Duration pollInterval) {
      checkArgument(
          !pollInterval.isShorterThan(MIN_POLL_INTERVAL),
          "poll interval must be at least %sms",
          MIN_POLL_INTERVAL.getMillis());
      instance.pollInterval = pollInterval;
      return this;
    }

    public BigqueryConnection build() {
      try {
        checkNotNull(instance.service, "Must provide executor service");
        return instance;
      } finally {
        // Clear the internal instance so you can't accidentally mutate it through this builder.
        instance = null;
      }
    }
  }
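  // Illustrative usage sketch: a caller might assemble and initialize a connection via the
  // Builder above.  The executor, credential, and dataset name below are hypothetical
  // placeholders, not values required by this class.
  //
  //   BigqueryConnection bigquery = new BigqueryConnection.Builder()
  //       .setExecutorService(Executors.newFixedThreadPool(4))
  //       .setCredential(googleCredential)  // a GoogleCredential scoped for the BigQuery API
  //       .setDatasetId("reporting")
  //       .setPollInterval(Duration.standardSeconds(2))
  //       .build();
  //   bigquery.initialize();
  //   ...  // run jobs; call close() only after all returned ListenableFutures have resolved
  //   bigquery.close();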
  /**
   * Class that wraps a normal Bigquery API Table object to make it immutable from the client side
   * and give it additional semantics as a "destination" for load or query jobs, with an overwrite
   * flag set by the client upon creation.
   *
   * <p>Additionally provides encapsulation so that clients of BigqueryConnection don't need to take
   * any direct dependencies on Bigquery API classes and can instead use DestinationTable.
   */
  public static class DestinationTable {

    /** The wrapped Bigquery API Table object. */
    private final Table table;

    /** The type of this table. */
    private final TableType type;

    /** The write disposition for jobs writing to this destination table. */
    private final WriteDisposition writeDisposition;

    /**
     * A query to package with this table if the type is VIEW; not immutable but also not visible
     * to clients.
     */
    private String query;

    /** A builder for DestinationTable. */
    public static final class Builder {
      private final Table table = new Table();
      private final TableReference tableRef = new TableReference();
      private TableType type = TableType.TABLE;
      private WriteDisposition writeDisposition = WriteDisposition.WRITE_EMPTY;

      public Builder datasetId(String datasetId) {
        tableRef.setDatasetId(datasetId);
        return this;
      }

      public Builder name(String name) {
        tableRef.setTableId(name);
        return this;
      }

      public Builder description(String description) {
        table.setDescription(description);
        return this;
      }

      public Builder type(TableType type) {
        this.type = type;
        return this;
      }

      public Builder timeToLive(Duration duration) {
        this.table.setExpirationTime(new DateTime(UTC).plus(duration).getMillis());
        return this;
      }

      public Builder overwrite(boolean overwrite) {
        if (overwrite) {
          this.writeDisposition = WriteDisposition.WRITE_TRUNCATE;
        }
        return this;
      }

      public Builder append(boolean append) {
        if (append) {
          this.writeDisposition = WriteDisposition.WRITE_APPEND;
        }
        return this;
      }

      public DestinationTable build() {
        tableRef.setProjectId(getEnvironmentProjectId());
        table.setTableReference(tableRef);
        checkState(!isNullOrEmpty(table.getTableReference().getDatasetId()));
        checkState(!isNullOrEmpty(table.getTableReference().getTableId()));
        return new DestinationTable(this);
      }
    }

    /** Constructs a new DestinationTable from its Builder. */
    private DestinationTable(Builder b) {
      table = b.table.clone();
      type = b.type;
      writeDisposition = b.writeDisposition;
    }

    /**
     * Stores the provided query with this DestinationTable and returns it; used for packaging
     * a query along with the DestinationTable before sending it to the table update logic.
     */
    private DestinationTable withQuery(String query) {
      checkState(type == TableType.VIEW);
      this.query = query;
      return this;
    }

    /** Returns a new copy of the Bigquery API Table object wrapped by this DestinationTable. */
    private Table getTable() {
      Table tableCopy = table.clone();
      if (type == TableType.VIEW) {
        tableCopy.setView(new ViewDefinition().setQuery(query));
      }
      return tableCopy;
    }

    /** Returns the write disposition that should be used for jobs writing to this table. */
    private WriteDisposition getWriteDisposition() {
      return writeDisposition;
    }

    /** Returns a new copy of the TableReference for the Table wrapped by this DestinationTable. */
    private TableReference getTableReference() {
      return table.getTableReference().clone();
    }

    /** Returns a string representation of the TableReference for the wrapped table. */
    public String getStringReference() {
      return tableReferenceToString(table.getTableReference());
    }

    /** Returns a string representation of the given TableReference. */
    private static String tableReferenceToString(TableReference tableRef) {
      return String.format(
          "%s:%s.%s", tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId());
    }
  }
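  // Illustrative sketch of assembling a DestinationTable directly; the dataset and table names
  // are hypothetical, and getEnvironmentProjectId() supplies the project ID at build() time.
  //
  //   DestinationTable view = new DestinationTable.Builder()
  //       .datasetId("reporting")
  //       .name("latest_snapshot_view")
  //       .type(TableType.VIEW)
  //       .build();
  //
  // For plain tables, overwrite(true) selects WRITE_TRUNCATE and append(true) selects
  // WRITE_APPEND; otherwise the write disposition defaults to WRITE_EMPTY.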
  /**
   * Initializes the BigqueryConnection object by setting up the API client and creating the
   * default dataset if it doesn't exist.
   */
  public BigqueryConnection initialize() throws Exception {
    bigquery = new Bigquery.Builder(httpTransport, jsonFactory, credential)
        .setApplicationName(getClass().getSimpleName())
        .build();
    createDatasetIfNeeded(datasetId);
    createDatasetIfNeeded(TEMP_DATASET_NAME);
    return this;
  }

  /**
   * Closes the BigqueryConnection object by shutting down the executor service. Clients
   * should only call this after all ListenableFutures obtained from BigqueryConnection methods
   * have resolved; this method does not block on their completion.
   */
  @Override
  public void close() {
    service.shutdown();
  }

  /** Returns a partially built DestinationTable with the default dataset and overwrite behavior. */
  public DestinationTable.Builder buildDestinationTable(String tableName) {
    return new DestinationTable.Builder()
        .datasetId(datasetId)
        .type(TableType.TABLE)
        .name(tableName)
        .overwrite(overwrite);
  }

  /**
   * Returns a partially built DestinationTable with a randomly generated name under the default
   * temporary table dataset, with the default TTL and overwrite behavior.
   */
  public DestinationTable.Builder buildTemporaryTable() {
    return new DestinationTable.Builder()
        .datasetId(TEMP_DATASET_NAME)
        .type(TableType.TABLE)
        .name(getRandomTableName())
        .timeToLive(TEMP_TABLE_TTL)
        .overwrite(overwrite);
  }

  /** Returns a random table name consisting only of the chars {@code [a-v0-9_]}. */
  private String getRandomTableName() {
    byte[] randBytes = new byte[8];  // 64 bits of randomness ought to be plenty.
    random.nextBytes(randBytes);
    return "_" + BaseEncoding.base32Hex().lowerCase().omitPadding().encode(randBytes);
  }
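  // Illustrative sketch: callers typically finish a partially built destination table themselves
  // before handing it to a job; the table name and description here are hypothetical.
  //
  //   DestinationTable logs = buildDestinationTable("activity_logs")
  //       .description("Daily activity log snapshot")
  //       .build();
  //   DestinationTable scratch = buildTemporaryTable().build();  // expires after TEMP_TABLE_TTL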
  /**
   * A function that updates the specified Bigquery table to reflect the metadata from the input
   * DestinationTable, passing the same DestinationTable through as the output. If the specified
   * table does not already exist, it will be inserted into the dataset.
   *
   * <p>Clients can call this function directly to update a table on demand, or can pass it to
   * Futures.transform() to update a table produced as the asynchronous result of a load or query
   * job (e.g. to add a description to it).
   */
  private class UpdateTableFunction implements Function<DestinationTable, DestinationTable> {
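    // Illustrative sketch of the Futures.transform() usage described above; "loadJobFuture" is a
    // hypothetical ListenableFuture<DestinationTable> produced by a load or query job:
    //
    //   ListenableFuture<DestinationTable> updated =
    //       Futures.transform(loadJobFuture, new UpdateTableFunction());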
   * <p>Returns a ListenableFuture that holds the ImmutableTable on success.
   */
  public ListenableFuture

   * <p>This table is fully materialized in memory (not lazily loaded), so it should not be used
   * with queries expected to return large results.
   */
  private ImmutableTable

   * <p>Returns a ListenableFuture that holds the destination GCS URI on success.
   */
public ListenableFuture