// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.domain.registry.bigquery;

import com.google.api.services.bigquery.model.JobReference;

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import org.joda.time.format.DateTimeParser;
import org.joda.time.format.ISODateTimeFormat;

import java.util.Locale;
import java.util.concurrent.TimeUnit;

/** Utilities related to Bigquery. */
public class BigqueryUtils {

  /** Bigquery modes for schema fields. */
  public enum FieldMode {
    NULLABLE,
    REQUIRED,
    REPEATED;

    /** Return the name of the field mode as it should appear in the Bigquery schema. */
    public String schemaName() {
      return name();
    }
  }

  /** Bigquery schema field types. */
  public enum FieldType {
    STRING,
    INTEGER,
    FLOAT,
    TIMESTAMP,
    RECORD,
    BOOLEAN;

    /** Return the name of the field type as it should appear in the Bigquery schema. */
    public String schemaName() {
      return name();
    }
  }

  /** Source formats for Bigquery load jobs. */
  public enum SourceFormat {
    CSV,
    NEWLINE_DELIMITED_JSON,
    DATASTORE_BACKUP
  }

  /** Destination formats for Bigquery extract jobs. */
  public enum DestinationFormat {
    CSV,
    NEWLINE_DELIMITED_JSON
  }

  /** Bigquery table types (i.e. regular table or view). */
  public enum TableType {
    TABLE,
    VIEW
  }

  /**
   * Bigquery write dispositions (i.e. what to do about writing to an existing table).
   *
   * @see "BigQuery Jobs API reference, writeDisposition field"
   */
  public enum WriteDisposition {
    /** Only write to the table if there is no existing table or if it is empty. */
    WRITE_EMPTY,
    /** If the table already exists, overwrite it with the new data. */
    WRITE_TRUNCATE,
    /** If the table already exists, append the data to the table. */
    WRITE_APPEND
  }

  /**
   * A {@code DateTimeFormatter} that defines how to print DateTimes in a string format that
   * BigQuery can interpret and how to parse the string formats that BigQuery emits into DateTimes.
   *
   * <p>The general format definition is "YYYY-MM-DD HH:MM:SS.SSS[ ZZ]", where the fractional
   * seconds portion can have 0-6 decimal places (although we restrict it to 0-3 here since Joda
   * DateTime only supports up to millisecond precision) and the zone if not specified defaults to
   * UTC.
   *
   * <p>Although we expect a zone specification of "UTC" when parsing, we don't emit it when
   * printing because in some cases BigQuery does not allow any time zone specification (instead it
   * assumes UTC for whatever input you provide) for input timestamp strings (see b/16380363).
   *
   * @see "https://developers.google.com/bigquery/timestamp"
   */
  public static final DateTimeFormatter BIGQUERY_TIMESTAMP_FORMAT =
      new DateTimeFormatterBuilder()
          .append(ISODateTimeFormat.date())
          .appendLiteral(' ')
          .append(
              // For printing, always print out the milliseconds.
              ISODateTimeFormat.hourMinuteSecondMillis().getPrinter(),
              // For parsing, we need a series of parsers to correctly handle the milliseconds.
              new DateTimeParser[] {
                // Try to parse the time with milliseconds first, which requires at least one
                // fractional second digit, and if that fails try to parse without milliseconds.
                ISODateTimeFormat.hourMinuteSecondMillis().getParser(),
                ISODateTimeFormat.hourMinuteSecond().getParser()
              })
          // Print UTC as the empty string since BigQuery's TIMESTAMP() function does not accept
          // any time zone specification, but require "UTC" on parsing. Since we force this
          // formatter to always use UTC below, the other arguments do not matter.
          //
          // TODO(b/26162667): replace this with appendLiteral(" UTC") if b/16380363 gets resolved.
          .appendTimeZoneOffset("", " UTC", false, 1, 1)
          .toFormatter()
          .withZoneUTC();

  /**
   * Returns the human-readable string version of the given DateTime, suitable for conversion
   * within BigQuery from a string literal into a BigQuery timestamp type.
   *
   * @param dateTime the instant to print using {@link #BIGQUERY_TIMESTAMP_FORMAT}
   * @return the formatted timestamp string
   */
  public static String toBigqueryTimestampString(DateTime dateTime) {
    return BIGQUERY_TIMESTAMP_FORMAT.print(dateTime);
  }

  /**
   * Returns the DateTime for a given human-readable string-formatted BigQuery timestamp.
   *
   * @param timestampString the text to parse using {@link #BIGQUERY_TIMESTAMP_FORMAT}
   * @return the parsed DateTime
   * @throws IllegalArgumentException if the text cannot be parsed by the formatter
   */
  public static DateTime fromBigqueryTimestampString(String timestampString) {
    return BIGQUERY_TIMESTAMP_FORMAT.parseDateTime(timestampString);
  }

  /**
   * Converts a time (in TimeUnits since the epoch) into a numeric string that BigQuery understands
   * as a timestamp: the decimal number of seconds since the epoch, precise up to microseconds.
   *
   * <p>Precision finer than one microsecond is truncated toward zero. Times before the epoch are
   * rendered as a single negative decimal number (e.g. -1500 milliseconds becomes
   * {@code "-1.500000"}).
   *
   * @param timestamp the time since the epoch, expressed in {@code unit}
   * @param unit the unit in which {@code timestamp} is expressed
   * @return the timestamp as decimal seconds with exactly six fractional digits
   * @see "https://developers.google.com/bigquery/timestamp"
   */
  public static String toBigqueryTimestamp(long timestamp, TimeUnit unit) {
    long seconds = unit.toSeconds(timestamp);
    // Both conversions truncate toward zero, so for negative inputs the remainder is <= 0 and
    // always lies strictly between -1,000,000 and 1,000,000.
    long fractionalMicros = unit.toMicros(timestamp) % 1000000;
    // "%d" already prints the sign when the whole-seconds part is negative. The sign must only be
    // added by hand when the value lies strictly between -1 and 0 seconds, where the whole part
    // is zero and would otherwise lose it.
    String sign = (seconds == 0 && fractionalMicros < 0) ? "-" : "";
    // Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale.
    return String.format(
        Locale.ROOT, "%s%d.%06d", sign, seconds, Math.abs(fractionalMicros));
  }

  /**
   * Converts a {@link DateTime} into a numeric string that BigQuery understands as a timestamp:
   * the decimal number of seconds since the epoch, precise up to microseconds.
   *
   * <p>Note that since {@code DateTime} only stores milliseconds, the last 3 digits will be zero.
   *
   * @param dateTime the instant to convert
   * @return the timestamp as decimal seconds with exactly six fractional digits
   * @see "https://developers.google.com/bigquery/timestamp"
   */
  public static String toBigqueryTimestamp(DateTime dateTime) {
    return toBigqueryTimestamp(dateTime.getMillis(), TimeUnit.MILLISECONDS);
  }

  /**
   * Returns the canonical string format for a JobReference object (the project ID and then job ID,
   * delimited by a single colon) since JobReference.toString() is not customized to return it.
   *
   * @param jobRef the job reference whose project ID and job ID are joined
   * @return a string of the form {@code projectId:jobId}
   */
  public static String toJobReferenceString(JobReference jobRef) {
    return jobRef.getProjectId() + ":" + jobRef.getJobId();
  }
}