mirror of
https://github.com/google/nomulus.git
synced 2025-04-30 03:57:51 +02:00
This change renames directories in preparation for the great package rename. The repository is now in a broken state because the code itself hasn't been updated. However, this should ensure that git correctly preserves history for each file.
169 lines
6.4 KiB
Java
169 lines
6.4 KiB
Java
// Copyright 2016 The Domain Registry Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package com.google.domain.registry.bigquery;
|
|
|
|
import com.google.api.services.bigquery.model.JobReference;
|
|
|
|
import org.joda.time.DateTime;
|
|
import org.joda.time.format.DateTimeFormatter;
|
|
import org.joda.time.format.DateTimeFormatterBuilder;
|
|
import org.joda.time.format.DateTimeParser;
|
|
import org.joda.time.format.ISODateTimeFormat;
|
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
|
/** Utilities related to Bigquery. */
|
|
public class BigqueryUtils {
|
|
|
|
/**
 * The modes that a Bigquery schema field can be declared with.
 */
public enum FieldMode {
  NULLABLE, REQUIRED, REPEATED;

  /**
   * Returns the label for this mode as it should be written in a Bigquery schema, which is
   * simply the constant's own name.
   */
  public String schemaName() {
    return this.name();
  }
}
|
|
|
|
/**
 * The types that a Bigquery schema field can be declared with.
 */
public enum FieldType {
  STRING, INTEGER, FLOAT, TIMESTAMP, RECORD, BOOLEAN;

  /**
   * Returns the label for this type as it should be written in a Bigquery schema, which is
   * simply the constant's own name.
   */
  public String schemaName() {
    return this.name();
  }
}
|
|
|
|
/**
 * The source data formats that can be named for a Bigquery load job.
 */
public enum SourceFormat {
  CSV, NEWLINE_DELIMITED_JSON, DATASTORE_BACKUP
}
|
|
|
|
/**
 * The output data formats that can be named for a Bigquery extract job.
 */
public enum DestinationFormat {
  CSV, NEWLINE_DELIMITED_JSON
}
|
|
|
|
/**
 * The kinds of Bigquery table: a regular table or a view.
 */
public enum TableType {
  TABLE, VIEW
}
|
|
|
|
/**
 * The write dispositions Bigquery offers, i.e. the policy applied when the destination table of
 * a write already exists.
 *
 * @see <a href="https://developers.google.com/bigquery/docs/reference/v2/jobs">API docs</a>
 */
public enum WriteDisposition {

  /** Write only when the destination table does not exist yet or exists but is empty. */
  WRITE_EMPTY,

  /** When the destination table exists, replace its contents with the new data. */
  WRITE_TRUNCATE,

  /** When the destination table exists, add the new data to what is already there. */
  WRITE_APPEND
}
|
|
|
|
/**
|
|
* A {@code DateTimeFormatter} that defines how to print DateTimes in a string format that
|
|
* BigQuery can interpret and how to parse the string formats that BigQuery emits into DateTimes.
|
|
* <p>
|
|
* The general format definition is "YYYY-MM-DD HH:MM:SS.SSS[ ZZ]", where the fractional seconds
|
|
* portion can have 0-6 decimal places (although we restrict it to 0-3 here since Joda DateTime
|
|
* only supports up to millisecond precision) and the zone if not specified defaults to UTC.
|
|
* <p>
|
|
* Although we expect a zone specification of "UTC" when parsing, we don't emit it when printing
|
|
* because in some cases BigQuery does not allow any time zone specification (instead it assumes
|
|
* UTC for whatever input you provide) for input timestamp strings (see b/16380363).
|
|
*
|
|
* @see "https://developers.google.com/bigquery/timestamp"
|
|
*/
|
|
public static final DateTimeFormatter BIGQUERY_TIMESTAMP_FORMAT = new DateTimeFormatterBuilder()
|
|
.append(ISODateTimeFormat.date())
|
|
.appendLiteral(' ')
|
|
.append(
|
|
// For printing, always print out the milliseconds.
|
|
ISODateTimeFormat.hourMinuteSecondMillis().getPrinter(),
|
|
// For parsing, we need a series of parsers to correctly handle the milliseconds.
|
|
new DateTimeParser[] {
|
|
// Try to parse the time with milliseconds first, which requires at least one
|
|
// fractional second digit, and if that fails try to parse without milliseconds.
|
|
ISODateTimeFormat.hourMinuteSecondMillis().getParser(),
|
|
ISODateTimeFormat.hourMinuteSecond().getParser()})
|
|
// Print UTC as the empty string since BigQuery's TIMESTAMP() function does not accept any
|
|
// time zone specification, but require "UTC" on parsing. Since we force this formatter to
|
|
// always use UTC below, the other arguments do not matter.
|
|
//
|
|
// TODO(b/26162667): replace this with appendLiteral(" UTC") if b/16380363 gets resolved.
|
|
.appendTimeZoneOffset("", " UTC", false, 1, 1)
|
|
.toFormatter()
|
|
.withZoneUTC();
|
|
|
|
/**
|
|
* Returns the human-readable string version of the given DateTime, suitable for conversion
|
|
* within BigQuery from a string literal into a BigQuery timestamp type.
|
|
*/
|
|
public static String toBigqueryTimestampString(DateTime dateTime) {
|
|
return BIGQUERY_TIMESTAMP_FORMAT.print(dateTime);
|
|
}
|
|
|
|
/** Returns the DateTime for a given human-readable string-formatted BigQuery timestamp. */
|
|
public static DateTime fromBigqueryTimestampString(String timestampString) {
|
|
return BIGQUERY_TIMESTAMP_FORMAT.parseDateTime(timestampString);
|
|
}
|
|
|
|
/**
|
|
* Converts a time (in TimeUnits since the epoch) into a numeric string that BigQuery understands
|
|
* as a timestamp: the decimal number of seconds since the epoch, precise up to microseconds.
|
|
*
|
|
* @see "https://developers.google.com/bigquery/timestamp"
|
|
*/
|
|
public static String toBigqueryTimestamp(long timestamp, TimeUnit unit) {
|
|
long seconds = unit.toSeconds(timestamp);
|
|
long fractionalSeconds = unit.toMicros(timestamp) % 1000000;
|
|
return String.format("%d.%06d", seconds, fractionalSeconds);
|
|
}
|
|
|
|
/**
|
|
* Converts a {@link DateTime} into a numeric string that BigQuery understands as a timestamp:
|
|
* the decimal number of seconds since the epoch, precise up to microseconds.
|
|
*
|
|
* <p>Note that since {@code DateTime} only stores milliseconds, the last 3 digits will be zero.
|
|
*
|
|
* @see "https://developers.google.com/bigquery/timestamp"
|
|
*/
|
|
public static String toBigqueryTimestamp(DateTime dateTime) {
|
|
return toBigqueryTimestamp(dateTime.getMillis(), TimeUnit.MILLISECONDS);
|
|
}
|
|
|
|
/**
|
|
* Returns the canonical string format for a JobReference object (the project ID and then job ID,
|
|
* delimited by a single colon) since JobReference.toString() is not customized to return it.
|
|
*/
|
|
public static String toJobReferenceString(JobReference jobRef) {
|
|
return jobRef.getProjectId() + ":" + jobRef.getJobId();
|
|
}
|
|
}
|