Cut over to synthetic OneTime billing events

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=147048115
This commit is contained in:
ctingue 2017-02-09 10:11:22 -08:00 committed by Ben McIlwain
parent 090983cc7f
commit cb215adac3
4 changed files with 20 additions and 363 deletions

View file

@ -49,8 +49,6 @@ final class MakeBillingTablesCommand extends BigqueryCommand {
private static final SqlTemplate REGISTRY_DATA_SQL = getSql("registry_data_view.sql");
private static final SqlTemplate CREDIT_DATA_SQL = getSql("credit_data_view.sql");
private static final SqlTemplate CREDIT_BALANCE_DATA_SQL = getSql("credit_balance_data_view.sql");
private static final SqlTemplate PREMIUM_LIST_DATA_SQL = getSql("premium_list_data_view.sql");
private static final SqlTemplate RECURRING_DATA_SQL = getSql("recurring_event_data_view.sql");
private static final SqlTemplate BILLING_DATA_SQL = getSql("billing_data_view.sql");
/** Runs the main billing table/view creation logic. */
@ -66,8 +64,6 @@ final class MakeBillingTablesCommand extends BigqueryCommand {
makeRegistryView();
makeCreditView();
makeCreditBalanceView();
makePremiumListView();
makeRecurringEventView();
makeBillingView();
} catch (TableCreationException e) {
// Swallow since we already will have printed an error message.
@ -161,36 +157,6 @@ final class MakeBillingTablesCommand extends BigqueryCommand {
.build()));
}
/** Generates a view of premium list data for each TLD. */
private void makePremiumListView() throws Exception {
handleTableCreation(
"premium list data view",
bigquery().query(
PREMIUM_LIST_DATA_SQL
.put("SOURCE_DATASET", sourceDatasetId)
.put("DEST_DATASET", bigquery().getDatasetId())
.build(),
bigquery().buildDestinationTable("PremiumListData")
.description("Synthetic view of premium list data.")
.type(TableType.VIEW)
.build()));
}
/** Generates a view of recurring billing events expanded into individual recurrences. */
private void makeRecurringEventView() throws Exception {
handleTableCreation(
"recurring event data view",
bigquery().query(
RECURRING_DATA_SQL
.put("SOURCE_DATASET", sourceDatasetId)
.put("DEST_DATASET", bigquery().getDatasetId())
.build(),
bigquery().buildDestinationTable("RecurringEventData")
.description("Synthetic view of recurring billing event recurrence data.")
.type(TableType.VIEW)
.build()));
}
/**
* Generates a view of consolidated billing information that includes currency conversions,
* registrar details, and cancellation flags on top of the original BillingEvent.OneTime data.

View file

@ -14,8 +14,7 @@
-- Billing Data View SQL
--
-- This query post-processes the OneTime billing events, combines them with
-- synthetic OneTime events generated from Recurring events, and then annotates
-- This query post-processes the OneTime billing events and then annotates
-- the resulting data with additional information from the Registrar,
-- DomainBase, Currency, and Cancellation tables.
SELECT
@ -40,18 +39,15 @@ SELECT
Cancellation.cancellationId IS NOT NULL AS cancelled,
Cancellation.cancellationTime AS cancellationTime,
FROM (
-- Subquery for common processing shared across OneTime and Recurring data,
-- which is parsing the string cost into currency and amountMinor.
SELECT
id,
kind, -- Needed for joining Cancellations correctly.
__key__.id AS id,
billingTime,
eventTime,
clientId,
tld,
reason,
targetId,
domainRepoId,
REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId,
periodYears,
cost,
-- TODO(b/19031545): Find cleaner way to parse out currency and amount.
@ -61,62 +57,23 @@ FROM (
-- (i.e. currency, space, and period) and then converting to integer.
INTEGER(REGEXP_REPLACE(cost, r'\D+', '')) AS amountMinor,
-- Convert repeated flags field into flat comma-delimited string field.
flags,
GROUP_CONCAT(flags) WITHIN RECORD AS flags,
-- Cancellations for recurring events will point to the recurring event's
-- key, which is stored in cancellationMatchingBillingEvent. The path
-- contains kind, id, and domainRepoId, all of which must match, so just
-- use the path.
COALESCE(cancellationMatchingBillingEvent.path, __key__.path)
AS cancellationMatchingPath,
FROM (
-- Extract OneTime records from raw snapshot data.
SELECT
__key__.id AS id,
__key__.kind AS kind,
billingTime,
eventTime,
clientId,
tld,
reason,
targetId,
-- TODO(b/20828509): see if this can be expressed more cleanly.
REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId,
periodYears,
cost,
GROUP_CONCAT(flags) WITHIN RECORD AS flags,
FROM (
SELECT
*,
-- TODO(b/20828509): make this robust to multi-part TLDS.
LAST(SPLIT(targetId, '.')) AS tld
FROM
[%SOURCE_DATASET%.OneTime]
WHERE
-- Filter out Registry 1.0 data - TODO(b/20828509): remove this.
__key__.namespace = '')
WHERE
-- Filter out prober data.
tld IN
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
-- TODO(b/27562876): Filter out synthetic OneTime events until we
-- verify that expanded OneTime events via MapReduce are correct and
-- complete.
AND syntheticCreationTime IS NULL
), (
-- Extract synthetic recurring events from view of Recurring data.
--
-- TODO(b/27562876): Drop this section of the query once we verify
-- that expanded OneTime events via MapReduce are correct and complete.
SELECT
id,
kind,
billingTime,
eventTime,
clientId,
tld,
reason,
targetId,
domainRepoId,
periodYears,
cost,
flags,
FROM
[%DEST_DATASET%.RecurringEventData]
)
*,
-- TODO(b/20828509): make this robust to multi-part TLDS.
LAST(SPLIT(targetId, '.')) AS tld
FROM [%SOURCE_DATASET%.OneTime])
WHERE
-- Filter out prober data.
tld IN
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
) AS BillingEvent
-- Join to pick up billing ID from registrar table.
@ -136,12 +93,7 @@ LEFT JOIN EACH (
__key__.id AS cancellationId,
-- Coalesce matching fields from refOneTime and refRecurring (only one or
-- the other will ever be populated) for joining against referenced event.
COALESCE(refOneTime.kind, refRecurring.kind) AS cancelledEventKind,
COALESCE(refOneTime.id, refRecurring.id) AS cancelledEventId,
-- TODO(b/20828509): see if this can be expressed more cleanly.
REGEXP_EXTRACT(
COALESCE(refOneTime.path, refRecurring.path),
'"DomainBase", "([^"]+)"') AS cancelledEventDomainRepoId,
COALESCE(refOneTime.path, refRecurring.path) AS cancelledEventPath,
eventTime AS cancellationTime,
billingTime AS cancellationBillingTime,
FROM (
@ -160,11 +112,7 @@ LEFT JOIN EACH (
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
) AS Cancellation
ON
BillingEvent.kind = Cancellation.cancelledEventKind
AND BillingEvent.id = Cancellation.cancelledEventId
-- Note: we need to include the repoId here to handle old pre-Registry-2.0
-- billing events that would have had ID collisions across TLDs.
AND BillingEvent.domainRepoId = Cancellation.cancelledEventDomainRepoId
BillingEvent.cancellationMatchingPath = Cancellation.cancelledEventPath
-- Require billing times to match so that cancellations for Recurring events
-- only apply to the specific recurrence being cancelled.
AND BillingEvent.billingTime = Cancellation.cancellationBillingTime

View file

@ -1,74 +0,0 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Premium List Data View SQL
--
-- This query generates a table of all current premium list data that
-- is active on a TLD in RegistryData.
SELECT
listName,
tld,
label,
CONCAT(label, '.', tld) AS domain,
price,
FROM (
-- Join the PremiumList info with RegistryData so that we can properly map
-- a given TLD to a given PremiumList (even though by convention the list
-- name equals the TLD name, this is not guaranteed). This effectively
-- produces one PremiumList copy for each TLD where it's the active list.
SELECT
listName,
tld,
-- We use the revision that's active on the PremiumList to determine
-- which PremiumListEntry entries constitute the current list data.
-- Since backups are only eventually consistent, this could bite us
-- if a PremiumList's data is ever replaced and the full replacement
-- entry-wise doesn't make it into the backup snapshot.
-- TODO(b/21445712): Figure out a way to avoid this issue?
revisionKeyPath,
FROM (
SELECT
__key__.name AS listName,
__key__.path AS keyPath,
revisionKey.path AS revisionKeyPath,
FROM
[%SOURCE_DATASET%.PremiumList]
) AS PremiumList
JOIN (
SELECT
tld,
premiumListPath,
FROM
[%DEST_DATASET%.RegistryData]
) AS RegistryData
ON
PremiumList.keyPath = RegistryData.premiumListPath
) AS PremiumList
-- Left join against the entries and pick up those parented on the active
-- revision that we got from PremiumList.
LEFT JOIN EACH (
SELECT
REGEXP_EXTRACT(__key__.path, '(.+), "PremiumListEntry", .*')
AS parentKeyPath,
__key__.name AS label,
price,
FROM
[%SOURCE_DATASET%.PremiumListEntry]
) AS PremiumListEntry
ON
PremiumList.revisionKeyPath = PremiumListEntry.parentKeyPath
ORDER BY
listName,
tld,
label

View file

@ -1,183 +0,0 @@
-- Copyright 2017 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Recurring Billing Event Data View SQL
--
-- This query expands Recurring billing events into individual rows for each
-- recurrence of the event that has happened up to the time the query is run.
-- Since the only Recurring events are for automatic renewals, this means each
-- row stands in for the renewal OneTime that would be created for a given
-- Recurring event if it were materialized as individual explicit renewals.
--
-- TODO(b/27562876): Drop this query once we begin materializing Recurring
-- billing events as synthetic OneTime events.
SELECT
id,
kind, -- Needed for joining Cancellation information downstream.
-- Add the appropriate autorenew grace period length for this TLD to determine
-- the time at which this auto-renewal should be billed.
DATE_ADD(eventTime, registry.autorenewGracePeriodSeconds, 'SECOND')
AS billingTime,
eventTime,
clientId,
recurrence.tld AS tld,
'RENEW' AS reason, -- These are always auto-renewals, so use RENEW.
targetId,
domainRepoId,
1 AS periodYears, -- Auto-renewals are always for 1 year.
-- Use the premium price if one's configured in the current list, otherwise
-- fall back to the TLD-wide standard renewal cost.
IF(premium.price IS NOT NULL, premium.price, renewCost) AS cost,
flags,
FROM (
-- Nested query to compute eventTime of this recurrence so it's available to
-- the JOIN against RegistryData.
SELECT
id,
kind,
tld,
-- Construct the event time for this recurrence by adding years to the
-- Recurring event's original event time. Leap years are not an issue since
-- we ensure that domains never expire (and hence never start to auto-renew)
-- on Feb. 29th, so the recurrence time (as month, day, and millis of day)
-- will always be preserved if we add whole numbers of years.
DATE_ADD(eventTime, yearsToAdd, 'YEAR') AS eventTime,
targetId,
domainRepoId,
clientId,
reason,
recurrenceEndTime,
flags,
FROM
-- Nested flattened query that expands single Recurring rows into multiple
-- rows for each recurrrence of that Recurring event. It does this by
-- computing the number of recurrences to generate, constructing a repeated
-- field with that cardinality, and then flattening that repeated field.
FLATTEN(
(
SELECT
id,
kind,
tld,
eventTime,
targetId,
domainRepoId,
clientId,
reason,
recurrenceEndTime,
flags,
-- Make a repeated field with N elements (N = "numRecurrenceYears") by
-- padding a string to length N, using the SPLIT() function with the
-- empty string delimiter to generate a repeated field with one element
-- per character, and then using POSITION() to convert that field into
-- one whose values are numeric ranging from 0 to N - 1.
-- TODO(b/20829992): replace this hack with a UDF?
POSITION(SPLIT(RPAD('', numRecurrenceYears, '.'), '')) - 1
AS yearsToAdd,
FROM (
-- Nested query to compute the number of recurrences in scope for each
-- Recurring event, to pass to the outer row-expansion query.
SELECT
__key__.id AS id,
__key__.kind AS kind,
-- TODO(b/20828509): make this robust to multi-part TLDs.
LAST(SPLIT(targetId, '.')) AS tld,
eventTime,
targetId,
-- TODO(b/20828509): see if this can be expressed more cleanly.
REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId,
clientId,
reason,
recurrenceEndTime,
-- The number of recurrences we generate is one per year for every
-- year from the event time (first recurrence) to the earlier of
-- either the current time or the end time, inclusive. This may
-- generate an extra recurrence at the end (if the recurrence end
-- time is earlier in the year than the recurrence) but we filter
-- it out below.
GREATEST(
0, -- Bound below by zero.
LEAST(YEAR(CURRENT_TIMESTAMP()), YEAR(recurrenceEndTime))
- YEAR(eventTime) + 1
) AS numRecurrenceYears,
-- Note: there has never been a Recurring event with a flag to date,
-- so the flags column does not exist for the Recurring table. If
-- we ever want to use real Recurring flags, this line would look
-- like "GROUP_CONCAT_UNQUOTED(flags) WITHIN RECORD AS flags".
-- Create a synthetic AUTO_RENEW flag to differentiate this record
-- from a manual renewal.
'AUTO_RENEW' AS flags,
FROM
[%SOURCE_DATASET%.Recurring]
WHERE
-- Filter out Registry 1.0 data - TODO(b/20828509): remove this.
__key__.namespace = ''
)
WHERE
-- Filter out prober data.
tld IN
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
-- Exclude recurring events that would generate zero recurrences,
-- which are those with an event time beyond the current year.
AND numRecurrenceYears > 0
),
-- Second argument to FLATTEN(), explained above.
yearsToAdd)
) AS recurrence
-- Join recurrence information by TLD to policy information for that TLD that
-- determines recurrence properties - grace period length and renewal cost.
LEFT OUTER JOIN (
SELECT
tld,
-- TODO(b/20764952): If we ever want to have more than one renew billing
-- cost, the logic here will need to pick the right cost based on the
-- eventTime value of the recurrence.
renewBillingCost AS renewCost,
autorenewGracePeriodSeconds,
FROM
[%DEST_DATASET%.RegistryData]
) AS registry
ON
recurrence.tld = registry.tld
-- Join recurrence information by SLD to premium pricing information for that
-- SLD, taken from the currently active premium list. Note that this doesn't
-- account in any way for a premium list that changes over time; we always use
-- the current list, regardless of when the recurrence is happening.
-- TODO(b/21445712): Make this reflect the pricing at the recurrence times.
LEFT JOIN EACH (
SELECT
domain,
price,
FROM
[%DEST_DATASET%.PremiumListData]
) AS premium
ON
recurrence.targetId = premium.domain
WHERE
-- Filter out generated recurrences where the event time of that recurrence
-- is at or after the end time of the recurring event (this handles the case
-- of generating an extra final recurrence, described above).
eventTime < recurrenceEndTime
-- Restrict this view to just recurrences that have "happened" by the time
-- the query is executed.
AND eventTime <= CURRENT_TIMESTAMP()
ORDER BY
-- Show the newest recurrences first, then break ties by ID and TLD, like
-- we do for BillingData.
billingTime DESC,
id,
tld