From cb215adac3f188bb75145108b6139fa873f6ae70 Mon Sep 17 00:00:00 2001 From: ctingue Date: Thu, 9 Feb 2017 10:11:22 -0800 Subject: [PATCH] Cut over to synthetic OneTime billing events ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=147048115 --- .../tools/MakeBillingTablesCommand.java | 34 ---- .../registry/tools/sql/billing_data_view.sql | 92 ++------- .../tools/sql/premium_list_data_view.sql | 74 ------- .../tools/sql/recurring_event_data_view.sql | 183 ------------------ 4 files changed, 20 insertions(+), 363 deletions(-) delete mode 100644 java/google/registry/tools/sql/premium_list_data_view.sql delete mode 100644 java/google/registry/tools/sql/recurring_event_data_view.sql diff --git a/java/google/registry/tools/MakeBillingTablesCommand.java b/java/google/registry/tools/MakeBillingTablesCommand.java index 091e1b47c..b51d1d0fe 100644 --- a/java/google/registry/tools/MakeBillingTablesCommand.java +++ b/java/google/registry/tools/MakeBillingTablesCommand.java @@ -49,8 +49,6 @@ final class MakeBillingTablesCommand extends BigqueryCommand { private static final SqlTemplate REGISTRY_DATA_SQL = getSql("registry_data_view.sql"); private static final SqlTemplate CREDIT_DATA_SQL = getSql("credit_data_view.sql"); private static final SqlTemplate CREDIT_BALANCE_DATA_SQL = getSql("credit_balance_data_view.sql"); - private static final SqlTemplate PREMIUM_LIST_DATA_SQL = getSql("premium_list_data_view.sql"); - private static final SqlTemplate RECURRING_DATA_SQL = getSql("recurring_event_data_view.sql"); private static final SqlTemplate BILLING_DATA_SQL = getSql("billing_data_view.sql"); /** Runs the main billing table/view creation logic. */ @@ -66,8 +64,6 @@ final class MakeBillingTablesCommand extends BigqueryCommand { makeRegistryView(); makeCreditView(); makeCreditBalanceView(); - makePremiumListView(); - makeRecurringEventView(); makeBillingView(); } catch (TableCreationException e) { // Swallow since we already will have printed an error message. @@ -161,36 +157,6 @@ final class MakeBillingTablesCommand extends BigqueryCommand { .build())); } - /** Generates a view of premium list data for each TLD. */ - private void makePremiumListView() throws Exception { - handleTableCreation( - "premium list data view", - bigquery().query( - PREMIUM_LIST_DATA_SQL - .put("SOURCE_DATASET", sourceDatasetId) - .put("DEST_DATASET", bigquery().getDatasetId()) - .build(), - bigquery().buildDestinationTable("PremiumListData") - .description("Synthetic view of premium list data.") - .type(TableType.VIEW) - .build())); - } - - /** Generates a view of recurring billing events expanded into individual recurrences. */ - private void makeRecurringEventView() throws Exception { - handleTableCreation( - "recurring event data view", - bigquery().query( - RECURRING_DATA_SQL - .put("SOURCE_DATASET", sourceDatasetId) - .put("DEST_DATASET", bigquery().getDatasetId()) - .build(), - bigquery().buildDestinationTable("RecurringEventData") - .description("Synthetic view of recurring billing event recurrence data.") - .type(TableType.VIEW) - .build())); - } - /** * Generates a view of consolidated billing information that includes currency conversions, * registrar details, and cancellation flags on top of the original BillingEvent.OneTime data. diff --git a/java/google/registry/tools/sql/billing_data_view.sql b/java/google/registry/tools/sql/billing_data_view.sql index 24f85a3c2..5a8a7f14d 100644 --- a/java/google/registry/tools/sql/billing_data_view.sql +++ b/java/google/registry/tools/sql/billing_data_view.sql @@ -14,8 +14,7 @@ -- Billing Data View SQL -- --- This query post-processes the OneTime billing events, combines them with --- synthetic OneTime events generated from Recurring events, and then annotates +-- This query post-processes the OneTime billing events and then annotates -- the resulting data with additional information from the Registrar, -- DomainBase, Currency, and Cancellation tables. SELECT @@ -40,18 +39,15 @@ SELECT Cancellation.cancellationId IS NOT NULL AS cancelled, Cancellation.cancellationTime AS cancellationTime, FROM ( - -- Subquery for common processing shared across OneTime and Recurring data, - -- which is parsing the string cost into currency and amountMinor. SELECT - id, - kind, -- Needed for joining Cancellations correctly. + __key__.id AS id, billingTime, eventTime, clientId, tld, reason, targetId, - domainRepoId, + REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId, periodYears, cost, -- TODO(b/19031545): Find cleaner way to parse out currency and amount. @@ -61,62 +57,23 @@ FROM ( -- (i.e. currency, space, and period) and then converting to integer. INTEGER(REGEXP_REPLACE(cost, r'\D+', '')) AS amountMinor, -- Convert repeated flags field into flat comma-delimited string field. - flags, + GROUP_CONCAT(flags) WITHIN RECORD AS flags, + -- Cancellations for recurring events will point to the recurring event's + -- key, which is stored in cancellationMatchingBillingEvent. The path + -- contains kind, id, and domainRepoId, all of which must match, so just + -- use the path. + COALESCE(cancellationMatchingBillingEvent.path, __key__.path) + AS cancellationMatchingPath, FROM ( - -- Extract OneTime records from raw snapshot data. SELECT - __key__.id AS id, - __key__.kind AS kind, - billingTime, - eventTime, - clientId, - tld, - reason, - targetId, - -- TODO(b/20828509): see if this can be expressed more cleanly. - REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId, - periodYears, - cost, - GROUP_CONCAT(flags) WITHIN RECORD AS flags, - FROM ( - SELECT - *, - -- TODO(b/20828509): make this robust to multi-part TLDS. - LAST(SPLIT(targetId, '.')) AS tld - FROM - [%SOURCE_DATASET%.OneTime] - WHERE - -- Filter out Registry 1.0 data - TODO(b/20828509): remove this. - __key__.namespace = '') - WHERE - -- Filter out prober data. - tld IN - (SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL') - -- TODO(b/27562876): Filter out synthetic OneTime events until we - -- verify that expanded OneTime events via MapReduce are correct and - -- complete. - AND syntheticCreationTime IS NULL - ), ( - -- Extract synthetic recurring events from view of Recurring data. - -- - -- TODO(b/27562876): Drop this section of the query once we verify - -- that expanded OneTime events via MapReduce are correct and complete. - SELECT - id, - kind, - billingTime, - eventTime, - clientId, - tld, - reason, - targetId, - domainRepoId, - periodYears, - cost, - flags, - FROM - [%DEST_DATASET%.RecurringEventData] - ) + *, + -- TODO(b/20828509): make this robust to multi-part TLDS. + LAST(SPLIT(targetId, '.')) AS tld + FROM [%SOURCE_DATASET%.OneTime]) + WHERE + -- Filter out prober data. + tld IN + (SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL') ) AS BillingEvent -- Join to pick up billing ID from registrar table. @@ -136,12 +93,7 @@ LEFT JOIN EACH ( __key__.id AS cancellationId, -- Coalesce matching fields from refOneTime and refRecurring (only one or -- the other will ever be populated) for joining against referenced event. - COALESCE(refOneTime.kind, refRecurring.kind) AS cancelledEventKind, - COALESCE(refOneTime.id, refRecurring.id) AS cancelledEventId, - -- TODO(b/20828509): see if this can be expressed more cleanly. - REGEXP_EXTRACT( - COALESCE(refOneTime.path, refRecurring.path), - '"DomainBase", "([^"]+)"') AS cancelledEventDomainRepoId, + COALESCE(refOneTime.path, refRecurring.path) AS cancelledEventPath, eventTime AS cancellationTime, billingTime AS cancellationBillingTime, FROM ( @@ -160,11 +112,7 @@ LEFT JOIN EACH ( (SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL') ) AS Cancellation ON - BillingEvent.kind = Cancellation.cancelledEventKind - AND BillingEvent.id = Cancellation.cancelledEventId - -- Note: we need to include the repoId here to handle old pre-Registry-2.0 - -- billing events that would have had ID collisions across TLDs. - AND BillingEvent.domainRepoId = Cancellation.cancelledEventDomainRepoId + BillingEvent.cancellationMatchingPath = Cancellation.cancelledEventPath -- Require billing times to match so that cancellations for Recurring events -- only apply to the specific recurrence being cancelled. AND BillingEvent.billingTime = Cancellation.cancellationBillingTime diff --git a/java/google/registry/tools/sql/premium_list_data_view.sql b/java/google/registry/tools/sql/premium_list_data_view.sql deleted file mode 100644 index b1c3e49d4..000000000 --- a/java/google/registry/tools/sql/premium_list_data_view.sql +++ /dev/null @@ -1,74 +0,0 @@ --- Copyright 2017 The Nomulus Authors. All Rights Reserved. --- --- Licensed under the Apache License, Version 2.0 (the "License"); --- you may not use this file except in compliance with the License. --- You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. - --- Premium List Data View SQL --- --- This query generates a table of all current premium list data that --- is active on a TLD in RegistryData. -SELECT - listName, - tld, - label, - CONCAT(label, '.', tld) AS domain, - price, -FROM ( - -- Join the PremiumList info with RegistryData so that we can properly map - -- a given TLD to a given PremiumList (even though by convention the list - -- name equals the TLD name, this is not guaranteed). This effectively - -- produces one PremiumList copy for each TLD where it's the active list. - SELECT - listName, - tld, - -- We use the revision that's active on the PremiumList to determine - -- which PremiumListEntry entries constitute the current list data. - -- Since backups are only eventually consistent, this could bite us - -- if a PremiumList's data is ever replaced and the full replacement - -- entry-wise doesn't make it into the backup snapshot. - -- TODO(b/21445712): Figure out a way to avoid this issue? - revisionKeyPath, - FROM ( - SELECT - __key__.name AS listName, - __key__.path AS keyPath, - revisionKey.path AS revisionKeyPath, - FROM - [%SOURCE_DATASET%.PremiumList] - ) AS PremiumList - JOIN ( - SELECT - tld, - premiumListPath, - FROM - [%DEST_DATASET%.RegistryData] - ) AS RegistryData - ON - PremiumList.keyPath = RegistryData.premiumListPath - ) AS PremiumList --- Left join against the entries and pick up those parented on the active --- revision that we got from PremiumList. -LEFT JOIN EACH ( - SELECT - REGEXP_EXTRACT(__key__.path, '(.+), "PremiumListEntry", .*') - AS parentKeyPath, - __key__.name AS label, - price, - FROM - [%SOURCE_DATASET%.PremiumListEntry] - ) AS PremiumListEntry -ON - PremiumList.revisionKeyPath = PremiumListEntry.parentKeyPath -ORDER BY - listName, - tld, - label diff --git a/java/google/registry/tools/sql/recurring_event_data_view.sql b/java/google/registry/tools/sql/recurring_event_data_view.sql deleted file mode 100644 index 3d679e96d..000000000 --- a/java/google/registry/tools/sql/recurring_event_data_view.sql +++ /dev/null @@ -1,183 +0,0 @@ --- Copyright 2017 The Nomulus Authors. All Rights Reserved. --- --- Licensed under the Apache License, Version 2.0 (the "License"); --- you may not use this file except in compliance with the License. --- You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. - --- Recurring Billing Event Data View SQL --- --- This query expands Recurring billing events into individual rows for each --- recurrence of the event that has happened up to the time the query is run. --- Since the only Recurring events are for automatic renewals, this means each --- row stands in for the renewal OneTime that would be created for a given --- Recurring event if it were materialized as individual explicit renewals. --- --- TODO(b/27562876): Drop this query once we begin materializing Recurring --- billing events as synthetic OneTime events. -SELECT - id, - kind, -- Needed for joining Cancellation information downstream. - -- Add the appropriate autorenew grace period length for this TLD to determine - -- the time at which this auto-renewal should be billed. - DATE_ADD(eventTime, registry.autorenewGracePeriodSeconds, 'SECOND') - AS billingTime, - eventTime, - clientId, - recurrence.tld AS tld, - 'RENEW' AS reason, -- These are always auto-renewals, so use RENEW. - targetId, - domainRepoId, - 1 AS periodYears, -- Auto-renewals are always for 1 year. - -- Use the premium price if one's configured in the current list, otherwise - -- fall back to the TLD-wide standard renewal cost. - IF(premium.price IS NOT NULL, premium.price, renewCost) AS cost, - flags, -FROM ( - -- Nested query to compute eventTime of this recurrence so it's available to - -- the JOIN against RegistryData. - SELECT - id, - kind, - tld, - -- Construct the event time for this recurrence by adding years to the - -- Recurring event's original event time. Leap years are not an issue since - -- we ensure that domains never expire (and hence never start to auto-renew) - -- on Feb. 29th, so the recurrence time (as month, day, and millis of day) - -- will always be preserved if we add whole numbers of years. - DATE_ADD(eventTime, yearsToAdd, 'YEAR') AS eventTime, - targetId, - domainRepoId, - clientId, - reason, - recurrenceEndTime, - flags, - FROM - -- Nested flattened query that expands single Recurring rows into multiple - -- rows for each recurrrence of that Recurring event. It does this by - -- computing the number of recurrences to generate, constructing a repeated - -- field with that cardinality, and then flattening that repeated field. - FLATTEN( - ( - SELECT - id, - kind, - tld, - eventTime, - targetId, - domainRepoId, - clientId, - reason, - recurrenceEndTime, - flags, - -- Make a repeated field with N elements (N = "numRecurrenceYears") by - -- padding a string to length N, using the SPLIT() function with the - -- empty string delimiter to generate a repeated field with one element - -- per character, and then using POSITION() to convert that field into - -- one whose values are numeric ranging from 0 to N - 1. - -- TODO(b/20829992): replace this hack with a UDF? - POSITION(SPLIT(RPAD('', numRecurrenceYears, '.'), '')) - 1 - AS yearsToAdd, - FROM ( - -- Nested query to compute the number of recurrences in scope for each - -- Recurring event, to pass to the outer row-expansion query. - SELECT - __key__.id AS id, - __key__.kind AS kind, - -- TODO(b/20828509): make this robust to multi-part TLDs. - LAST(SPLIT(targetId, '.')) AS tld, - eventTime, - targetId, - -- TODO(b/20828509): see if this can be expressed more cleanly. - REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId, - clientId, - reason, - recurrenceEndTime, - -- The number of recurrences we generate is one per year for every - -- year from the event time (first recurrence) to the earlier of - -- either the current time or the end time, inclusive. This may - -- generate an extra recurrence at the end (if the recurrence end - -- time is earlier in the year than the recurrence) but we filter - -- it out below. - GREATEST( - 0, -- Bound below by zero. - LEAST(YEAR(CURRENT_TIMESTAMP()), YEAR(recurrenceEndTime)) - - YEAR(eventTime) + 1 - ) AS numRecurrenceYears, - -- Note: there has never been a Recurring event with a flag to date, - -- so the flags column does not exist for the Recurring table. If - -- we ever want to use real Recurring flags, this line would look - -- like "GROUP_CONCAT_UNQUOTED(flags) WITHIN RECORD AS flags". - -- Create a synthetic AUTO_RENEW flag to differentiate this record - -- from a manual renewal. - 'AUTO_RENEW' AS flags, - FROM - [%SOURCE_DATASET%.Recurring] - WHERE - -- Filter out Registry 1.0 data - TODO(b/20828509): remove this. - __key__.namespace = '' - ) - WHERE - -- Filter out prober data. - tld IN - (SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL') - -- Exclude recurring events that would generate zero recurrences, - -- which are those with an event time beyond the current year. - AND numRecurrenceYears > 0 - ), - -- Second argument to FLATTEN(), explained above. - yearsToAdd) - ) AS recurrence - --- Join recurrence information by TLD to policy information for that TLD that --- determines recurrence properties - grace period length and renewal cost. -LEFT OUTER JOIN ( - SELECT - tld, - -- TODO(b/20764952): If we ever want to have more than one renew billing - -- cost, the logic here will need to pick the right cost based on the - -- eventTime value of the recurrence. - renewBillingCost AS renewCost, - autorenewGracePeriodSeconds, - FROM - [%DEST_DATASET%.RegistryData] - ) AS registry -ON - recurrence.tld = registry.tld - --- Join recurrence information by SLD to premium pricing information for that --- SLD, taken from the currently active premium list. Note that this doesn't --- account in any way for a premium list that changes over time; we always use --- the current list, regardless of when the recurrence is happening. --- TODO(b/21445712): Make this reflect the pricing at the recurrence times. -LEFT JOIN EACH ( - SELECT - domain, - price, - FROM - [%DEST_DATASET%.PremiumListData] - ) AS premium -ON - recurrence.targetId = premium.domain - -WHERE - -- Filter out generated recurrences where the event time of that recurrence - -- is at or after the end time of the recurring event (this handles the case - -- of generating an extra final recurrence, described above). - eventTime < recurrenceEndTime - -- Restrict this view to just recurrences that have "happened" by the time - -- the query is executed. - AND eventTime <= CURRENT_TIMESTAMP() -ORDER BY - -- Show the newest recurrences first, then break ties by ID and TLD, like - -- we do for BillingData. - billingTime DESC, - id, - tld