mv com/google/domain/registry google/registry

This change renames directories in preparation for the great package
rename. The repository is now in a broken state because the code
itself hasn't been updated. However this should ensure that git
correctly preserves history for each file.
This commit is contained in:
Justine Tunney 2016-05-13 18:55:08 -04:00
parent a41677aea1
commit 5012893c1d
2396 changed files with 0 additions and 0 deletions

View file

@ -0,0 +1,38 @@
SELECT
modificationTime AS timestamp,
HistoryEntry.namespace AS tld,
clientId AS registrar,
type AS command,
CASE WHEN ReportingIdentifiers.kind = 'DomainBase' THEN 'DOMAIN'
WHEN ReportingIdentifiers.kind = 'HostResource' THEN 'HOST'
WHEN ReportingIdentifiers.kind = 'ContactResource' THEN 'CONTACT'
END AS resourceType,
ReportingIdentifiers.value AS resource,
trid.clientTransactionId,
trid.serverTransactionId,
period
FROM (
SELECT
type,
clientId,
modificationTime,
trid.clientTransactionId,
trid.serverTransactionId,
CASE WHEN period.value IS NOT NULL
THEN CONCAT(STRING(period.value), ' ', LOWER(period.unit))
END AS period,
__key__.namespace AS namespace,
REGEXP_EXTRACT(__key__.path, r'per-tld", "([^"]+)"') AS kind,
INTEGER(REGEXP_EXTRACT(__key__.path, r'per-tld", "[^"]+", (\d+)')) AS id
FROM
HistoryEntry
WHERE
clientId <> 'prober'
AND __key__.namespace <> 'test'
AND NOT __key__.namespace CONTAINS '.test'
AND NOT bySuperuser) AS HistoryEntry
JOIN
ReportingIdentifiers
ON ReportingIdentifiers.namespace = HistoryEntry.namespace
AND ReportingIdentifiers.kind = HistoryEntry.kind
AND ReportingIdentifiers.id = HistoryEntry.id

View file

@ -0,0 +1,30 @@
SELECT
namespace,
kind,
id,
value
FROM
(SELECT
__key__.namespace as namespace,
__key__.kind as kind,
__key__.id as id,
fullyQualifiedDomainName AS value
FROM
DomainBase),
(SELECT
__key__.namespace as namespace,
__key__.kind as kind,
__key__.id as id,
fullyQualifiedHostName AS value
FROM
HostResource),
(SELECT
__key__.namespace as namespace,
__key__.kind as kind,
__key__.id as id,
contactId AS value
FROM
ContactResource)
WHERE
namespace <> 'test'
AND NOT namespace CONTAINS '.test'

View file

@ -0,0 +1,175 @@
-- Billing Data View SQL
--
-- This query post-processes the OneTime billing events, combines them with
-- synthetic OneTime events generated from Recurring events, and then annotates
-- the resulting data with additional information from the Registrar,
-- DomainBase, Currency, and Cancellation tables.
SELECT
id,
BillingEvent.billingTime AS billingTime,
BillingEvent.eventTime AS eventTime,
BillingEvent.clientId AS registrarId,
Registrar.billingIdentifier AS billingId,
BillingEvent.tld AS tld,
IF(
CONCAT(',', BillingEvent.flags, ',') CONTAINS (',ALLOCATION,'),
'ALLOCATE',
BillingEvent.reason) AS action,
BillingEvent.targetId AS domain,
BillingEvent.domainRepoId AS repositoryId,
periodYears AS years,
BillingEvent.currency AS currency,
amountMinor,
REGEXP_EXTRACT(cost, ' (.+)') AS amountString,
ROUND(amountMinor * Currency.conversionToUsd, 2) AS estimatedUsd,
flags,
Cancellation.cancellationId IS NOT NULL AS cancelled,
Cancellation.cancellationTime AS cancellationTime,
FROM (
-- Subquery for common processing shared across OneTime and Recurring data,
-- which is parsing the string cost into currency and amountMinor.
SELECT
id,
kind, -- Needed for joining Cancellations correctly.
billingTime,
eventTime,
clientId,
tld,
reason,
targetId,
domainRepoId,
periodYears,
cost,
-- TODO(b/19031545): Find cleaner way to parse out currency and amount.
-- Parse out the currency code as the substring of 'cost' up to the space.
REGEXP_EXTRACT(cost, '(.+) ') AS currency,
-- Parse out the amount of minor units by stripping out non-digit chars
-- (i.e. currency, space, and period) and then converting to integer.
INTEGER(REGEXP_REPLACE(cost, r'\D+', '')) AS amountMinor,
-- Convert repeated flags field into flat comma-delimited string field.
flags,
FROM (
-- Extract OneTime records from raw snapshot data.
SELECT
__key__.id AS id,
__key__.kind AS kind,
billingTime,
eventTime,
clientId,
tld,
reason,
targetId,
-- TODO(b/20828509): see if this can be expressed more cleanly.
REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId,
periodYears,
cost,
GROUP_CONCAT(flags) WITHIN RECORD AS flags,
FROM (
SELECT
*,
-- TODO(b/20828509): make this robust to multi-part TLDS.
LAST(SPLIT(targetId, '.')) AS tld
FROM
[%SOURCE_DATASET%.OneTime]
WHERE
-- Filter out Registry 1.0 data - TODO(b/20828509): remove this.
__key__.namespace = '')
WHERE
-- Filter out prober data.
tld IN
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
), (
-- Extract synthetic recurring events from view of Recurring data.
SELECT
id,
kind,
billingTime,
eventTime,
clientId,
tld,
reason,
targetId,
domainRepoId,
periodYears,
cost,
flags,
FROM
[%DEST_DATASET%.RecurringEventData]
)
) AS BillingEvent
-- Join to pick up billing ID from registrar table.
LEFT JOIN EACH (
SELECT
__key__.name AS clientId,
billingIdentifier,
FROM
[%SOURCE_DATASET%.Registrar]
) AS Registrar
ON
BillingEvent.clientId = Registrar.clientId
-- Join to pick up cancellations for billing events.
LEFT JOIN EACH (
SELECT
__key__.id AS cancellationId,
-- Coalesce matching fields from refOneTime and refRecurring (only one or
-- the other will ever be populated) for joining against referenced event.
COALESCE(refOneTime.kind, refRecurring.kind) AS cancelledEventKind,
COALESCE(refOneTime.id, refRecurring.id) AS cancelledEventId,
-- TODO(b/20828509): see if this can be expressed more cleanly.
REGEXP_EXTRACT(
COALESCE(refOneTime.path, refRecurring.path),
'"DomainBase", "([^"]+)"') AS cancelledEventDomainRepoId,
eventTime AS cancellationTime,
billingTime AS cancellationBillingTime,
FROM (
SELECT
*,
-- TODO(b/20828509): make this robust to multi-part TLDS.
LAST(SPLIT(targetId, '.')) AS tld
FROM
[%SOURCE_DATASET%.Cancellation]
WHERE
-- Filter out Registry 1.0 data - TODO(b/20828509): remove this.
__key__.namespace = '')
WHERE
-- Filter out prober data.
tld IN
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
) AS Cancellation
ON
BillingEvent.kind = Cancellation.cancelledEventKind
AND BillingEvent.id = Cancellation.cancelledEventId
-- Note: we need to include the repoId here to handle old pre-Registry-2.0
-- billing events that would have had ID collisions across TLDs.
AND BillingEvent.domainRepoId = Cancellation.cancelledEventDomainRepoId
-- Require billing times to match so that cancellations for Recurring events
-- only apply to the specific recurrence being cancelled.
AND BillingEvent.billingTime = Cancellation.cancellationBillingTime
-- Join to pick up currency conversion factor.
LEFT JOIN EACH (
SELECT
currency,
conversionToUsd,
FROM
[%DEST_DATASET%.Currency]
) AS Currency
ON
BillingEvent.currency = Currency.currency
WHERE
-- Filter down to whitelisted TLDs that are "billable".
-- TODO(b/18092292): determine this automatically.
BillingEvent.tld IN
(SELECT tld FROM FLATTEN((
-- %TLDS% is passed in as a comma-delimited string of TLDs.
SELECT SPLIT('%TLDS%') AS tld FROM (SELECT 1 as unused)), tld))
-- Sort rows to show the latest billed items first.
ORDER BY
billingTime DESC,
-- Break ties in billing time using ID then TLD, to be deterministic.
id,
tld

View file

@ -0,0 +1,47 @@
-- Credit Balance Data View SQL
--
-- This query post-processes RegistrarCreditBalance entities to collapse them
-- down to one 'true' amount per credit ID and effective time, eliminating any
-- duplicates by choosing the most recently written balance entry of the set,
-- and then joins these 'real' balances to the CreditData view.
--
-- The result is a list showing how each credit's balance has developed over
-- time, which can be used to find the actual balance of a given credit at
-- any particular point in time (e.g. the time when invoices are run).
SELECT
CreditData.registrarId AS registrarId,
CreditData.creditId AS creditId,
effectiveTime,
REGEXP_EXTRACT(amount, '(.+) ') AS currency,
INTEGER(REGEXP_REPLACE(amount, r'\D+', '')) AS amountMinor
FROM (
SELECT
creditId,
effectiveTime,
amount,
ROW_NUMBER() OVER (
PARTITION BY
creditId,
effectiveTime
ORDER BY
writtenTime DESC
) AS recencyRank
FROM (
SELECT
INTEGER(REGEXP_EXTRACT(__key__.path, '"RegistrarCredit", (.+?),'))
AS creditId,
effectiveTime,
writtenTime,
amount
FROM [%SOURCE_DATASET%.RegistrarCreditBalance]
)
) AS BalanceData
LEFT JOIN EACH
[%DEST_DATASET%.CreditData] AS CreditData
ON
BalanceData.creditId = CreditData.creditId
WHERE
recencyRank = 1
ORDER BY
creditId,
effectiveTime

View file

@ -0,0 +1,38 @@
-- Credit Data View SQL
--
-- This query post-processes RegistrarCredit entities and joins them to the
-- corresponding owning Registrar entities. The join is mostly a no-op, but
-- it does ensure that if extracting the registrar parent from a credit fails
-- then this view will indicate that by showing the registrar ID as null.
-- TODO(b/19031915): add optional sanity-checking for that type of invariant.
SELECT
Registrar.registrarId AS registrarId,
RegistrarCredit.__key__.id AS creditId,
RegistrarCredit.type AS type,
CreditType.priority AS typePriority,
creationTime,
currency,
description,
tld,
FROM (
SELECT
*,
REGEXP_EXTRACT(__key__.path, '"Registrar", "(.+?)"') AS registrarId
FROM
[%SOURCE_DATASET%.RegistrarCredit]
) AS RegistrarCredit
LEFT JOIN
(SELECT registrarId FROM [%DEST_DATASET%.RegistrarData]) AS Registrar
ON
RegistrarCredit.registrarId = Registrar.registrarId
LEFT JOIN (
-- TODO(b/19031546): Generate this table from the CreditType enum.
SELECT * FROM
(SELECT 'AUCTION' AS type, 1 AS priority),
(SELECT 'PROMOTION' AS type, 2 AS priority),
) AS CreditType
ON
RegistrarCredit.type = CreditType.type
ORDER BY
creationTime,
creditId

View file

@ -0,0 +1,8 @@
-- Currency Table Creation SQL
--
-- This query generates a static table of currency information.
SELECT
currency, conversionToUsd, exponent
FROM
(SELECT 'JPY' AS currency, 0.0098 AS conversionToUsd, 0 AS exponent),
(SELECT 'USD' AS currency, 0.0100 AS conversionToUsd, 2 AS exponent)

View file

@ -0,0 +1,60 @@
-- Premium List Data View SQL
--
-- This query generates a table of all current premium list data that
-- is active on a TLD in RegistryData.
SELECT
listName,
tld,
label,
CONCAT(label, '.', tld) AS domain,
price,
FROM (
-- Join the PremiumList info with RegistryData so that we can properly map
-- a given TLD to a given PremiumList (even though by convention the list
-- name equals the TLD name, this is not guaranteed). This effectively
-- produces one PremiumList copy for each TLD where it's the active list.
SELECT
listName,
tld,
-- We use the revision that's active on the PremiumList to determine
-- which PremiumListEntry entries constitute the current list data.
-- Since backups are only eventually consistent, this could bite us
-- if a PremiumList's data is ever replaced and the full replacement
-- entry-wise doesn't make it into the backup snapshot.
-- TODO(b/21445712): Figure out a way to avoid this issue?
revisionKeyPath,
FROM (
SELECT
__key__.name AS listName,
__key__.path AS keyPath,
revisionKey.path AS revisionKeyPath,
FROM
[%SOURCE_DATASET%.PremiumList]
) AS PremiumList
JOIN (
SELECT
tld,
premiumListPath,
FROM
[%DEST_DATASET%.RegistryData]
) AS RegistryData
ON
PremiumList.keyPath = RegistryData.premiumListPath
) AS PremiumList
-- Left join against the entries and pick up those parented on the active
-- revision that we got from PremiumList.
LEFT JOIN EACH (
SELECT
REGEXP_EXTRACT(__key__.path, '(.+), "PremiumListEntry", .*')
AS parentKeyPath,
__key__.name AS label,
price,
FROM
[%SOURCE_DATASET%.PremiumListEntry]
) AS PremiumListEntry
ON
PremiumList.revisionKeyPath = PremiumListEntry.parentKeyPath
ORDER BY
listName,
tld,
label

View file

@ -0,0 +1,166 @@
-- Recurring Billing Event Data View SQL
--
-- This query expands Recurring billing events into individual rows for each
-- recurrence of the event that has happened up to the time the query is run.
-- Since the only Recurring events are for automatic renewals, this means each
-- row stands in for the renewal OneTime that would be created for a given
-- Recurring event if it were materialized as individual explicit renewals.
SELECT
id,
kind, -- Needed for joining Cancellation information downstream.
-- Add the appropriate autorenew grace period length for this TLD to determine
-- the time at which this auto-renewal should be billed.
DATE_ADD(eventTime, registry.autorenewGracePeriodSeconds, 'SECOND')
AS billingTime,
eventTime,
clientId,
recurrence.tld AS tld,
'RENEW' AS reason, -- These are always auto-renewals, so use RENEW.
targetId,
domainRepoId,
1 AS periodYears, -- Auto-renewals are always for 1 year.
-- Use the premium price if one's configured in the current list, otherwise
-- fall back to the TLD-wide standard renewal cost.
IF(premium.price IS NOT NULL, premium.price, renewCost) AS cost,
flags,
FROM (
-- Nested query to compute eventTime of this recurrence so it's available to
-- the JOIN against RegistryData.
SELECT
id,
kind,
tld,
-- Construct the event time for this recurrence by adding years to the
-- Recurring event's original event time. Leap years are not an issue since
-- we ensure that domains never expire (and hence never start to auto-renew)
-- on Feb. 29th, so the recurrence time (as month, day, and millis of day)
-- will always be preserved if we add whole numbers of years.
DATE_ADD(eventTime, yearsToAdd, 'YEAR') AS eventTime,
targetId,
domainRepoId,
clientId,
reason,
recurrenceEndTime,
flags,
FROM
-- Nested flattened query that expands single Recurring rows into multiple
-- rows for each recurrrence of that Recurring event. It does this by
-- computing the number of recurrences to generate, constructing a repeated
-- field with that cardinality, and then flattening that repeated field.
FLATTEN(
(
SELECT
id,
kind,
tld,
eventTime,
targetId,
domainRepoId,
clientId,
reason,
recurrenceEndTime,
flags,
-- Make a repeated field with N elements (N = "numRecurrenceYears") by
-- padding a string to length N, using the SPLIT() function with the
-- empty string delimiter to generate a repeated field with one element
-- per character, and then using POSITION() to convert that field into
-- one whose values are numeric ranging from 0 to N - 1.
-- TODO(b/20829992): replace this hack with a UDF?
POSITION(SPLIT(RPAD('', numRecurrenceYears, '.'), '')) - 1
AS yearsToAdd,
FROM (
-- Nested query to compute the number of recurrences in scope for each
-- Recurring event, to pass to the outer row-expansion query.
SELECT
__key__.id AS id,
__key__.kind AS kind,
-- TODO(b/20828509): make this robust to multi-part TLDs.
LAST(SPLIT(targetId, '.')) AS tld,
eventTime,
targetId,
-- TODO(b/20828509): see if this can be expressed more cleanly.
REGEXP_EXTRACT(__key__.path, '"DomainBase", "([^"]+)"') AS domainRepoId,
clientId,
reason,
recurrenceEndTime,
-- The number of recurrences we generate is one per year for every
-- year from the event time (first recurrence) to the earlier of
-- either the current time or the end time, inclusive. This may
-- generate an extra recurrence at the end (if the recurrence end
-- time is earlier in the year than the recurrence) but we filter
-- it out below.
GREATEST(
0, -- Bound below by zero.
LEAST(YEAR(CURRENT_TIMESTAMP()), YEAR(recurrenceEndTime))
- YEAR(eventTime) + 1
) AS numRecurrenceYears,
-- Note: there has never been a Recurring event with a flag to date,
-- so the flags column does not exist for the Recurring table. If
-- we ever want to use real Recurring flags, this line would look
-- like "GROUP_CONCAT_UNQUOTED(flags) WITHIN RECORD AS flags".
-- Create a synthetic AUTO_RENEW flag to differentiate this record
-- from a manual renewal.
'AUTO_RENEW' AS flags,
FROM
[%SOURCE_DATASET%.Recurring]
WHERE
-- Filter out Registry 1.0 data - TODO(b/20828509): remove this.
__key__.namespace = ''
)
WHERE
-- Filter out prober data.
tld IN
(SELECT tld FROM [%DEST_DATASET%.RegistryData] WHERE type = 'REAL')
-- Exclude recurring events that would generate zero recurrences,
-- which are those with an event time beyond the current year.
AND numRecurrenceYears > 0
),
-- Second argument to FLATTEN(), explained above.
yearsToAdd)
) AS recurrence
-- Join recurrence information by TLD to policy information for that TLD that
-- determines recurrence properties - grace period length and renewal cost.
LEFT OUTER JOIN (
SELECT
tld,
-- TODO(b/20764952): If we ever want to have more than one renew billing
-- cost, the logic here will need to pick the right cost based on the
-- eventTime value of the recurrence.
renewBillingCost AS renewCost,
autorenewGracePeriodSeconds,
FROM
[%DEST_DATASET%.RegistryData]
) AS registry
ON
recurrence.tld = registry.tld
-- Join recurrence information by SLD to premium pricing information for that
-- SLD, taken from the currently active premium list. Note that this doesn't
-- account in any way for a premium list that changes over time; we always use
-- the current list, regardless of when the recurrence is happening.
-- TODO(b/21445712): Make this reflect the pricing at the recurrence times.
LEFT JOIN EACH (
SELECT
domain,
price,
FROM
[%DEST_DATASET%.PremiumListData]
) AS premium
ON
recurrence.targetId = premium.domain
WHERE
-- Filter out generated recurrences where the event time of that recurrence
-- is at or after the end time of the recurring event (this handles the case
-- of generating an extra final recurrence, described above).
eventTime < recurrenceEndTime
-- Restrict this view to just recurrences that have "happened" by the time
-- the query is executed.
AND eventTime <= CURRENT_TIMESTAMP()
ORDER BY
-- Show the newest recurrences first, then break ties by ID and TLD, like
-- we do for BillingData.
billingTime DESC,
id,
tld

View file

@ -0,0 +1,17 @@
SELECT
STRING(timestamp) AS date,
command,
resourceType,
resource,
trid_clientTransactionId,
trid_serverTransactionId,
period
FROM
ReportingHistory
WHERE
tld = '%TLD%'
AND registrar = '%REGISTRAR%'
AND timestamp >= TIMESTAMP('%STARTTIME%')
AND timestamp < TIMESTAMP('%ENDTIME%')
ORDER BY
timestamp

View file

@ -0,0 +1,13 @@
-- Registrar Data View SQL
--
-- This query lists registrar IDs with billing IDs and allowed TLDs.
SELECT
__key__.name AS registrarId,
billingIdentifier AS billingId,
allowedTlds
FROM
[%SOURCE_DATASET%.Registrar]
-- TODO(b/19031620): Add filter to just include registrars with type=REAL.
-- Note: We can't ORDER BY registrarId here because ORDER/GROUP BY will
-- flatten results, and the allowedTlds field above is repeated.
-- TODO(b/19031339): Add "ORDER BY" if the BigQuery known issue is fixed.

View file

@ -0,0 +1,32 @@
-- Registry Data View SQL
--
-- This query lists registry fields necessary for billing.
--
-- TODO(b/20764952): extend this view to support timed transition properties.
-- TODO(b/18092292): add a column for whether the TLD is "billable" or not.
SELECT
tldStr AS tld,
tldType AS type,
-- This relies on the fact that we currently haven't ever used more than one
-- value with renewBillingCostTransitions.
-- TODO(b/20764952): fix this limitation.
FIRST(renewBillingCostTransitions.billingCost) WITHIN RECORD
AS renewBillingCost,
-- Grace period lengths are stored in the ISO 8601 duration format as a
-- duration in seconds, which produces a string of the form "PT####S".
INTEGER(REGEXP_EXTRACT(autoRenewGracePeriodLength, r'PT(.+)S'))
AS autorenewGracePeriodSeconds,
premiumList.path AS premiumListPath,
-- TODO(b/18265521): remove this column once the referenced bug is fixed;
-- this column is only included because without it BigQuery will complain
-- that we aren't consuming reservedLists (due to it having a repeated .path
-- child field, which overlaps with premiumList.path above).
GROUP_CONCAT_UNQUOTED(reservedLists.path, "/") WITHIN RECORD
AS reservedListPaths,
FROM
[%SOURCE_DATASET%.Registry]
WHERE
-- Filter out Registry 1.0 data - TODO(b/20828509): remove this.
__key__.namespace = ''
ORDER BY
tld