mirror of
https://github.com/google/nomulus.git
synced 2025-05-16 17:37:13 +02:00
Update input/output of Spec11 pipeline to final format
This changes the BigQuery input to the fields we ultimately want (fqdn, registrarName, registrarEmailAddress) and the output to a structured POJO holding the results from the API. This POJO is then converted to its final text output, i.e.: Map from registrar e-mail to list of threat-detected subdomains: {"registrarEmail": "c@fake.com", "threats": [{"url": "a.com", "threatType": "MALWARE"}]} {"registrarEmail": "d@fake.com", "threats": [{"url": "x.com", "threatType": "MALWARE"}, {"url": "y.com", "threatType": "MALWARE"}]} This gives us all the data we want in a JSON structured format, to be acted upon downstream by the to-be-constructed PublishSpec11ReportAction. Ideally, we would send an e-mail directly from the beam pipeline, but this is only possible through third-party providers (as opposed to app engine itself). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=209416880
This commit is contained in:
parent
7dcadaecf6
commit
f7bc17fbe8
11 changed files with 393 additions and 130 deletions
49
java/google/registry/beam/spec11/sql/subdomains.sql
Normal file
49
java/google/registry/beam/spec11/sql/subdomains.sql
Normal file
|
@ -0,0 +1,49 @@
|
|||
#standardSQL
|
||||
-- Copyright 2018 The Nomulus Authors. All Rights Reserved.
|
||||
--
|
||||
-- Licensed under the Apache License, Version 2.0 (the "License");
|
||||
-- you may not use this file except in compliance with the License.
|
||||
-- You may obtain a copy of the License at
|
||||
--
|
||||
-- http://www.apache.org/licenses/LICENSE-2.0
|
||||
--
|
||||
-- Unless required by applicable law or agreed to in writing, software
|
||||
-- distributed under the License is distributed on an "AS IS" BASIS,
|
||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
-- See the License for the specific language governing permissions and
|
||||
-- limitations under the License.
|
||||
|
||||
-- This query gathers every Subdomain that is currently active (not deleted)
|
||||
-- and emits a row containing its fully qualified domain name
|
||||
-- [SLD].[TLD], the current registrar's name, and the current registrar's
|
||||
-- email address.
|
||||
|
||||
-- Pairs each live domain registration with its sponsoring registrar and
-- returns one row per domain: the fully qualified domain name, the
-- registrar's key name, and the registrar's email address. Rows are ordered
-- newest registration first.
WITH
  -- Domain registrations that have not been deleted. A registration is kept
  -- when its deletionTime is null after SAFE_CAST (per the original note,
  -- active registrations surface as null because END_OF_TIME is not a valid
  -- standardSQL timestamp) or when its deletionTime is still in the future.
  active_domain AS (
    SELECT
      fullyQualifiedDomainName,
      currentSponsorClientId,
      creationTime
    FROM
      `%PROJECT_ID%.%DATASTORE_EXPORT_DATASET%.%DOMAIN_BASE_TABLE%`
    WHERE
      SAFE_CAST(deletionTime AS STRING) IS NULL
      OR deletionTime > CURRENT_TIMESTAMP
  ),
  -- Registrars whose type is 'REAL', exposed by their key name.
  real_registrar AS (
    SELECT
      __key__.name AS name,
      emailAddress
    FROM
      `%PROJECT_ID%.%DATASTORE_EXPORT_DATASET%.%REGISTRAR_TABLE%`
    WHERE
      type = 'REAL'
  )
SELECT
  active_domain.fullyQualifiedDomainName AS fullyQualifiedDomainName,
  real_registrar.name AS registrarName,
  real_registrar.emailAddress AS registrarEmailAddress
FROM
  active_domain
INNER JOIN
  real_registrar
ON
  -- A domain's sponsor client id is matched against the registrar key name.
  active_domain.currentSponsorClientId = real_registrar.name
ORDER BY
  active_domain.creationTime DESC
|
Loading…
Add table
Add a link
Reference in a new issue