Update input/output of Spec11 pipeline to final format

This changes the BigQuery input to the fields we ultimately want (fqdn,
registrarName, registrarEmailAddress) and the output to a structured POJO
holding the results from the API. This POJO is then converted to its final text output, i.e.:

Map from registrar e-mail to list of threat-detected subdomains:
{"registrarEmail": "c@fake.com", "threats": [{"url": "a.com", "threatType": "MALWARE"}]}
{"registrarEmail": "d@fake.com", "threats": [{"url": "x.com", "threatType": "MALWARE"}, {"url": "y.com", "threatType": "MALWARE"}]}

This gives us all the data we want in a structured JSON format, to be acted upon downstream by the to-be-constructed PublishSpec11ReportAction. Ideally, we would send an e-mail directly from the Beam pipeline, but this is only possible through third-party providers (as opposed to App Engine itself).

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=209416880
This commit is contained in:
larryruili 2018-08-20 07:54:31 -07:00 committed by jianglai
parent 7dcadaecf6
commit f7bc17fbe8
11 changed files with 393 additions and 130 deletions

View file

@ -0,0 +1,72 @@
// Copyright 2018 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.spec11;
import com.google.auto.value.AutoValue;
import java.io.Serializable;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * A POJO representing a threat match response from the {@code SafeBrowsing API}.
 *
 * <p>The class is an {@link AutoValue} value type that implements {@link Serializable}; all of its
 * properties are held as plain {@link String}s.
 */
@AutoValue
public abstract class ThreatMatch implements Serializable {

  // Keys read from the SafeBrowsing response object in create().
  private static final String THREAT_TYPE_FIELD = "threatType";
  private static final String PLATFORM_TYPE_FIELD = "platformType";
  private static final String METADATA_FIELD = "threatEntryMetadata";

  /** Placeholder stored in {@link #metadata()} when the response carries no metadata. */
  private static final String NO_METADATA = "NONE";

  /** Returns what kind of threat it is (malware, phishing etc.) */
  abstract String threatType();

  /** Returns what platforms it affects (Windows, Linux etc.) */
  abstract String platformType();

  /**
   * Returns a String representing a JSON Object containing arbitrary metadata associated with this
   * threat, or "NONE" if there is no metadata to retrieve.
   *
   * <p>This ideally would be a {@link JSONObject} type, but can't be due to serialization
   * requirements.
   */
  abstract String metadata();

  /** Returns the fully qualified domain name [SLD].[TLD] of the matched threat. */
  abstract String fullyQualifiedDomainName();

  /**
   * Constructs a {@link ThreatMatch} by parsing a {@code SafeBrowsing API} response {@link
   * JSONObject}.
   *
   * <p>The threat type and platform type are required string fields. The metadata field is
   * optional: when it is absent, or present but explicitly {@code null}, the resulting object's
   * {@link #metadata()} is "NONE".
   *
   * @param threatMatchJSON the threat match object taken from the API response
   * @param fullyQualifiedDomainName the domain name the match belongs to
   * @return a new {@link ThreatMatch} holding the parsed values
   * @throws JSONException when encountering parse errors in the response format, e.g. a missing
   *     required field or a metadata value that is not a JSON object
   */
  static ThreatMatch create(JSONObject threatMatchJSON, String fullyQualifiedDomainName)
      throws JSONException {
    String threatType = threatMatchJSON.getString(THREAT_TYPE_FIELD);
    String platformType = threatMatchJSON.getString(PLATFORM_TYPE_FIELD);
    // isNull() is true both when the key is missing and when it maps to an explicit JSON null,
    // whereas has() is true for an explicit null and would let getJSONObject() throw. A present
    // value of the wrong type still raises JSONException.
    String metadata =
        threatMatchJSON.isNull(METADATA_FIELD)
            ? NO_METADATA
            : threatMatchJSON.getJSONObject(METADATA_FIELD).toString();
    return new AutoValue_ThreatMatch(
        threatType, platformType, metadata, fullyQualifiedDomainName);
  }

  /**
   * Returns a {@link String} containing the simplest details about this threat, formatted as
   * {@code <fullyQualifiedDomainName>;<threatType>}.
   */
  String getSimpleDetails() {
    return String.format("%s;%s", this.fullyQualifiedDomainName(), this.threatType());
  }

  /**
   * Returns a {@link JSONObject} representing a subset of this object's data: the fully qualified
   * domain name and the threat type. Platform type and metadata are not included.
   *
   * @throws JSONException if the values cannot be put into the {@link JSONObject}
   */
  JSONObject toJSON() throws JSONException {
    return new JSONObject()
        .put("fullyQualifiedDomainName", fullyQualifiedDomainName())
        .put("threatType", threatType());
  }
}