Refactor SMDRL + Claims CSV parsing (#1704)

This uses the Apache commons CSV parsing instead of rolling our own.

Annoyingly, the results that we're given aren't exactly proper CSVs
since they have a non-standard line of data at the top, and the header
is actually the second line.
This commit is contained in:
gbrodman 2022-07-12 15:42:12 -04:00 committed by GitHub
parent 2a502261bb
commit d922e01f6b
6 changed files with 56 additions and 62 deletions

View file

@ -16,18 +16,21 @@ package google.registry.tmch;
import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import google.registry.model.tmch.ClaimsList; import google.registry.model.tmch.ClaimsList;
import java.io.IOException;
import java.io.StringReader;
import java.util.List; import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.joda.time.DateTime; import org.joda.time.DateTime;
/** /**
* Claims List (MarksDB DNL CSV) Parser. * Claims List (MarksDB DNL CSV) Parser.
* *
* <p>This is a quick and dirty CSV parser made specifically for the DNL CSV format defined in the
* TMCH specification. It doesn't support any fancy CSV features like quotes.
*
* @see <a href="http://tools.ietf.org/html/draft-lozano-tmch-func-spec-08#section-6.1"> * @see <a href="http://tools.ietf.org/html/draft-lozano-tmch-func-spec-08#section-6.1">
* TMCH functional specifications - DNL List file</a> * TMCH functional specifications - DNL List file</a>
*/ */
@ -38,7 +41,7 @@ public class ClaimsListParser {
* *
* <p>Please note that this does <b>not</b> insert the object into the DB. * <p>Please note that this does <b>not</b> insert the object into the DB.
*/ */
public static ClaimsList parse(List<String> lines) { public static ClaimsList parse(List<String> lines) throws IOException {
ImmutableMap.Builder<String, String> builder = new ImmutableMap.Builder<>(); ImmutableMap.Builder<String, String> builder = new ImmutableMap.Builder<>();
// First line: <version>,<DNL List creation datetime> // First line: <version>,<DNL List creation datetime>
@ -51,26 +54,16 @@ public class ClaimsListParser {
checkArgument(version == 1, String.format( checkArgument(version == 1, String.format(
"Line 1: Expected version 1, found %d", version)); "Line 1: Expected version 1, found %d", version));
// Second line contains headers: DNL,lookup-key,insertion-datetime // Note: we have to skip the first line because it contains the version metadata
List<String> secondLine = Splitter.on(',').splitToList(lines.get(1)); CSVParser csv =
checkArgument(secondLine.size() == 3, String.format( CSVFormat.Builder.create(CSVFormat.DEFAULT)
"Line 2: Expected 3 elements, found %d", secondLine.size())); .setHeader()
checkArgument("DNL".equals(secondLine.get(0)), String.format( .setSkipHeaderRecord(true)
"Line 2: Expected header \"DNL\", found \"%s\"", secondLine.get(0))); .build()
checkArgument("lookup-key".equals(secondLine.get(1)), String.format( .parse(new StringReader(Joiner.on('\n').join(lines.subList(1, lines.size()))));
"Line 2: Expected header \"lookup-key\", found \"%s\"", secondLine.get(1))); for (CSVRecord record : csv) {
checkArgument("insertion-datetime".equals(secondLine.get(2)), String.format( String label = record.get("DNL");
"Line 2: Expected header \"insertion-datetime\", found \"%s\"", secondLine.get(2))); String lookupKey = record.get("lookup-key");
// Subsequent lines: <DNL>,<lookup key>,<DNL insertion datetime>
for (int i = 2; i < lines.size(); i++) {
List<String> currentLine = Splitter.on(',').splitToList(lines.get(i));
checkArgument(currentLine.size() == 3, String.format(
"Line %d: Expected 3 elements, found %d", i + 1, currentLine.size()));
String label = currentLine.get(0);
String lookupKey = currentLine.get(1);
DateTime.parse(currentLine.get(2)); // This is the insertion time, currently unused.
builder.put(label, lookupKey); builder.put(label, lookupKey);
} }

View file

@ -36,7 +36,6 @@ import java.security.GeneralSecurityException;
import java.security.Security; import java.security.Security;
import java.security.SignatureException; import java.security.SignatureException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
import java.util.Optional; import java.util.Optional;
import javax.annotation.Tainted; import javax.annotation.Tainted;
import javax.inject.Inject; import javax.inject.Inject;
@ -125,7 +124,8 @@ public final class Marksdb {
} }
} }
List<String> fetchSignedCsv(Optional<String> loginAndPassword, String csvPath, String sigPath) ImmutableList<String> fetchSignedCsv(
Optional<String> loginAndPassword, String csvPath, String sigPath)
throws IOException, GeneralSecurityException, PGPException { throws IOException, GeneralSecurityException, PGPException {
checkArgument( checkArgument(
loginAndPassword.isPresent(), "Cannot fetch from MarksDB without login credentials"); loginAndPassword.isPresent(), "Cannot fetch from MarksDB without login credentials");

View file

@ -17,55 +17,52 @@ package google.registry.tmch;
import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkArgument;
import static org.joda.time.DateTimeZone.UTC; import static org.joda.time.DateTimeZone.UTC;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import google.registry.model.smd.SignedMarkRevocationList; import google.registry.model.smd.SignedMarkRevocationList;
import java.io.IOException;
import java.io.StringReader;
import java.util.List; import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.joda.time.DateTime; import org.joda.time.DateTime;
/** /**
* Signed Mark Data Revocation List (SMDRL) CSV Parser * Signed Mark Data Revocation List (SMDRL) CSV Parser
* *
* <p>This is a quick and dirty CSV parser made specifically for the SMDRL CSV format defined in
* the TMCH specification. It doesn't support any fancy CSV features like quotes.
*
* @see <a href="http://tools.ietf.org/html/draft-lozano-tmch-func-spec-08#section-6.2"> * @see <a href="http://tools.ietf.org/html/draft-lozano-tmch-func-spec-08#section-6.2">
* TMCH functional specifications - SMD Revocation List</a> * TMCH functional specifications - SMD Revocation List</a>
*/ */
public final class SmdrlCsvParser { public final class SmdrlCsvParser {
/** Converts the lines from the DNL CSV file into a data structure. */ /** Converts the lines from the DNL CSV file into a data structure. */
public static SignedMarkRevocationList parse(List<String> lines) { public static SignedMarkRevocationList parse(List<String> lines) throws IOException {
ImmutableMap.Builder<String, DateTime> revokes = new ImmutableMap.Builder<>(); ImmutableMap.Builder<String, DateTime> revokes = new ImmutableMap.Builder<>();
// First line: <version>,<SMD Revocation List creation datetime> // First line: <version>,<SMD Revocation List creation datetime>
List<String> firstLine = Splitter.on(',').splitToList(lines.get(0)); List<String> firstLine = Splitter.on(',').splitToList(lines.get(0));
checkArgument(firstLine.size() == 2, String.format( checkArgument(
"Line 1: Expected 2 elements, found %d", firstLine.size())); firstLine.size() == 2,
String.format("Line 1: Expected 2 elements, found %d", firstLine.size()));
int version = Integer.parseInt(firstLine.get(0)); int version = Integer.parseInt(firstLine.get(0));
checkArgument(version == 1, String.format( checkArgument(version == 1, String.format("Line 1: Expected version 1, found %d", version));
"Line 1: Expected version 1, found %d", version));
DateTime creationTime = DateTime.parse(firstLine.get(1)).withZone(UTC); DateTime creationTime = DateTime.parse(firstLine.get(1)).withZone(UTC);
// Second line contains headers: smd-id,insertion-datetime // Note: we have to skip the first line because it contains the version metadata
List<String> secondLine = Splitter.on(',').splitToList(lines.get(1)); CSVParser csv =
checkArgument(secondLine.size() == 2, String.format( CSVFormat.Builder.create(CSVFormat.DEFAULT)
"Line 2: Expected 2 elements, found %d", secondLine.size())); .setHeader()
checkArgument("smd-id".equals(secondLine.get(0)), String.format( .setSkipHeaderRecord(true)
"Line 2: Expected header \"smd-id\", found \"%s\"", secondLine.get(0))); .build()
checkArgument("insertion-datetime".equals(secondLine.get(1)), String.format( .parse(new StringReader(Joiner.on('\n').join(lines.subList(1, lines.size()))));
"Line 2: Expected header \"insertion-datetime\", found \"%s\"", secondLine.get(1)));
// Subsequent lines: <smd-id>,<revoked SMD datetime> for (CSVRecord record : csv) {
for (int i = 2; i < lines.size(); i++) { String smdId = record.get("smd-id");
List<String> currentLine = Splitter.on(',').splitToList(lines.get(i)); DateTime revokedTime = DateTime.parse(record.get("insertion-datetime"));
checkArgument(currentLine.size() == 2, String.format(
"Line %d: Expected 2 elements, found %d", i + 1, currentLine.size()));
String smdId = currentLine.get(0);
DateTime revokedTime = DateTime.parse(currentLine.get(1));
revokes.put(smdId, revokedTime); revokes.put(smdId, revokedTime);
} }
return SignedMarkRevocationList.create(creationTime, revokes.build()); return SignedMarkRevocationList.create(creationTime, revokes.build());
} }
} }

View file

@ -16,6 +16,7 @@ package google.registry.tmch;
import static google.registry.request.Action.Method.POST; import static google.registry.request.Action.Method.POST;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.FluentLogger; import com.google.common.flogger.FluentLogger;
import google.registry.keyring.api.KeyModule.Key; import google.registry.keyring.api.KeyModule.Key;
import google.registry.model.tmch.ClaimsList; import google.registry.model.tmch.ClaimsList;
@ -24,7 +25,6 @@ import google.registry.request.Action;
import google.registry.request.auth.Auth; import google.registry.request.auth.Auth;
import java.io.IOException; import java.io.IOException;
import java.security.GeneralSecurityException; import java.security.GeneralSecurityException;
import java.util.List;
import java.util.Optional; import java.util.Optional;
import javax.inject.Inject; import javax.inject.Inject;
import org.bouncycastle.openpgp.PGPException; import org.bouncycastle.openpgp.PGPException;
@ -49,13 +49,14 @@ public final class TmchDnlAction implements Runnable {
/** Synchronously fetches latest domain name list and saves it to Cloud SQL. */ /** Synchronously fetches latest domain name list and saves it to Cloud SQL. */
@Override @Override
public void run() { public void run() {
List<String> lines; ClaimsList claims;
try { try {
lines = marksdb.fetchSignedCsv(marksdbDnlLoginAndPassword, DNL_CSV_PATH, DNL_SIG_PATH); ImmutableList<String> lines =
marksdb.fetchSignedCsv(marksdbDnlLoginAndPassword, DNL_CSV_PATH, DNL_SIG_PATH);
claims = ClaimsListParser.parse(lines);
} catch (GeneralSecurityException | IOException | PGPException e) { } catch (GeneralSecurityException | IOException | PGPException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
ClaimsList claims = ClaimsListParser.parse(lines);
ClaimsListDao.save(claims); ClaimsListDao.save(claims);
logger.atInfo().log( logger.atInfo().log(
"Inserted %,d claims into the DB(s), created at %s.", "Inserted %,d claims into the DB(s), created at %s.",

View file

@ -16,6 +16,7 @@ package google.registry.tmch;
import static google.registry.request.Action.Method.POST; import static google.registry.request.Action.Method.POST;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.FluentLogger; import com.google.common.flogger.FluentLogger;
import google.registry.keyring.api.KeyModule.Key; import google.registry.keyring.api.KeyModule.Key;
import google.registry.model.smd.SignedMarkRevocationList; import google.registry.model.smd.SignedMarkRevocationList;
@ -23,7 +24,6 @@ import google.registry.request.Action;
import google.registry.request.auth.Auth; import google.registry.request.auth.Auth;
import java.io.IOException; import java.io.IOException;
import java.security.GeneralSecurityException; import java.security.GeneralSecurityException;
import java.util.List;
import java.util.Optional; import java.util.Optional;
import javax.inject.Inject; import javax.inject.Inject;
import org.bouncycastle.openpgp.PGPException; import org.bouncycastle.openpgp.PGPException;
@ -48,13 +48,14 @@ public final class TmchSmdrlAction implements Runnable {
/** Synchronously fetches latest signed mark revocation list and saves it to the database. */ /** Synchronously fetches latest signed mark revocation list and saves it to the database. */
@Override @Override
public void run() { public void run() {
List<String> lines; SignedMarkRevocationList smdrl;
try { try {
lines = marksdb.fetchSignedCsv(marksdbSmdrlLoginAndPassword, SMDRL_CSV_PATH, SMDRL_SIG_PATH); ImmutableList<String> lines =
marksdb.fetchSignedCsv(marksdbSmdrlLoginAndPassword, SMDRL_CSV_PATH, SMDRL_SIG_PATH);
smdrl = SmdrlCsvParser.parse(lines);
} catch (GeneralSecurityException | IOException | PGPException e) { } catch (GeneralSecurityException | IOException | PGPException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
SignedMarkRevocationList smdrl = SmdrlCsvParser.parse(lines);
smdrl.save(); smdrl.save();
logger.atInfo().log( logger.atInfo().log(
"Inserted %,d smd revocations into the database, created at %s.", "Inserted %,d smd revocations into the database, created at %s.",

View file

@ -79,7 +79,7 @@ class SmdrlCsvParserTest {
} }
@Test @Test
void testOneRow() { void testOneRow() throws Exception {
SignedMarkRevocationList smdrl = SignedMarkRevocationList smdrl =
SmdrlCsvParser.parse( SmdrlCsvParser.parse(
ImmutableList.of( ImmutableList.of(
@ -93,7 +93,7 @@ class SmdrlCsvParserTest {
} }
@Test @Test
void testEmpty() { void testEmpty() throws Exception {
SignedMarkRevocationList smdrl = SignedMarkRevocationList smdrl =
SmdrlCsvParser.parse( SmdrlCsvParser.parse(
ImmutableList.of("1,2014-11-24T23:30:04.3Z", "smd-id,insertion-datetime")); ImmutableList.of("1,2014-11-24T23:30:04.3Z", "smd-id,insertion-datetime"));
@ -128,7 +128,9 @@ class SmdrlCsvParserTest {
"1,2013-11-24T23:30:04.3Z", "1,2013-11-24T23:30:04.3Z",
"lol,cat", "lol,cat",
"0000001681375789102250-65535,2013-08-09T12:00:00.0Z"))); "0000001681375789102250-65535,2013-08-09T12:00:00.0Z")));
assertThat(thrown).hasMessageThat().contains("header"); assertThat(thrown)
.hasMessageThat()
.isEqualTo("Mapping for smd-id not found, expected one of [lol, cat]");
} }
@Test @Test
@ -142,6 +144,6 @@ class SmdrlCsvParserTest {
"1,2013-11-24T23:30:04.3Z", "1,2013-11-24T23:30:04.3Z",
"smd-id,insertion-datetime", "smd-id,insertion-datetime",
"0000001681375789102250-65535,haha,2013-08-09T12:00:00.0Z"))); "0000001681375789102250-65535,haha,2013-08-09T12:00:00.0Z")));
assertThat(thrown).hasMessageThat().contains("elements"); assertThat(thrown).hasMessageThat().isEqualTo("Invalid format: \"haha\"");
} }
} }