Refactor SMDRL + Claims CSV parsing (#1704)

This uses the Apache commons CSV parsing instead of rolling our own.

Annoyingly, the results that we're given aren't exactly proper CSVs
since they have a non-standard line of data at the top, and the header
is actually the second line.
This commit is contained in:
gbrodman 2022-07-12 15:42:12 -04:00 committed by GitHub
parent 2a502261bb
commit d922e01f6b
6 changed files with 56 additions and 62 deletions

View file

@ -16,18 +16,21 @@ package google.registry.tmch;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import google.registry.model.tmch.ClaimsList;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.joda.time.DateTime;
/**
* Claims List (MarksDB DNL CSV) Parser.
*
* <p>This is a quick and dirty CSV parser made specifically for the DNL CSV format defined in the
* TMCH specification. It doesn't support any fancy CSV features like quotes.
*
* @see <a href="http://tools.ietf.org/html/draft-lozano-tmch-func-spec-08#section-6.1">
* TMCH functional specifications - DNL List file</a>
*/
@ -38,7 +41,7 @@ public class ClaimsListParser {
*
* <p>Please note that this does <b>not</b> insert the object into the DB.
*/
public static ClaimsList parse(List<String> lines) {
public static ClaimsList parse(List<String> lines) throws IOException {
ImmutableMap.Builder<String, String> builder = new ImmutableMap.Builder<>();
// First line: <version>,<DNL List creation datetime>
@ -51,26 +54,16 @@ public class ClaimsListParser {
checkArgument(version == 1, String.format(
"Line 1: Expected version 1, found %d", version));
// Second line contains headers: DNL,lookup-key,insertion-datetime
List<String> secondLine = Splitter.on(',').splitToList(lines.get(1));
checkArgument(secondLine.size() == 3, String.format(
"Line 2: Expected 3 elements, found %d", secondLine.size()));
checkArgument("DNL".equals(secondLine.get(0)), String.format(
"Line 2: Expected header \"DNL\", found \"%s\"", secondLine.get(0)));
checkArgument("lookup-key".equals(secondLine.get(1)), String.format(
"Line 2: Expected header \"lookup-key\", found \"%s\"", secondLine.get(1)));
checkArgument("insertion-datetime".equals(secondLine.get(2)), String.format(
"Line 2: Expected header \"insertion-datetime\", found \"%s\"", secondLine.get(2)));
// Subsequent lines: <DNL>,<lookup key>,<DNL insertion datetime>
for (int i = 2; i < lines.size(); i++) {
List<String> currentLine = Splitter.on(',').splitToList(lines.get(i));
checkArgument(currentLine.size() == 3, String.format(
"Line %d: Expected 3 elements, found %d", i + 1, currentLine.size()));
String label = currentLine.get(0);
String lookupKey = currentLine.get(1);
DateTime.parse(currentLine.get(2)); // This is the insertion time, currently unused.
// Note: we have to skip the first line because it contains the version metadata
CSVParser csv =
CSVFormat.Builder.create(CSVFormat.DEFAULT)
.setHeader()
.setSkipHeaderRecord(true)
.build()
.parse(new StringReader(Joiner.on('\n').join(lines.subList(1, lines.size()))));
for (CSVRecord record : csv) {
String label = record.get("DNL");
String lookupKey = record.get("lookup-key");
builder.put(label, lookupKey);
}

View file

@ -36,7 +36,6 @@ import java.security.GeneralSecurityException;
import java.security.Security;
import java.security.SignatureException;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import javax.annotation.Tainted;
import javax.inject.Inject;
@ -125,7 +124,8 @@ public final class Marksdb {
}
}
List<String> fetchSignedCsv(Optional<String> loginAndPassword, String csvPath, String sigPath)
ImmutableList<String> fetchSignedCsv(
Optional<String> loginAndPassword, String csvPath, String sigPath)
throws IOException, GeneralSecurityException, PGPException {
checkArgument(
loginAndPassword.isPresent(), "Cannot fetch from MarksDB without login credentials");

View file

@ -17,55 +17,52 @@ package google.registry.tmch;
import static com.google.common.base.Preconditions.checkArgument;
import static org.joda.time.DateTimeZone.UTC;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import google.registry.model.smd.SignedMarkRevocationList;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.joda.time.DateTime;
/**
* Signed Mark Data Revocation List (SMDRL) CSV Parser
*
* <p>This is a quick and dirty CSV parser made specifically for the SMDRL CSV format defined in
* the TMCH specification. It doesn't support any fancy CSV features like quotes.
*
* @see <a href="http://tools.ietf.org/html/draft-lozano-tmch-func-spec-08#section-6.2">
* TMCH functional specifications - SMD Revocation List</a>
*/
public final class SmdrlCsvParser {
/** Converts the lines from the DNL CSV file into a data structure. */
public static SignedMarkRevocationList parse(List<String> lines) {
public static SignedMarkRevocationList parse(List<String> lines) throws IOException {
ImmutableMap.Builder<String, DateTime> revokes = new ImmutableMap.Builder<>();
// First line: <version>,<SMD Revocation List creation datetime>
List<String> firstLine = Splitter.on(',').splitToList(lines.get(0));
checkArgument(firstLine.size() == 2, String.format(
"Line 1: Expected 2 elements, found %d", firstLine.size()));
checkArgument(
firstLine.size() == 2,
String.format("Line 1: Expected 2 elements, found %d", firstLine.size()));
int version = Integer.parseInt(firstLine.get(0));
checkArgument(version == 1, String.format(
"Line 1: Expected version 1, found %d", version));
checkArgument(version == 1, String.format("Line 1: Expected version 1, found %d", version));
DateTime creationTime = DateTime.parse(firstLine.get(1)).withZone(UTC);
// Second line contains headers: smd-id,insertion-datetime
List<String> secondLine = Splitter.on(',').splitToList(lines.get(1));
checkArgument(secondLine.size() == 2, String.format(
"Line 2: Expected 2 elements, found %d", secondLine.size()));
checkArgument("smd-id".equals(secondLine.get(0)), String.format(
"Line 2: Expected header \"smd-id\", found \"%s\"", secondLine.get(0)));
checkArgument("insertion-datetime".equals(secondLine.get(1)), String.format(
"Line 2: Expected header \"insertion-datetime\", found \"%s\"", secondLine.get(1)));
// Note: we have to skip the first line because it contains the version metadata
CSVParser csv =
CSVFormat.Builder.create(CSVFormat.DEFAULT)
.setHeader()
.setSkipHeaderRecord(true)
.build()
.parse(new StringReader(Joiner.on('\n').join(lines.subList(1, lines.size()))));
// Subsequent lines: <smd-id>,<revoked SMD datetime>
for (int i = 2; i < lines.size(); i++) {
List<String> currentLine = Splitter.on(',').splitToList(lines.get(i));
checkArgument(currentLine.size() == 2, String.format(
"Line %d: Expected 2 elements, found %d", i + 1, currentLine.size()));
String smdId = currentLine.get(0);
DateTime revokedTime = DateTime.parse(currentLine.get(1));
for (CSVRecord record : csv) {
String smdId = record.get("smd-id");
DateTime revokedTime = DateTime.parse(record.get("insertion-datetime"));
revokes.put(smdId, revokedTime);
}
return SignedMarkRevocationList.create(creationTime, revokes.build());
}
}

View file

@ -16,6 +16,7 @@ package google.registry.tmch;
import static google.registry.request.Action.Method.POST;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.FluentLogger;
import google.registry.keyring.api.KeyModule.Key;
import google.registry.model.tmch.ClaimsList;
@ -24,7 +25,6 @@ import google.registry.request.Action;
import google.registry.request.auth.Auth;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.List;
import java.util.Optional;
import javax.inject.Inject;
import org.bouncycastle.openpgp.PGPException;
@ -49,13 +49,14 @@ public final class TmchDnlAction implements Runnable {
/** Synchronously fetches latest domain name list and saves it to Cloud SQL. */
@Override
public void run() {
List<String> lines;
ClaimsList claims;
try {
lines = marksdb.fetchSignedCsv(marksdbDnlLoginAndPassword, DNL_CSV_PATH, DNL_SIG_PATH);
ImmutableList<String> lines =
marksdb.fetchSignedCsv(marksdbDnlLoginAndPassword, DNL_CSV_PATH, DNL_SIG_PATH);
claims = ClaimsListParser.parse(lines);
} catch (GeneralSecurityException | IOException | PGPException e) {
throw new RuntimeException(e);
}
ClaimsList claims = ClaimsListParser.parse(lines);
ClaimsListDao.save(claims);
logger.atInfo().log(
"Inserted %,d claims into the DB(s), created at %s.",

View file

@ -16,6 +16,7 @@ package google.registry.tmch;
import static google.registry.request.Action.Method.POST;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.FluentLogger;
import google.registry.keyring.api.KeyModule.Key;
import google.registry.model.smd.SignedMarkRevocationList;
@ -23,7 +24,6 @@ import google.registry.request.Action;
import google.registry.request.auth.Auth;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.List;
import java.util.Optional;
import javax.inject.Inject;
import org.bouncycastle.openpgp.PGPException;
@ -48,13 +48,14 @@ public final class TmchSmdrlAction implements Runnable {
/** Synchronously fetches latest signed mark revocation list and saves it to the database. */
@Override
public void run() {
List<String> lines;
SignedMarkRevocationList smdrl;
try {
lines = marksdb.fetchSignedCsv(marksdbSmdrlLoginAndPassword, SMDRL_CSV_PATH, SMDRL_SIG_PATH);
ImmutableList<String> lines =
marksdb.fetchSignedCsv(marksdbSmdrlLoginAndPassword, SMDRL_CSV_PATH, SMDRL_SIG_PATH);
smdrl = SmdrlCsvParser.parse(lines);
} catch (GeneralSecurityException | IOException | PGPException e) {
throw new RuntimeException(e);
}
SignedMarkRevocationList smdrl = SmdrlCsvParser.parse(lines);
smdrl.save();
logger.atInfo().log(
"Inserted %,d smd revocations into the database, created at %s.",

View file

@ -79,7 +79,7 @@ class SmdrlCsvParserTest {
}
@Test
void testOneRow() {
void testOneRow() throws Exception {
SignedMarkRevocationList smdrl =
SmdrlCsvParser.parse(
ImmutableList.of(
@ -93,7 +93,7 @@ class SmdrlCsvParserTest {
}
@Test
void testEmpty() {
void testEmpty() throws Exception {
SignedMarkRevocationList smdrl =
SmdrlCsvParser.parse(
ImmutableList.of("1,2014-11-24T23:30:04.3Z", "smd-id,insertion-datetime"));
@ -128,7 +128,9 @@ class SmdrlCsvParserTest {
"1,2013-11-24T23:30:04.3Z",
"lol,cat",
"0000001681375789102250-65535,2013-08-09T12:00:00.0Z")));
assertThat(thrown).hasMessageThat().contains("header");
assertThat(thrown)
.hasMessageThat()
.isEqualTo("Mapping for smd-id not found, expected one of [lol, cat]");
}
@Test
@ -142,6 +144,6 @@ class SmdrlCsvParserTest {
"1,2013-11-24T23:30:04.3Z",
"smd-id,insertion-datetime",
"0000001681375789102250-65535,haha,2013-08-09T12:00:00.0Z")));
assertThat(thrown).hasMessageThat().contains("elements");
assertThat(thrown).hasMessageThat().isEqualTo("Invalid format: \"haha\"");
}
}