diff --git a/java/google/registry/tools/LevelDbLogReader.java b/java/google/registry/tools/LevelDbLogReader.java new file mode 100644 index 000000000..6562acdbc --- /dev/null +++ b/java/google/registry/tools/LevelDbLogReader.java @@ -0,0 +1,173 @@ +// Copyright 2017 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package google.registry.tools; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * Reads records from a set of LevelDB files and builds a gigantic ImmutableList from them. + * + *

See log_format.md for the + * leveldb log format specification. + * + *

There are several other implementations of this, none of which appeared suitable for our use + * case: The original C++ implementation. + * com.google.appengine.api.files.RecordWriteChannel - Exactly what we need but deprecated. The + * referenced replacement: The App Engine GCS + * Client - Does not appear to have any support for working with LevelDB. + */ +public final class LevelDbLogReader { + + @VisibleForTesting static final int BLOCK_SIZE = 32 * 1024; + @VisibleForTesting static final int HEADER_SIZE = 7; + + private final ByteArrayOutputStream recordContents = new ByteArrayOutputStream(); + private final ImmutableList.Builder recordListBuilder = new ImmutableList.Builder<>(); + + /** Read a complete block, which must be exactly 32 KB. */ + private void processBlock(byte[] block) { + // Read records from the block until there is no longer enough space for a record (i.e. until + // we're at HEADER_SIZE - 1 bytes from the end of the block). + int i = 0; + while (i < BLOCK_SIZE - (HEADER_SIZE - 1)) { + RecordHeader recordHeader = readRecordHeader(block, i); + if (recordHeader.type == ChunkType.END) { + // A type of zero indicates that we've reached the padding zeroes at the end of the block. + break; + } + + // Copy the contents of the record into recordContents. + recordContents.write(block, i + HEADER_SIZE, recordHeader.size); + + // If this is the last (or only) chunk in the record, store the full contents into the List. + if (recordHeader.type == ChunkType.FULL || recordHeader.type == ChunkType.LAST) { + recordListBuilder.add(recordContents.toByteArray()); + recordContents.reset(); + } + + i += recordHeader.size + HEADER_SIZE; + } + } + + /** + * Gets a byte from "block" as an unsigned value. + * + *

Java bytes are signed, which doesn't work very well for our bit-shifting operations. + */ + private int getUnsignedByte(byte[] block, int pos) { + return block[pos] & 0xFF; + } + + /** Reads the 7 byte record header. */ + private RecordHeader readRecordHeader(byte[] block, int pos) { + // Read checksum (4 bytes, LE). + int checksum = + getUnsignedByte(block, pos) + | (getUnsignedByte(block, pos + 1) << 8) + | (getUnsignedByte(block, pos + 2) << 16) + | (getUnsignedByte(block, pos + 3) << 24); + // Read size (2 bytes, LE). + int size = getUnsignedByte(block, pos + 4) | (getUnsignedByte(block, pos + 5) << 8); + // Read type (1 byte). + int type = getUnsignedByte(block, pos + 6); + + return new RecordHeader(checksum, size, ChunkType.fromCode(type)); + } + + /** Reads all records in the Reader into the record set. */ + public void readFrom(InputStream source) throws IOException { + byte[] block = new byte[BLOCK_SIZE]; + + // read until we have no more. + while (true) { + int amountRead = source.read(block, 0, BLOCK_SIZE); + if (amountRead <= 0) { + break; + } + assert amountRead == BLOCK_SIZE; + + processBlock(block); + } + } + + /** Reads all records from the file specified by "path" into the record set. */ + public void readFrom(Path path) throws IOException { + readFrom(Files.newInputStream(path)); + } + + /** Reads all records from the specified file into the record set. */ + public void readFrom(String filename) throws IOException { + readFrom(FileSystems.getDefault().getPath(filename)); + } + + /** + * Gets the list of records constructed so far. + * + *

Note that this does not invalidate the internal state of the object: we return a copy and + * this can be called multiple times. + */ + ImmutableList getRecords() { + return recordListBuilder.build(); + } + + /** Aggregates the fields in a record header. */ + private static final class RecordHeader { + final int checksum; + final int size; + final ChunkType type; + + public RecordHeader(int checksum, int size, ChunkType type) { + this.checksum = checksum; + this.size = size; + this.type = type; + } + } + + @VisibleForTesting + enum ChunkType { + // Warning: these values must map to their array indices. If this relationship is broken, + // you'll need to change fromCode() to not simply index into values(). + END(0), + FULL(1), + FIRST(2), + MIDDLE(3), + LAST(4); + + private final int code; + + ChunkType(int code) { + this.code = code; + } + + int getCode() { + return code; + } + + /** Construct a record type from the numeric record type code. */ + static ChunkType fromCode(int code) { + return values()[code]; + } + } +} diff --git a/javatests/google/registry/tools/LevelDbLogReaderTest.java b/javatests/google/registry/tools/LevelDbLogReaderTest.java new file mode 100644 index 000000000..606c0a765 --- /dev/null +++ b/javatests/google/registry/tools/LevelDbLogReaderTest.java @@ -0,0 +1,164 @@ +// Copyright 2017 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package google.registry.tools;

import static com.google.common.truth.Truth.assertThat;
import static google.registry.tools.LevelDbLogReader.ChunkType;

import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Bytes;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** LevelDbLogReader tests. */
@RunWith(JUnit4.class)
public final class LevelDbLogReaderTest {

  /** The largest chunk payload that fits in a single block. */
  private static final int MAX_RECORD = LevelDbLogReader.BLOCK_SIZE - LevelDbLogReader.HEADER_SIZE;

  // Size of the test record.  Any value < 256 will do.
  private static final int TEST_RECORD_SIZE = 231;

  // The maximum offset at which a test record can be inserted, measured in bytes from the
  // beginning of the block.
  private static final int MAX_TEST_RECORD_OFFSET =
      LevelDbLogReader.BLOCK_SIZE - (LevelDbLogReader.HEADER_SIZE + TEST_RECORD_SIZE);

  /**
   * Adds a chunk of "size" bytes derived from "val" to "bytes" at offset "pos".
   *
   * <p>This currently doesn't write a real checksum since we're not doing anything with that in
   * the leveldb reader.
   *
   * @return the offset just past the newly written chunk (i.e. where the next chunk starts)
   */
  private static int addRecord(byte[] bytes, int pos, ChunkType type, int size, int val) {

    // Write a bogus checksum.
    for (int i = 0; i < 4; ++i) {
      bytes[pos++] = -1;
    }

    // Write size and type.
    bytes[pos++] = (byte) size;
    bytes[pos++] = (byte) (size >> 8);
    bytes[pos++] = (byte) type.getCode();

    // Write "size" bytes of data.
    for (int i = 0; i < size; ++i) {
      bytes[pos + i] = (byte) val;

      // Swap the least significant bytes in val so we can have more than 256 different same-sized
      // records.
      val = (val >> 8) | ((val & 0xff) << 8);
    }

    return pos + size;
  }

  /** Builds a full block of FULL chunks whose payloads are derived from their offsets. */
  private TestBlock makeBlockOfRepeatingBytes(int startVal) {
    byte[] block = new byte[LevelDbLogReader.BLOCK_SIZE];
    int pos = 0;
    int recordCount = 0;
    while (pos < MAX_TEST_RECORD_OFFSET) {
      pos = addRecord(block, pos, ChunkType.FULL, TEST_RECORD_SIZE, 0xffff & (pos + startVal));
      ++recordCount;
    }
    return new TestBlock(block, recordCount);
  }

  @Test
  public void testSimpleBlock() throws IOException {
    TestBlock block = makeBlockOfRepeatingBytes(0);
    LevelDbLogReader reader = new LevelDbLogReader();
    reader.readFrom(new ByteArrayInputStream(block.data));
    ImmutableList<byte[]> records = reader.getRecords();
    assertThat(records).hasSize(block.recordCount);
  }

  @Test
  public void testLargeRecord() throws IOException {
    // A record spanning three blocks: FIRST, MIDDLE and LAST chunks each fill a whole block, and
    // the reader only surfaces the assembled record after the LAST chunk.
    byte[] block = new byte[LevelDbLogReader.BLOCK_SIZE];
    addRecord(block, 0, ChunkType.FIRST, MAX_RECORD, 1);
    LevelDbLogReader reader = new LevelDbLogReader();
    reader.readFrom(new ByteArrayInputStream(block));
    assertThat(reader.getRecords()).isEmpty();

    addRecord(block, 0, ChunkType.MIDDLE, MAX_RECORD, 2);
    reader.readFrom(new ByteArrayInputStream(block));
    assertThat(reader.getRecords()).isEmpty();

    addRecord(block, 0, ChunkType.LAST, MAX_RECORD, 3);
    reader.readFrom(new ByteArrayInputStream(block));

    List<byte[]> records = reader.getRecords();
    assertThat(records).hasSize(1);
    byte[] record = records.get(0);

    for (int i = 0; i < MAX_RECORD; ++i) {
      assertThat(record[i]).isEqualTo((i % 2 == 1) ? 0 : 1);
    }
    for (int i = MAX_RECORD; i < MAX_RECORD * 2; ++i) {
      // Note that we have to invert the byte check here because MAX_RECORD is not divisible by
      // two.
      assertThat(record[i]).isEqualTo((i % 2 == 0) ? 0 : 2);
    }
    for (int i = MAX_RECORD * 2; i < MAX_RECORD * 3; ++i) {
      assertThat(record[i]).isEqualTo((i % 2 == 1) ? 0 : 3);
    }
  }

  @Test
  public void readFromMultiBlockStream() throws IOException {
    TestBlock block0 = makeBlockOfRepeatingBytes(0);
    TestBlock block1 = makeBlockOfRepeatingBytes(138);
    ByteArrayInputStream source = new ByteArrayInputStream(Bytes.concat(block0.data, block1.data));

    LevelDbLogReader reader = new LevelDbLogReader();
    reader.readFrom(source);
    assertThat(reader.getRecords()).hasSize(block0.recordCount + block1.recordCount);
  }

  @Test
  public void testChunkTypesToCode() {
    // Verify that we're translating chunk types to code values correctly.
    assertThat(ChunkType.fromCode(ChunkType.END.getCode())).isEqualTo(ChunkType.END);
    assertThat(ChunkType.fromCode(ChunkType.FULL.getCode())).isEqualTo(ChunkType.FULL);
    assertThat(ChunkType.fromCode(ChunkType.FIRST.getCode())).isEqualTo(ChunkType.FIRST);
    assertThat(ChunkType.fromCode(ChunkType.MIDDLE.getCode())).isEqualTo(ChunkType.MIDDLE);
    assertThat(ChunkType.fromCode(ChunkType.LAST.getCode())).isEqualTo(ChunkType.LAST);
  }

  /** Aggregates the bytes of a test block with the record count. */
  private static final class TestBlock {
    final byte[] data;
    final int recordCount;

    TestBlock(byte[] data, int recordCount) {
      this.data = data;
      this.recordCount = recordCount;
    }
  }
}