Create a LevelDbLogReader class for Java

This is a first step towards porting our database backup comparison tool (which was written in Crack and subsequently in Python) to Java so that we can run it in a reasonable amount of time. Ideally, this functionality would already be available in Java; however, the only existing implementation I can find is currently deprecated without a replacement.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=165433766
This commit is contained in:
parent 38abe9fa48
commit 808f40890a
2 changed files with 337 additions and 0 deletions
173  java/google/registry/tools/LevelDbLogReader.java  Normal file
@@ -0,0 +1,173 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package google.registry.tools;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * Reads records from a set of LevelDB files and builds a gigantic ImmutableList from them.
 *
 * <p>See <a
 * href="https://github.com/google/leveldb/blob/master/doc/log_format.md">log_format.md</a> for the
 * leveldb log format specification.
 *
 * <p>There are several other implementations of this, none of which appeared suitable for our use
 * case: <a href="https://github.com/google/leveldb">The original C++ implementation</a>. <a
 * href="https://cloud.google.com/appengine/docs/standard/java/javadoc/com/google/appengine/api/files/RecordWriteChannel">
 * com.google.appengine.api.files.RecordWriteChannel</a> - Exactly what we need but deprecated. The
 * referenced replacement: <a
 * href="https://github.com/GoogleCloudPlatform/appengine-gcs-client.git">The App Engine GCS
 * Client</a> - Does not appear to have any support for working with LevelDB.
 */
public final class LevelDbLogReader {

  @VisibleForTesting static final int BLOCK_SIZE = 32 * 1024;
  @VisibleForTesting static final int HEADER_SIZE = 7;

  private final ByteArrayOutputStream recordContents = new ByteArrayOutputStream();
  private final ImmutableList.Builder<byte[]> recordListBuilder = new ImmutableList.Builder<>();

  /** Read a complete block, which must be exactly 32 KB. */
  private void processBlock(byte[] block) {
    // Read records from the block until there is no longer enough space for a record (i.e. until
    // we're at HEADER_SIZE - 1 bytes from the end of the block).
    int i = 0;
    while (i < BLOCK_SIZE - (HEADER_SIZE - 1)) {
      RecordHeader recordHeader = readRecordHeader(block, i);
      if (recordHeader.type == ChunkType.END) {
        // A type of zero indicates that we've reached the padding zeroes at the end of the block.
        break;
      }

      // Copy the contents of the record into recordContents.
      recordContents.write(block, i + HEADER_SIZE, recordHeader.size);

      // If this is the last (or only) chunk in the record, store the full contents into the List.
      if (recordHeader.type == ChunkType.FULL || recordHeader.type == ChunkType.LAST) {
        recordListBuilder.add(recordContents.toByteArray());
        recordContents.reset();
      }

      i += recordHeader.size + HEADER_SIZE;
    }
  }

  /**
   * Gets a byte from "block" as an unsigned value.
   *
   * <p>Java bytes are signed, which doesn't work very well for our bit-shifting operations.
   */
  private int getUnsignedByte(byte[] block, int pos) {
    return block[pos] & 0xFF;
  }

  /** Reads the 7 byte record header. */
  private RecordHeader readRecordHeader(byte[] block, int pos) {
    // Read checksum (4 bytes, LE).
    int checksum =
        getUnsignedByte(block, pos)
            | (getUnsignedByte(block, pos + 1) << 8)
            | (getUnsignedByte(block, pos + 2) << 16)
            | (getUnsignedByte(block, pos + 3) << 24);
    // Read size (2 bytes, LE).
    int size = getUnsignedByte(block, pos + 4) | (getUnsignedByte(block, pos + 5) << 8);
    // Read type (1 byte).
    int type = getUnsignedByte(block, pos + 6);

    return new RecordHeader(checksum, size, ChunkType.fromCode(type));
  }

  /** Reads all records in the Reader into the record set. */
  public void readFrom(InputStream source) throws IOException {
    byte[] block = new byte[BLOCK_SIZE];

    // read until we have no more.
    while (true) {
      int amountRead = source.read(block, 0, BLOCK_SIZE);
      if (amountRead <= 0) {
        break;
      }
      assert amountRead == BLOCK_SIZE;

      processBlock(block);
    }
  }

  /** Reads all records from the file specified by "path" into the record set. */
  public void readFrom(Path path) throws IOException {
    readFrom(Files.newInputStream(path));
  }

  /** Reads all records from the specified file into the record set. */
  public void readFrom(String filename) throws IOException {
    readFrom(FileSystems.getDefault().getPath(filename));
  }

  /**
   * Gets the list of records constructed so far.
   *
   * <p>Note that this does not invalidate the internal state of the object: we return a copy and
   * this can be called multiple times.
   */
  ImmutableList<byte[]> getRecords() {
    return recordListBuilder.build();
  }

  /** Aggregates the fields in a record header. */
  private static final class RecordHeader {
    final int checksum;
    final int size;
    final ChunkType type;

    public RecordHeader(int checksum, int size, ChunkType type) {
      this.checksum = checksum;
      this.size = size;
      this.type = type;
    }
  }

  @VisibleForTesting
  enum ChunkType {
    // Warning: these values must map to their array indices. If this relationship is broken,
    // you'll need to change fromCode() to not simply index into values().
    END(0),
    FULL(1),
    FIRST(2),
    MIDDLE(3),
    LAST(4);

    private final int code;

    ChunkType(int code) {
      this.code = code;
    }

    int getCode() {
      return code;
    }

    /** Construct a record type from the numeric record type code. */
    static ChunkType fromCode(int code) {
      return values()[code];
    }
  }
}
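For context, a minimal sketch of how the class might be driven once the comparison tool is ported. The file path is purely hypothetical, and the snippet lives in the same package because getRecords() is package-private; only readFrom() and getRecords() come from the class above.

package google.registry.tools;

import com.google.common.collect.ImmutableList;
import java.io.IOException;

/** Hypothetical caller, not part of this commit. */
final class LevelDbLogReaderDemo {
  public static void main(String[] args) throws IOException {
    LevelDbLogReader reader = new LevelDbLogReader();
    // Read every 32 KB block of a single leveldb log file; the path is an example only.
    reader.readFrom("/tmp/datastore-backup/output-0");
    ImmutableList<byte[]> records = reader.getRecords();
    System.out.printf("Read %d records%n", records.size());
  }
}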
164  javatests/google/registry/tools/LevelDbLogReaderTest.java  Normal file
@@ -0,0 +1,164 @@
// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package google.registry.tools;

import static com.google.common.truth.Truth.assertThat;
import static google.registry.tools.LevelDbLogReader.ChunkType;

import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Bytes;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** LevelDbLogReader tests. */
@RunWith(JUnit4.class)
public final class LevelDbLogReaderTest {

  private static final int MAX_RECORD = LevelDbLogReader.BLOCK_SIZE - LevelDbLogReader.HEADER_SIZE;

  // Size of the test record. Any value < 256 will do.
  private static final int TEST_RECORD_SIZE = 231;

  // The maximum offset at which a test record can be inserted, measured in bytes from the
  // beginning of the block.
  private static final int MAX_TEST_RECORD_OFFSET =
      LevelDbLogReader.BLOCK_SIZE - (LevelDbLogReader.HEADER_SIZE + TEST_RECORD_SIZE);

  /**
   * Adds a record of bytes of 'val' of the given size to bytes.
   *
   * <p>This currently doesn't write a real checksum since we're not doing anything with that in
   * the leveldb reader.
   *
   * <p>Returns the new offset for the next block.
   */
  private static int addRecord(byte[] bytes, int pos, ChunkType type, int size, int val) {

    // Write a bogus checksum.
    for (int i = 0; i < 4; ++i) {
      bytes[pos++] = -1;
    }

    // Write size and type.
    bytes[pos++] = (byte) size;
    bytes[pos++] = (byte) (size >> 8);
    bytes[pos++] = (byte) type.getCode();

    // Write "size" bytes of data.
    for (int i = 0; i < size; ++i) {
      bytes[pos + i] = (byte) val;

      // Swap the least significant bytes in val so we can have more than 256 different
      // same-sized records.
      val = (val >> 8) | ((val & 0xff) << 8);
    }

    return pos + size;
  }

  private TestBlock makeBlockOfRepeatingBytes(int startVal) {
    byte[] block = new byte[LevelDbLogReader.BLOCK_SIZE];
    int pos = 0;
    int recordCount = 0;
    while (pos < MAX_TEST_RECORD_OFFSET) {
      pos =
          addRecord(
              block,
              pos,
              ChunkType.FULL,
              TEST_RECORD_SIZE,
              0xffff & (pos + startVal));
      ++recordCount;
    }
    return new TestBlock(block, recordCount);
  }

  @Test
  public void testSimpleBlock() throws IOException {
    TestBlock block = makeBlockOfRepeatingBytes(0);
    LevelDbLogReader reader = new LevelDbLogReader();
    reader.readFrom(new ByteArrayInputStream(block.data));
    ImmutableList<byte[]> records = reader.getRecords();
    assertThat(records).hasSize(block.recordCount);
  }

  @Test
  public void testLargeRecord() throws IOException {
    byte[] block = new byte[LevelDbLogReader.BLOCK_SIZE];
    addRecord(block, 0, ChunkType.FIRST, MAX_RECORD, (byte) 1);
    LevelDbLogReader reader = new LevelDbLogReader();
    reader.readFrom(new ByteArrayInputStream(block));
    assertThat(reader.getRecords()).isEmpty();

    addRecord(block, 0, ChunkType.MIDDLE, MAX_RECORD, (byte) 2);
    reader.readFrom(new ByteArrayInputStream(block));
    assertThat(reader.getRecords()).isEmpty();

    addRecord(block, 0, ChunkType.LAST, MAX_RECORD, (byte) 3);
    reader.readFrom(new ByteArrayInputStream(block));

    List<byte[]> records = reader.getRecords();
    assertThat(records).hasSize(1);
    byte[] record = records.get(0);

    for (int i = 0; i < MAX_RECORD; ++i) {
      assertThat(record[i]).isEqualTo((i % 2 == 1) ? 0 : 1);
    }
    for (int i = MAX_RECORD; i < MAX_RECORD * 2; ++i) {
      // Note that we have to invert the byte check here because MAX_RECORD is not divisible by
      // two.
      assertThat(record[i]).isEqualTo((i % 2 == 0) ? 0 : 2);
    }
    for (int i = MAX_RECORD * 2; i < MAX_RECORD * 3; ++i) {
      assertThat(record[i]).isEqualTo((i % 2 == 1) ? 0 : 3);
    }
  }

  @Test
  public void readFromMultiBlockStream() throws IOException {
    TestBlock block0 = makeBlockOfRepeatingBytes(0);
    TestBlock block1 = makeBlockOfRepeatingBytes(138);
    ByteArrayInputStream source = new ByteArrayInputStream(Bytes.concat(block0.data, block1.data));

    LevelDbLogReader reader = new LevelDbLogReader();
    reader.readFrom(source);
    assertThat(reader.getRecords()).hasSize(block0.recordCount + block1.recordCount);
  }

  @Test
  public void testChunkTypesToCode() {
    // Verify that we're translating chunk types to code values correctly.
    assertThat(ChunkType.fromCode(ChunkType.END.getCode())).isEqualTo(ChunkType.END);
    assertThat(ChunkType.fromCode(ChunkType.FULL.getCode())).isEqualTo(ChunkType.FULL);
    assertThat(ChunkType.fromCode(ChunkType.FIRST.getCode())).isEqualTo(ChunkType.FIRST);
    assertThat(ChunkType.fromCode(ChunkType.MIDDLE.getCode())).isEqualTo(ChunkType.MIDDLE);
    assertThat(ChunkType.fromCode(ChunkType.LAST.getCode())).isEqualTo(ChunkType.LAST);
  }

  /** Aggregates the bytes of a test block with the record count. */
  private static final class TestBlock {
    final byte[] data;
    final int recordCount;

    TestBlock(byte[] data, int recordCount) {
      this.data = data;
      this.recordCount = recordCount;
    }
  }
}
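The test helper above writes a bogus checksum because the reader never verifies one. If verification is added later, the masked CRC32C that leveldb stores in the first four header bytes could be computed roughly as below. This is a sketch based on my reading of log_format.md and leveldb's crc32c::Mask, not code from this commit; it assumes Guava's Hashing.crc32c() is available on the classpath, and the class name is hypothetical.

package google.registry.tools;

import com.google.common.hash.Hashing;

/**
 * Hypothetical helper, not part of this commit: computes the "masked" CRC32C value stored in a
 * leveldb record header. Per log_format.md, the checksum covers the type byte followed by the
 * record data, then gets masked so that checksums of data containing checksums don't collide.
 */
final class LevelDbChecksums {

  private static final int MASK_DELTA = 0xa282ead8;

  static int maskedCrc32c(byte type, byte[] data, int offset, int length) {
    // Hash the type byte and the payload together, in the order leveldb uses.
    int crc =
        Hashing.crc32c()
            .newHasher()
            .putByte(type)
            .putBytes(data, offset, length)
            .hash()
            .asInt();
    // Rotate right by 15 bits and add the mask delta (leveldb's crc32c::Mask).
    return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
  }

  private LevelDbChecksums() {}
}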