Add Bloom filters to the Cloud SQL PremiumList schema (#306)

* Add Bloom filters to the Cloud SQL PremiumList schema

They are slightly different from the existing Bloom filters stored in Datastore
in that they now use an ASCII String encoding rather than the more generic
CharSequence, and there is no maximum size (whereas we previously had to live
within the 1 MB max entity size for Datastore).
This commit is contained in:
Ben McIlwain 2019-10-09 17:06:42 -04:00 committed by GitHub
parent 5e19cb7a02
commit c130cdb042
8 changed files with 218 additions and 4 deletions

View file

@ -0,0 +1,59 @@
// Copyright 2019 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.persistence;
import static com.google.common.base.Charsets.US_ASCII;
import static com.google.common.hash.Funnels.stringFunnel;
import com.google.common.hash.BloomFilter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import javax.annotation.Nullable;
import javax.persistence.AttributeConverter;
import javax.persistence.Converter;
/**
 * JPA {@link AttributeConverter} that maps a {@link BloomFilter} of ASCII strings to and from the
 * {@code byte[]} stored in the database column.
 *
 * <p>A {@code null} value on either side of the conversion is passed through as {@code null}.
 */
@Converter(autoApply = true)
public class BloomFilterConverter implements AttributeConverter<BloomFilter<String>, byte[]> {

  /**
   * Serializes a Bloom filter into its database representation.
   *
   * @param entity the Bloom filter to serialize, or {@code null}
   * @return the bytes written by {@code BloomFilter.writeTo}, or {@code null} if {@code entity} is
   *     {@code null}
   * @throws UncheckedIOException if writing the filter raises an {@link IOException}
   */
  @Override
  @Nullable
  public byte[] convertToDatabaseColumn(@Nullable BloomFilter<String> entity) {
    if (entity != null) {
      ByteArrayOutputStream serialized = new ByteArrayOutputStream();
      try {
        entity.writeTo(serialized);
        return serialized.toByteArray();
      } catch (IOException e) {
        throw new UncheckedIOException("Error saving Bloom filter data", e);
      }
    }
    return null;
  }

  /**
   * Rebuilds a Bloom filter from its database representation.
   *
   * @param columnValue the stored bytes, or {@code null}
   * @return the filter read back with a US-ASCII string funnel, or {@code null} if {@code
   *     columnValue} is {@code null}
   * @throws UncheckedIOException if reading the filter raises an {@link IOException}
   */
  @Override
  @Nullable
  public BloomFilter<String> convertToEntityAttribute(@Nullable byte[] columnValue) {
    if (columnValue == null) {
      return null;
    }
    ByteArrayInputStream serialized = new ByteArrayInputStream(columnValue);
    try {
      // The funnel must match the one the filter was created with (ASCII strings).
      return BloomFilter.readFrom(serialized, stringFunnel(US_ASCII));
    } catch (IOException e) {
      throw new UncheckedIOException("Error loading Bloom filter data", e);
    }
  }
}

View file

@ -14,8 +14,12 @@
package google.registry.schema.tld; package google.registry.schema.tld;
import static com.google.common.base.Charsets.US_ASCII;
import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Preconditions.checkState;
import static com.google.common.hash.Funnels.stringFunnel;
import com.google.common.collect.ImmutableMap;
import com.google.common.hash.BloomFilter;
import google.registry.model.CreateAutoTimestamp; import google.registry.model.CreateAutoTimestamp;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.util.Map; import java.util.Map;
@ -67,11 +71,16 @@ public class PremiumList {
@Column(name = "price", nullable = false) @Column(name = "price", nullable = false)
private Map<String, BigDecimal> labelsToPrices; private Map<String, BigDecimal> labelsToPrices;
@Column(nullable = false)
private BloomFilter<String> bloomFilter;
private PremiumList(String name, CurrencyUnit currency, Map<String, BigDecimal> labelsToPrices) { private PremiumList(String name, CurrencyUnit currency, Map<String, BigDecimal> labelsToPrices) {
// TODO(mcilwain): Generate the Bloom filter and set it here.
this.name = name; this.name = name;
this.currency = currency; this.currency = currency;
this.labelsToPrices = labelsToPrices; this.labelsToPrices = labelsToPrices;
// ASCII is used for the charset because all premium list domain labels are stored punycoded.
this.bloomFilter = BloomFilter.create(stringFunnel(US_ASCII), labelsToPrices.size());
labelsToPrices.keySet().forEach(this.bloomFilter::put);
} }
// Hibernate requires this default constructor. // Hibernate requires this default constructor.
@ -101,7 +110,18 @@ public class PremiumList {
} }
/** Returns a {@link Map} of domain labels to prices. */ /** Returns a {@link Map} of domain labels to prices. */
public Map<String, BigDecimal> getLabelsToPrices() { public ImmutableMap<String, BigDecimal> getLabelsToPrices() {
return labelsToPrices; return ImmutableMap.copyOf(labelsToPrices);
}
/**
* Returns a Bloom filter to determine whether a label might be premium, or is definitely not.
*
* <p>If the domain label might be premium, then the next step is to check for the existence of a
* corresponding row in the PremiumListEntry table. Otherwise, we know for sure it's not premium,
* and no DB load is required.
*/
public BloomFilter<String> getBloomFilter() {
return bloomFilter;
} }
} }

View file

@ -33,6 +33,7 @@
<class>google.registry.model.eppcommon.Trid</class> <class>google.registry.model.eppcommon.Trid</class>
<!-- Customized type converters --> <!-- Customized type converters -->
<class>google.registry.persistence.BloomFilterConverter</class>
<class>google.registry.persistence.CreateAutoTimestampConverter</class> <class>google.registry.persistence.CreateAutoTimestampConverter</class>
<class>google.registry.persistence.UpdateAutoTimestampConverter</class> <class>google.registry.persistence.UpdateAutoTimestampConverter</class>
<class>google.registry.persistence.ZonedDateTimeConverter</class> <class>google.registry.persistence.ZonedDateTimeConverter</class>

View file

@ -0,0 +1,67 @@
// Copyright 2019 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.persistence;
import static com.google.common.base.Charsets.US_ASCII;
import static com.google.common.hash.Funnels.stringFunnel;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.model.transaction.TransactionManagerFactory.jpaTm;
import com.google.common.collect.ImmutableSet;
import com.google.common.hash.BloomFilter;
import google.registry.model.ImmutableObject;
import google.registry.model.transaction.JpaTransactionManagerRule;
import javax.persistence.Entity;
import javax.persistence.Id;
import org.hibernate.cfg.Environment;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for {@link BloomFilterConverter}. */
@RunWith(JUnit4.class)
public class BloomFilterConverterTest {

  // Registers TestEntity with the JPA rule and sets Hibernate's hbm2ddl mode to "update"
  // (presumably so the table backing TestEntity is created for the test -- confirm).
  @Rule
  public final JpaTransactionManagerRule jpaTmRule =
      new JpaTransactionManagerRule.Builder()
          .withEntityClass(TestEntity.class)
          .withProperty(Environment.HBM2DDL_AUTO, "update")
          .build();

  /** Persists an entity holding a Bloom filter, reloads it, and expects an equal filter back. */
  @Test
  public void roundTripConversion_returnsSameBloomFilter() {
    BloomFilter<String> original = BloomFilter.create(stringFunnel(US_ASCII), 3);
    for (String label : ImmutableSet.of("foo", "bar", "baz")) {
      original.put(label);
    }
    TestEntity toSave = new TestEntity(original);
    jpaTm().transact(() -> jpaTm().getEntityManager().persist(toSave));
    // "id" is the fixed primary key that TestEntity assigns to its name field.
    TestEntity loaded =
        jpaTm().transact(() -> jpaTm().getEntityManager().find(TestEntity.class, "id"));
    assertThat(loaded.bloomFilter).isEqualTo(original);
  }

  /** Minimal entity with a constant primary key and a single Bloom filter attribute. */
  @Entity(name = "TestEntity") // Override entity name to avoid the nested class reference.
  public static class TestEntity extends ImmutableObject {

    @Id String name = "id";

    BloomFilter<String> bloomFilter;

    /** No-argument constructor. */
    public TestEntity() {}

    /** Creates an entity wrapping the given Bloom filter. */
    public TestEntity(BloomFilter<String> filter) {
      this.bloomFilter = filter;
    }
  }
}

View file

@ -0,0 +1,50 @@
// Copyright 2019 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.schema.tld;
import static com.google.common.truth.Truth.assertThat;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.hash.BloomFilter;
import java.math.BigDecimal;
import org.joda.money.CurrencyUnit;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for {@link PremiumList}. */
@RunWith(JUnit4.class)
public class PremiumListTest {

  /** Fixture mapping three premium labels to their prices. */
  private static final ImmutableMap<String, BigDecimal> TEST_PRICES =
      ImmutableMap.of(
          "silver", BigDecimal.valueOf(10.23),
          "gold", BigDecimal.valueOf(1305.47),
          "palladium", BigDecimal.valueOf(1552.78));

  /**
   * Checks that the Bloom filter of a newly created list reports every fixture label as possibly
   * present and reports three unrelated labels as absent.
   */
  @Test
  public void bloomFilter_worksCorrectly() {
    PremiumList premiumList = PremiumList.create("testname", CurrencyUnit.USD, TEST_PRICES);
    BloomFilter<String> filter = premiumList.getBloomFilter();

    for (String premiumLabel : ImmutableSet.of("silver", "gold", "palladium")) {
      assertThat(filter.mightContain(premiumLabel)).isTrue();
    }
    for (String otherLabel : ImmutableSet.of("dirt", "pyrite", "zirconia")) {
      assertThat(filter.mightContain(otherLabel)).isFalse();
    }
  }
}

View file

@ -0,0 +1,15 @@
-- Copyright 2019 The Nomulus Authors. All Rights Reserved.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Adds the bloom_filter column (bytea, not null) to "PremiumList". The "if not exists" clause
-- makes the statement a no-op when the column is already present.
-- NOTE(review): adding a NOT NULL column without a DEFAULT fails in PostgreSQL when the table
-- already holds rows -- presumably "PremiumList" is empty wherever this runs; confirm.
alter table "PremiumList" add column if not exists bloom_filter bytea not null;

View file

@ -130,6 +130,7 @@
create table "PremiumList" ( create table "PremiumList" (
revision_id bigserial not null, revision_id bigserial not null,
bloom_filter bytea not null,
creation_timestamp timestamptz not null, creation_timestamp timestamptz not null,
currency bytea not null, currency bytea not null,
name text not null, name text not null,

View file

@ -92,7 +92,8 @@ CREATE TABLE public."PremiumList" (
revision_id bigint NOT NULL, revision_id bigint NOT NULL,
creation_timestamp timestamp with time zone NOT NULL, creation_timestamp timestamp with time zone NOT NULL,
currency bytea NOT NULL, currency bytea NOT NULL,
name text NOT NULL name text NOT NULL,
bloom_filter bytea NOT NULL
); );