Skip to content

Commit 887f641

Browse files
committed
Core: Align CharSequenceSet impl with Data/DeleteFileSet
1 parent 3d9fc1d commit 887f641

File tree

3 files changed

+83
-161
lines changed

3 files changed

+83
-161
lines changed

api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java

Lines changed: 24 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -18,183 +18,53 @@
1818
*/
1919
package org.apache.iceberg.util;
2020

21-
import java.io.Serializable;
22-
import java.util.Collection;
23-
import java.util.Iterator;
24-
import java.util.Set;
25-
import java.util.stream.Collectors;
26-
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
21+
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
2722
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
28-
import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
29-
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
30-
import org.apache.iceberg.relocated.com.google.common.collect.Streams;
3123

32-
public class CharSequenceSet implements Set<CharSequence>, Serializable {
24+
public class CharSequenceSet extends WrapperSet<CharSequence> {
3325
private static final ThreadLocal<CharSequenceWrapper> WRAPPERS =
3426
ThreadLocal.withInitial(() -> CharSequenceWrapper.wrap(null));
3527

36-
public static CharSequenceSet of(Iterable<CharSequence> charSequences) {
37-
return new CharSequenceSet(charSequences);
38-
}
39-
40-
public static CharSequenceSet empty() {
41-
return new CharSequenceSet(ImmutableList.of());
42-
}
43-
44-
private final Set<CharSequenceWrapper> wrapperSet;
45-
46-
private CharSequenceSet(Iterable<CharSequence> charSequences) {
47-
this.wrapperSet =
48-
Sets.newHashSet(Iterables.transform(charSequences, CharSequenceWrapper::wrap));
28+
private CharSequenceSet() {
29+
// needed for serialization
4930
}
5031

51-
@Override
52-
public int size() {
53-
return wrapperSet.size();
32+
private CharSequenceSet(Iterable<? extends CharSequence> charSequences) {
33+
super(
34+
Iterables.transform(
35+
charSequences,
36+
obj -> {
37+
Preconditions.checkNotNull(obj, "Invalid object: null");
38+
return CharSequenceWrapper.wrap(obj);
39+
}));
5440
}
5541

56-
@Override
57-
public boolean isEmpty() {
58-
return wrapperSet.isEmpty();
42+
public static CharSequenceSet of(Iterable<? extends CharSequence> charSequences) {
43+
return new CharSequenceSet(charSequences);
5944
}
6045

61-
@Override
62-
public boolean contains(Object obj) {
63-
if (obj instanceof CharSequence) {
64-
CharSequenceWrapper wrapper = WRAPPERS.get();
65-
boolean result = wrapperSet.contains(wrapper.set((CharSequence) obj));
66-
wrapper.set(null); // don't hold a reference to the value
67-
return result;
68-
}
69-
return false;
46+
public static CharSequenceSet empty() {
47+
return new CharSequenceSet();
7048
}
7149

7250
@Override
73-
public Iterator<CharSequence> iterator() {
74-
return Iterators.transform(wrapperSet.iterator(), CharSequenceWrapper::get);
51+
protected Wrapper<CharSequence> wrapper() {
52+
return WRAPPERS.get();
7553
}
7654

7755
@Override
78-
public Object[] toArray() {
79-
return Iterators.toArray(iterator(), CharSequence.class);
56+
protected Wrapper<CharSequence> wrap(CharSequence file) {
57+
return CharSequenceWrapper.wrap(file);
8058
}
8159

8260
@Override
83-
@SuppressWarnings("unchecked")
84-
public <T> T[] toArray(T[] destArray) {
85-
int size = wrapperSet.size();
86-
if (destArray.length < size) {
87-
return (T[]) toArray();
88-
}
89-
90-
Iterator<CharSequence> iter = iterator();
91-
int ind = 0;
92-
while (iter.hasNext()) {
93-
destArray[ind] = (T) iter.next();
94-
ind += 1;
95-
}
96-
97-
if (destArray.length > size) {
98-
destArray[size] = null;
99-
}
100-
101-
return destArray;
61+
protected Class<CharSequence> elementClass() {
62+
return CharSequence.class;
10263
}
10364

10465
@Override
10566
public boolean add(CharSequence charSequence) {
106-
return wrapperSet.add(CharSequenceWrapper.wrap(charSequence));
107-
}
108-
109-
@Override
110-
public boolean remove(Object obj) {
111-
if (obj instanceof CharSequence) {
112-
CharSequenceWrapper wrapper = WRAPPERS.get();
113-
boolean result = wrapperSet.remove(wrapper.set((CharSequence) obj));
114-
wrapper.set(null); // don't hold a reference to the value
115-
return result;
116-
}
117-
return false;
118-
}
119-
120-
@Override
121-
@SuppressWarnings("CollectionUndefinedEquality")
122-
public boolean containsAll(Collection<?> objects) {
123-
if (objects != null) {
124-
return Iterables.all(objects, this::contains);
125-
}
126-
return false;
127-
}
128-
129-
@Override
130-
public boolean addAll(Collection<? extends CharSequence> charSequences) {
131-
if (charSequences != null) {
132-
return Iterables.addAll(
133-
wrapperSet, Iterables.transform(charSequences, CharSequenceWrapper::wrap));
134-
}
135-
return false;
136-
}
137-
138-
@Override
139-
public boolean retainAll(Collection<?> objects) {
140-
if (objects != null) {
141-
Set<CharSequenceWrapper> toRetain =
142-
objects.stream()
143-
.filter(CharSequence.class::isInstance)
144-
.map(CharSequence.class::cast)
145-
.map(CharSequenceWrapper::wrap)
146-
.collect(Collectors.toSet());
147-
148-
return Iterables.retainAll(wrapperSet, toRetain);
149-
}
150-
151-
return false;
152-
}
153-
154-
@Override
155-
@SuppressWarnings("CollectionUndefinedEquality")
156-
public boolean removeAll(Collection<?> objects) {
157-
if (objects != null) {
158-
return objects.stream().filter(this::remove).count() != 0;
159-
}
160-
161-
return false;
162-
}
163-
164-
@Override
165-
public void clear() {
166-
wrapperSet.clear();
167-
}
168-
169-
@SuppressWarnings("CollectionUndefinedEquality")
170-
@Override
171-
public boolean equals(Object other) {
172-
if (this == other) {
173-
return true;
174-
} else if (!(other instanceof Set)) {
175-
return false;
176-
}
177-
178-
Set<?> that = (Set<?>) other;
179-
180-
if (size() != that.size()) {
181-
return false;
182-
}
183-
184-
try {
185-
return containsAll(that);
186-
} catch (ClassCastException | NullPointerException unused) {
187-
return false;
188-
}
189-
}
190-
191-
@Override
192-
public int hashCode() {
193-
return wrapperSet.stream().mapToInt(CharSequenceWrapper::hashCode).sum();
194-
}
195-
196-
@Override
197-
public String toString() {
198-
return Streams.stream(iterator()).collect(Collectors.joining("CharSequenceSet({", ", ", "})"));
67+
// method is needed to not break API compatibility
68+
return super.add(charSequence);
19969
}
20070
}

api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,11 @@
1818
*/
1919
package org.apache.iceberg.util;
2020

21-
import java.io.Serializable;
2221
import org.apache.iceberg.types.Comparators;
2322
import org.apache.iceberg.types.JavaHashes;
2423

2524
/** Wrapper class to adapt CharSequence for use in maps and sets. */
26-
public class CharSequenceWrapper implements CharSequence, Serializable {
25+
public class CharSequenceWrapper implements CharSequence, WrapperSet.Wrapper<CharSequence> {
2726
public static CharSequenceWrapper wrap(CharSequence seq) {
2827
return new CharSequenceWrapper(seq);
2928
}
@@ -39,13 +38,15 @@ private CharSequenceWrapper(CharSequence wrapped) {
3938
this.wrapped = wrapped;
4039
}
4140

41+
@Override
4242
public CharSequenceWrapper set(CharSequence newWrapped) {
4343
this.wrapped = newWrapped;
4444
this.hashCode = 0;
4545
this.hashIsZero = false;
4646
return this;
4747
}
4848

49+
@Override
4950
public CharSequence get() {
5051
return wrapped;
5152
}

api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
package org.apache.iceberg.util;
2020

2121
import static org.assertj.core.api.Assertions.assertThat;
22+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
2223

2324
import java.util.Arrays;
2425
import java.util.Collections;
2526
import java.util.Set;
27+
import org.apache.iceberg.TestHelpers;
2628
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
2729
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
2830
import org.junit.jupiter.api.Test;
@@ -42,15 +44,34 @@ public void testSearchingInCharSequenceCollection() {
4244

4345
@Test
4446
public void nullString() {
45-
assertThat(CharSequenceSet.of(Arrays.asList((String) null))).contains((String) null);
47+
assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList((String) null)))
48+
.isInstanceOf(NullPointerException.class)
49+
.hasMessage("Invalid object: null");
4650
assertThat(CharSequenceSet.empty()).doesNotContain((String) null);
4751
}
4852

4953
@Test
5054
public void testRetainAll() {
55+
CharSequenceSet empty = CharSequenceSet.empty();
56+
assertThatThrownBy(() -> empty.retainAll(null))
57+
.isInstanceOf(NullPointerException.class)
58+
.hasMessage("Invalid collection: null");
59+
60+
assertThatThrownBy(() -> empty.retainAll(Collections.singletonList(null)))
61+
.isInstanceOf(NullPointerException.class)
62+
.hasMessage("Invalid object: null");
63+
64+
assertThatThrownBy(() -> empty.retainAll(Arrays.asList("123", null)))
65+
.isInstanceOf(NullPointerException.class)
66+
.hasMessage("Invalid object: null");
67+
68+
assertThatThrownBy(() -> empty.retainAll(ImmutableList.of("456", "789", 123)))
69+
.isInstanceOf(ClassCastException.class)
70+
.hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence");
71+
5172
CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
5273

53-
assertThat(set.retainAll(ImmutableList.of("456", "789", 123)))
74+
assertThat(set.retainAll(ImmutableList.of("456", "789", "555")))
5475
.overridingErrorMessage("Set should be changed")
5576
.isTrue();
5677

@@ -61,7 +82,7 @@ public void testRetainAll() {
6182
.overridingErrorMessage("Set should not be changed")
6283
.isFalse();
6384

64-
assertThat(set.retainAll(ImmutableList.of(123, 456)))
85+
assertThat(set.retainAll(ImmutableList.of("555", "789")))
6586
.overridingErrorMessage("Set should be changed")
6687
.isTrue();
6788

@@ -70,15 +91,32 @@ public void testRetainAll() {
7091

7192
@Test
7293
public void testRemoveAll() {
94+
CharSequenceSet empty = CharSequenceSet.empty();
95+
assertThatThrownBy(() -> empty.removeAll(null))
96+
.isInstanceOf(NullPointerException.class)
97+
.hasMessage("Invalid collection: null");
98+
99+
assertThatThrownBy(() -> empty.removeAll(Collections.singletonList(null)))
100+
.isInstanceOf(NullPointerException.class)
101+
.hasMessage("Invalid object: null");
102+
103+
assertThatThrownBy(() -> empty.removeAll(Arrays.asList("123", null)))
104+
.isInstanceOf(NullPointerException.class)
105+
.hasMessage("Invalid object: null");
106+
107+
assertThatThrownBy(() -> empty.removeAll(ImmutableList.of("123", 456)))
108+
.isInstanceOf(ClassCastException.class)
109+
.hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence");
110+
73111
CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
74-
assertThat(set.removeAll(ImmutableList.of("456", "789", 123)))
112+
assertThat(set.removeAll(ImmutableList.of("456", "789")))
75113
.overridingErrorMessage("Set should be changed")
76114
.isTrue();
77115

78116
assertThat(set).hasSize(1).contains("123");
79117

80118
set = CharSequenceSet.of(ImmutableList.of("123", "456"));
81-
assertThat(set.removeAll(ImmutableList.of(123, 456)))
119+
assertThat(set.removeAll(ImmutableList.of("333", "789")))
82120
.overridingErrorMessage("Set should not be changed")
83121
.isFalse();
84122

@@ -119,4 +157,17 @@ public void testEqualsAndHashCode() {
119157
.isEqualTo(set3.hashCode())
120158
.isEqualTo(set4.hashCode());
121159
}
160+
161+
@Test
162+
public void kryoSerialization() throws Exception {
163+
CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c", "b", "a"));
164+
assertThat(TestHelpers.KryoHelpers.roundTripSerialize(charSequences)).isEqualTo(charSequences);
165+
}
166+
167+
@Test
168+
public void javaSerialization() throws Exception {
169+
CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c", "b", "a"));
170+
CharSequenceSet deserialize = TestHelpers.deserialize(TestHelpers.serialize(charSequences));
171+
assertThat(deserialize).isEqualTo(charSequences);
172+
}
122173
}

0 commit comments

Comments
 (0)