Skip to content

Core: Make namespace separator configurable #10877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog
// for backwards compatibility with older REST servers where it can be assumed that a particular
// server supports view endpoints but doesn't send the "endpoints" field in the ConfigResponse
static final String VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported";
static final String NAMESPACE_SEPARATOR = "namespace-separator";
public static final String REST_PAGE_SIZE = "rest-page-size";
private static final List<String> TOKEN_PREFERENCE_ORDER =
ImmutableList.of(
Expand Down Expand Up @@ -178,6 +179,7 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog
private Integer pageSize = null;
private CloseableGroup closeables = null;
private Set<Endpoint> endpoints;
private String namespaceSeparator = null;

// a lazy thread pool for token refresh
private volatile ScheduledExecutorService refreshExecutor = null;
Expand Down Expand Up @@ -328,6 +330,9 @@ public void initialize(String name, Map<String, String> unresolved) {

this.reportingViaRestEnabled =
PropertyUtil.propertyAsBoolean(mergedProps, REST_METRICS_REPORTING_ENABLED, true);
this.namespaceSeparator =
PropertyUtil.propertyAsString(
mergedProps, NAMESPACE_SEPARATOR, RESTUtil.NAMESPACE_ESCAPED_SEPARATOR);
super.initialize(name, mergedProps);
}

Expand Down Expand Up @@ -615,7 +620,7 @@ public List<Namespace> listNamespaces(SessionContext context, Namespace namespac

Map<String, String> queryParams = Maps.newHashMap();
if (!namespace.isEmpty()) {
queryParams.put("parent", RESTUtil.encodeNamespace(namespace));
queryParams.put("parent", RESTUtil.encodeNamespace(namespace, namespaceSeparator));
}

ImmutableList.Builder<Namespace> namespaces = ImmutableList.builder();
Expand Down
60 changes: 51 additions & 9 deletions core/src/main/java/org/apache/iceberg/rest/RESTUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.base.Splitter;
import org.apache.iceberg.relocated.com.google.common.base.Strings;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;

public class RESTUtil {
private static final String NAMESPACE_ESCAPED_SEPARATOR = "%1F";
private static final Joiner NAMESPACE_ESCAPED_JOINER = Joiner.on(NAMESPACE_ESCAPED_SEPARATOR);
private static final Splitter NAMESPACE_ESCAPED_SPLITTER =
Splitter.on(NAMESPACE_ESCAPED_SEPARATOR);
static final String NAMESPACE_ESCAPED_SEPARATOR = "%1F";

/**
* @deprecated since 1.7.0, will be made private in 1.8.0; use {@link
Expand Down Expand Up @@ -194,15 +192,35 @@ public static String decodeString(String encoded) {
* @return UTF-8 encoded string representing the namespace, suitable for use as a URL parameter
*/
public static String encodeNamespace(Namespace ns) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function looks dangerous now.

Copy link
Contributor Author

@nastra nastra Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you please elaborate what you mean by "dangerous" here? Nothing really changed for callers of this method (other than the fact that a Joiner is created every time this method is called)

Preconditions.checkArgument(ns != null, "Invalid namespace: null");
String[] levels = ns.levels();
return encodeNamespace(ns, NAMESPACE_ESCAPED_SEPARATOR);
}

/**
* Returns a String representation of a namespace that is suitable for use in a URL / URI.
*
* <p>This function needs to be called when a namespace is used as a path variable (or query
* parameter etc.), to format the namespace per the spec.
*
* <p>{@link RESTUtil#decodeNamespace(String, String)} should be used to parse the namespace from
* a URL parameter.
*
* @param namespace namespace to encode
* @param separator The namespace separator to be used for encoding. The separator will be used
* as-is and won't be UTF-8 encoded.
* @return UTF-8 encoded string representing the namespace, suitable for use as a URL parameter
*/
public static String encodeNamespace(Namespace namespace, String separator) {
Preconditions.checkArgument(namespace != null, "Invalid namespace: null");
Preconditions.checkArgument(
!Strings.isNullOrEmpty(separator), "Invalid separator: null or empty");
String[] levels = namespace.levels();
String[] encodedLevels = new String[levels.length];

for (int i = 0; i < levels.length; i++) {
encodedLevels[i] = encodeString(levels[i]);
}

return NAMESPACE_ESCAPED_JOINER.join(encodedLevels);
return Joiner.on(separator).join(encodedLevels);
}

/**
Expand All @@ -215,8 +233,32 @@ public static String encodeNamespace(Namespace ns) {
* @return a namespace
*/
public static Namespace decodeNamespace(String encodedNs) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function looks dangerous now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you please elaborate what you mean by "dangerous" here? Nothing really changed for callers of this method

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mixing this and the new function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's still not clear to my why that would be "dangerous". There is no behavioral change for existing callers of this method. Can you please add a concrete example/explanation to justify the "dangerous" part?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Users can mix both - causing "confusion".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that argument is actually true. Users won't be using this code as this code is used on the client and the server but not by users in the classical sense of a user that uses Iceberg. For engine/catalog implementers the javadoc states that one should have used encodeNamespace(Namespace namespace) if you use decodeNamespace(String encodedNs).
Using words like dangerous and confusing without clear examples and justifications isn't helpful in a code review

Preconditions.checkArgument(encodedNs != null, "Invalid namespace: null");
String[] levels = Iterables.toArray(NAMESPACE_ESCAPED_SPLITTER.split(encodedNs), String.class);
return decodeNamespace(encodedNs, NAMESPACE_ESCAPED_SEPARATOR);
}

/**
* Takes in a string representation of a namespace as used for a URL parameter and returns the
* corresponding namespace.
*
* <p>See also {@link #encodeNamespace} for generating correctly formatted URLs.
*
* @param encodedNamespace a namespace to decode
* @param separator The namespace separator to be used as-is for decoding. This should be the same
* separator that was used when calling {@link RESTUtil#encodeNamespace(Namespace, String)}
* @return a namespace
*/
public static Namespace decodeNamespace(String encodedNamespace, String separator) {
Preconditions.checkArgument(encodedNamespace != null, "Invalid namespace: null");
Preconditions.checkArgument(
!Strings.isNullOrEmpty(separator), "Invalid separator: null or empty");

// use legacy splitter for backwards compatibility in case an old clients encoded the namespace
// with %1F
Splitter splitter =
encodedNamespace.contains(NAMESPACE_ESCAPED_SEPARATOR)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can this trigger if separator is not `\u001f"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you elaborate please what you mean exactly here?

? NAMESPACE_SPLITTER
: Splitter.on(separator);
String[] levels = Iterables.toArray(splitter.split(encodedNamespace), String.class);

// Decode levels in place
for (int i = 0; i < levels.length; i++) {
Expand Down
39 changes: 30 additions & 9 deletions core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.util.PropertyUtil;

public class ResourcePaths {
private static final Joiner SLASH = Joiner.on("/").skipNulls();
Expand All @@ -42,7 +43,12 @@ public class ResourcePaths {
public static final String V1_VIEW_RENAME = "/v1/{prefix}/views/rename";

public static ResourcePaths forCatalogProperties(Map<String, String> properties) {
return new ResourcePaths(properties.get(PREFIX));
return new ResourcePaths(
properties.get(PREFIX),
PropertyUtil.propertyAsString(
properties,
RESTSessionCatalog.NAMESPACE_SEPARATOR,
RESTUtil.NAMESPACE_ESCAPED_SEPARATOR));
}

public static String config() {
Expand All @@ -54,39 +60,50 @@ public static String tokens() {
}

private final String prefix;
private final String namespaceSeparator;

/**
* @deprecated since 1.7.0, will be made private in 1.8.0; use {@link
* ResourcePaths#forCatalogProperties(Map)} instead.
*/
@Deprecated
public ResourcePaths(String prefix) {
this(prefix, RESTUtil.NAMESPACE_ESCAPED_SEPARATOR);
}

private ResourcePaths(String prefix, String namespaceSeparator) {
this.prefix = prefix;
this.namespaceSeparator = namespaceSeparator;
}

public String namespaces() {
return SLASH.join("v1", prefix, "namespaces");
}

public String namespace(Namespace ns) {
return SLASH.join("v1", prefix, "namespaces", RESTUtil.encodeNamespace(ns));
return SLASH.join("v1", prefix, "namespaces", pathEncode(ns));
}

public String namespaceProperties(Namespace ns) {
return SLASH.join("v1", prefix, "namespaces", RESTUtil.encodeNamespace(ns), "properties");
return SLASH.join("v1", prefix, "namespaces", pathEncode(ns), "properties");
}

public String tables(Namespace ns) {
return SLASH.join("v1", prefix, "namespaces", RESTUtil.encodeNamespace(ns), "tables");
return SLASH.join("v1", prefix, "namespaces", pathEncode(ns), "tables");
}

public String table(TableIdentifier ident) {
return SLASH.join(
"v1",
prefix,
"namespaces",
RESTUtil.encodeNamespace(ident.namespace()),
pathEncode(ident.namespace()),
"tables",
RESTUtil.encodeString(ident.name()));
}

public String register(Namespace ns) {
return SLASH.join("v1", prefix, "namespaces", RESTUtil.encodeNamespace(ns), "register");
return SLASH.join("v1", prefix, "namespaces", pathEncode(ns), "register");
}

public String rename() {
Expand All @@ -98,7 +115,7 @@ public String metrics(TableIdentifier identifier) {
"v1",
prefix,
"namespaces",
RESTUtil.encodeNamespace(identifier.namespace()),
pathEncode(identifier.namespace()),
"tables",
RESTUtil.encodeString(identifier.name()),
"metrics");
Expand All @@ -109,20 +126,24 @@ public String commitTransaction() {
}

public String views(Namespace ns) {
return SLASH.join("v1", prefix, "namespaces", RESTUtil.encodeNamespace(ns), "views");
return SLASH.join("v1", prefix, "namespaces", pathEncode(ns), "views");
}

public String view(TableIdentifier ident) {
return SLASH.join(
"v1",
prefix,
"namespaces",
RESTUtil.encodeNamespace(ident.namespace()),
pathEncode(ident.namespace()),
"views",
RESTUtil.encodeString(ident.name()));
}

public String renameView() {
return SLASH.join("v1", prefix, "views", "rename");
}

private String pathEncode(Namespace ns) {
return RESTUtil.encodeNamespace(ns, namespaceSeparator);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
/** Adaptor class to translate REST requests into {@link Catalog} API calls. */
public class RESTCatalogAdapter implements RESTClient {
private static final Splitter SLASH = Splitter.on('/');
private static final String NAMESPACE_SEPARATOR = "%2E";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this not .? It's not a character that needs to be escaped.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CatalogTests#testNamespaceWithDot would fail when using . here, so in this case it needs to be the UTF-8 encoded string.


private static final Map<Class<? extends Exception>, Integer> EXCEPTION_ERROR_CODES =
ImmutableMap.<Class<? extends Exception>, Integer>builder()
Expand Down Expand Up @@ -292,6 +293,7 @@ public <T extends RESTResponse> T handleRequest(
Arrays.stream(Route.values())
.map(r -> Endpoint.create(r.method.name(), r.resourcePath))
.collect(Collectors.toList()))
.withOverride(RESTSessionCatalog.NAMESPACE_SEPARATOR, NAMESPACE_SEPARATOR)
.build());

case LIST_NAMESPACES:
Expand Down Expand Up @@ -675,7 +677,7 @@ public static void configureResponseFromException(
}

private static Namespace namespaceFromPathVars(Map<String, String> pathVars) {
return RESTUtil.decodeNamespace(pathVars.get("namespace"));
return RESTUtil.decodeNamespace(pathVars.get("namespace"), NAMESPACE_SEPARATOR);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that @jackye1995 suggested sending the separator from the client each time. Is that not what we want to do here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes and that is being handled in #10905. However, as a first step we need to make the namespace separator configurable (regardless of whether it's configurable from the server or the client), which is being handled in this PR. Making it controllable from the client and send a query param is handled after this PR in #10905 (and requires a vote on the spec change)

}

private static TableIdentifier tableIdentFromPathVars(Map<String, String> pathVars) {
Expand Down
54 changes: 47 additions & 7 deletions core/src/test/java/org/apache/iceberg/rest/TestRESTUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.util.Map;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

public class TestRESTUtil {

Expand Down Expand Up @@ -67,18 +70,24 @@ public void testStripTrailingSlash() {
}
}

@Test
public void testRoundTripUrlEncodeDecodeNamespace() {
@ParameterizedTest
@ValueSource(strings = {"%1F", "%2D", "%2E", "#", "_"})
public void testRoundTripUrlEncodeDecodeNamespace(String namespaceSeparator) {
// Namespace levels and their expected url encoded form
Object[][] testCases =
new Object[][] {
new Object[] {new String[] {"dogs"}, "dogs"},
new Object[] {new String[] {"dogs.named.hank"}, "dogs.named.hank"},
new Object[] {new String[] {"dogs/named/hank"}, "dogs%2Fnamed%2Fhank"},
new Object[] {new String[] {"dogs", "named", "hank"}, "dogs%1Fnamed%1Fhank"},
new Object[] {
new String[] {"dogs", "named", "hank"},
String.format("dogs%snamed%shank", namespaceSeparator, namespaceSeparator)
},
new Object[] {
new String[] {"dogs.and.cats", "named", "hank.or.james-westfall"},
"dogs.and.cats%1Fnamed%1Fhank.or.james-westfall"
String.format(
"dogs.and.cats%snamed%shank.or.james-westfall",
namespaceSeparator, namespaceSeparator),
}
};

Expand All @@ -89,14 +98,25 @@ public void testRoundTripUrlEncodeDecodeNamespace() {
Namespace namespace = Namespace.of(levels);

// To be placed into a URL path as query parameter or path parameter
assertThat(RESTUtil.encodeNamespace(namespace)).isEqualTo(encodedNs);
assertThat(RESTUtil.encodeNamespace(namespace, namespaceSeparator)).isEqualTo(encodedNs);

// Decoded (after pulling as String) from URL
Namespace asNamespace = RESTUtil.decodeNamespace(encodedNs);
assertThat(asNamespace).isEqualTo(namespace);
assertThat(RESTUtil.decodeNamespace(encodedNs, namespaceSeparator)).isEqualTo(namespace);
}
}

@Test
public void encodeAsOldClientAndDecodeAsNewServer() {
Namespace namespace = Namespace.of("first", "second", "third");
// old client would call encodeNamespace without specifying a separator
String encodedNamespace = RESTUtil.encodeNamespace(namespace);
assertThat(encodedNamespace).contains(RESTUtil.NAMESPACE_ESCAPED_SEPARATOR);

// newer server would try and decode the namespace with the separator it communicates to clients
Namespace decodeNamespace = RESTUtil.decodeNamespace(encodedNamespace, "%2E");
assertThat(decodeNamespace).isEqualTo(namespace);
}

@Test
public void testNamespaceUrlEncodeDecodeDoesNotAllowNull() {
assertThatExceptionOfType(IllegalArgumentException.class)
Expand Down Expand Up @@ -139,4 +159,24 @@ public void testOAuth2FormDataDecoding() {

assertThat(RESTUtil.decodeFormData(formString)).isEqualTo(expected);
}

@Test
public void nullOrEmptyNamespaceSeparator() {
String errorMsg = "Invalid separator: null or empty";
assertThatThrownBy(() -> RESTUtil.encodeNamespace(Namespace.empty(), null))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage(errorMsg);

assertThatThrownBy(() -> RESTUtil.encodeNamespace(Namespace.empty(), ""))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage(errorMsg);

assertThatThrownBy(() -> RESTUtil.decodeNamespace("namespace", null))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage(errorMsg);

assertThatThrownBy(() -> RESTUtil.decodeNamespace("namespace", ""))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage(errorMsg);
}
}
Loading