Skip to content

Commit 0b1c24f

Browse files
committed
Merge remote-tracking branch 'upstream/master' into optimizeCheck
2 parents e0d943a + 8043304 commit 0b1c24f

File tree

35 files changed

+899
-203
lines changed

35 files changed

+899
-203
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6064,6 +6064,11 @@
60646064
"The replace function does not support nested column <colName>."
60656065
]
60666066
},
6067+
"SCHEMA_LEVEL_COLLATIONS" : {
6068+
"message" : [
6069+
"Default collation for the specified schema."
6070+
]
6071+
},
60676072
"SET_NAMESPACE_PROPERTY" : {
60686073
"message" : [
60696074
"<property> is a reserved namespace property, <msg>."

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,4 +1118,19 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
11181118
testBinaryLiteral("<=>", greaterThanBinary, 0)
11191119
}
11201120
}
1121+
1122+
test("SPARK-52262: FAILED_JDBC.TABLE_EXISTS not thrown on connection error") {
  // Rewrite the host part of the catalog URL so that the connection attempt is
  // expected to fail (NOTE(review): assumes the configured URL uses "localhost"
  // or "127.0.0.1" -- otherwise the URL is left unchanged).
  val urlConfKey = s"spark.sql.catalog.$catalogName.url"
  val brokenUrl = spark.conf.get(urlConfKey)
    .replace("localhost", "nonexistenthost")
    .replace("127.0.0.1", "1.2.3.4")
  val tableUnderTest = s"$catalogName.invalid"

  withSQLConf(urlConfKey -> brokenUrl) {
    // The analyzer wraps the underlying SQLException, so the wrapper type is
    // what gets intercepted here.
    val thrown = intercept[AnalysisException] {
      sql(s"SELECT * FROM $tableUnderTest")
    }
    // Whatever the reported condition is, it must not be the table-existence one.
    assert(thrown.getCondition !== "FAILED_JDBC.TABLE_EXISTS")
  }
}
11211136
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
================================================================================================
2+
key-value pairs sort
3+
================================================================================================
4+
5+
OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
6+
AMD EPYC 7763 64-Core Processor
7+
key-value pairs sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
8+
------------------------------------------------------------------------------------------------------------------------
9+
Tuple-sort using Arrays.sort() 17292 17584 413 1.4 691.7 1.0X
10+
KV-sort using Sorter 21245 21340 134 1.2 849.8 0.8X
11+
12+
13+
================================================================================================
14+
primitive int array sort
15+
================================================================================================
16+
17+
OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
18+
AMD EPYC 7763 64-Core Processor
19+
primitive int array sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
20+
-------------------------------------------------------------------------------------------------------------------------------
21+
Java Arrays.sort() on non-primitive int array 14231 14561 468 1.8 569.2 1.0X
22+
Java Arrays.sort() on primitive int array 2141 2145 6 11.7 85.6 6.6X
23+
Sorter without key reuse on primitive int array 8607 8612 7 2.9 344.3 1.7X
24+
Sorter with key reuse on primitive int array 10602 10621 27 2.4 424.1 1.3X
25+
26+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
================================================================================================
2+
key-value pairs sort
3+
================================================================================================
4+
5+
OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1014-azure
6+
AMD EPYC 7763 64-Core Processor
7+
key-value pairs sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
8+
------------------------------------------------------------------------------------------------------------------------
9+
Tuple-sort using Arrays.sort() 20545 20560 21 1.2 821.8 1.0X
10+
KV-sort using Sorter 26696 26797 143 0.9 1067.8 0.8X
11+
12+
13+
================================================================================================
14+
primitive int array sort
15+
================================================================================================
16+
17+
OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1014-azure
18+
AMD EPYC 7763 64-Core Processor
19+
primitive int array sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
20+
-------------------------------------------------------------------------------------------------------------------------------
21+
Java Arrays.sort() on non-primitive int array 15325 15510 263 1.6 613.0 1.0X
22+
Java Arrays.sort() on primitive int array 2088 2141 76 12.0 83.5 7.3X
23+
Sorter without key reuse on primitive int array 8254 8262 10 3.0 330.2 1.9X
24+
Sorter with key reuse on primitive int array 10184 10186 3 2.5 407.3 1.5X
25+
26+
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.util.collection
19+
20+
import java.lang.{Float => JFloat}
21+
import java.util
22+
23+
import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
24+
import org.apache.spark.util.random.XORShiftRandom
25+
26+
/**
27+
* Benchmark for o.a.s.util.collection.Sorter.
28+
* To run this benchmark:
29+
* {{{
30+
* 1. without sbt:
31+
* bin/spark-submit --class <this class> <spark core test jar>
32+
* 2. with sbt: build/sbt "core/Test/runMain <this class>"
33+
* 3. generate result:
34+
* SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/Test/runMain <this class>"
35+
* Results will be written to "benchmarks/SorterBenchmark-results.txt".
36+
* }}}
37+
*/
38+
object SorterBenchmark extends BenchmarkBase {
39+
40+
/**
 * Registers and runs two timed cases over the same 25,000,000 (Float, Integer) pairs:
 * sorting an array of tuples with `java.util.Arrays.sort`, and sorting a flattened
 * key/value array with `Sorter` over a `KVArraySortDataFormat`.
 */
def keyValuePairsSortBenchmark(): Unit = {
  val numElements = 25000000 // 25 mil
  val rand = new XORShiftRandom(123)
  val benchmark =
    new Benchmark(s"key-value pairs sort $numElements", numElements, output = output)

  // Source pairs: a random boxed Float key paired with the element's index as a boxed Integer.
  val kvTuples = new Array[(JFloat, Integer)](numElements)
  for (idx <- kvTuples.indices) {
    kvTuples(idx) = (JFloat.valueOf(rand.nextFloat()), Integer.valueOf(idx))
  }

  benchmark.addTimerCase("Tuple-sort using Arrays.sort()") { timer =>
    // Every run sorts a fresh copy so the source tuples are never reordered.
    val tuplesToSort = new Array[AnyRef](numElements)
    System.arraycopy(kvTuples, 0, tuplesToSort, 0, numElements)
    timer.startTiming()
    util.Arrays.sort(tuplesToSort, (left: AnyRef, right: AnyRef) =>
      left.asInstanceOf[(JFloat, _)]._1.compareTo(right.asInstanceOf[(JFloat, _)]._1))
    timer.stopTiming()
  }

  // Flattened layout for Sorter: key of pair i at index 2 * i, its value at index 2 * i + 1.
  val keyValues = new Array[AnyRef](numElements * 2)
  for (idx <- kvTuples.indices) {
    val (key, value) = kvTuples(idx)
    keyValues(2 * idx) = key
    keyValues(2 * idx + 1) = value
  }

  benchmark.addTimerCase("KV-sort using Sorter") { timer =>
    // Copying and sorter construction happen before the timer starts.
    val flatCopy = keyValues.clone()
    val sorter = new Sorter(new KVArraySortDataFormat[JFloat, AnyRef])
    timer.startTiming()
    sorter.sort(flatCopy, 0, numElements, (x: JFloat, y: JFloat) => x.compareTo(y))
    timer.stopTiming()
  }

  benchmark.run()
}
83+
84+
/**
 * Registers and runs four timed cases over the same 25,000,000 random ints:
 * `java.util.Arrays.sort` on a boxed array and on a primitive array, and `Sorter`
 * on a primitive array with and without a reusable key wrapper.
 */
def primitiveIntArraySortBenchmark(): Unit = {
  val numElements = 25000000 // 25 mil
  val rand = new XORShiftRandom(123)
  val benchmark =
    new Benchmark(s"primitive int array sort $numElements", numElements, output = output)

  // Shared inputs: the primitive values and a boxed copy of the very same values.
  val ints = Array.fill(numElements)(rand.nextInt())
  val intObjects: Array[Integer] = ints.map(value => Integer.valueOf(value))

  benchmark.addTimerCase("Java Arrays.sort() on non-primitive int array") { timer =>
    // Each case sorts its own copy; copying is done before the timer starts.
    val boxedCopy = intObjects.clone()
    timer.startTiming()
    util.Arrays.sort(boxedCopy, (x: Integer, y: Integer) => x.compareTo(y))
    timer.stopTiming()
  }

  benchmark.addTimerCase("Java Arrays.sort() on primitive int array") { timer =>
    val primitiveCopy = ints.clone()
    timer.startTiming()
    util.Arrays.sort(primitiveCopy)
    timer.stopTiming()
  }

  benchmark.addTimerCase("Sorter without key reuse on primitive int array") { timer =>
    val primitiveCopy = ints.clone()
    val plainSorter = new Sorter(new IntArraySortDataFormat)
    timer.startTiming()
    plainSorter.sort(primitiveCopy, 0, numElements, Ordering[Int])
    timer.stopTiming()
  }

  benchmark.addTimerCase("Sorter with key reuse on primitive int array") { timer =>
    val primitiveCopy = ints.clone()
    val reusingSorter = new Sorter(new KeyReuseIntArraySortDataFormat)
    timer.startTiming()
    reusingSorter.sort(primitiveCopy, 0, numElements, Ordering[IntWrapper])
    timer.stopTiming()
  }

  benchmark.run()
}
137+
138+
/** Runs both benchmark groups in order: key-value pairs first, then primitive int arrays. */
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
  runBenchmark("key-value pairs sort")(keyValuePairsSortBenchmark())
  runBenchmark("primitive int array sort")(primitiveIntArraySortBenchmark())
}
146+
147+
/**
 * Sort format for a plain Array[Int] in which the key of an element is the element itself.
 * Could be easily generified and specialized.
 */
class IntArraySortDataFormat extends AbstractIntArraySortDataFormat[Int] {

  /** Returns the value stored at `pos`; it doubles as the sort key. */
  override protected def getKey(data: Array[Int], pos: Int): Int = data(pos)
}
154+
155+
/**
 * Buffer operations shared by the Array[Int]-backed sort formats in this benchmark.
 * Subclasses decide how a key of type `K` is read from the array.
 */
abstract class AbstractIntArraySortDataFormat[K] extends SortDataFormat[K, Array[Int]] {

  /** Exchanges the elements stored at `pos0` and `pos1`. */
  override def swap(data: Array[Int], pos0: Int, pos1: Int): Unit = {
    val held = data(pos0)
    data(pos0) = data(pos1)
    data(pos1) = held
  }

  /** Copies the single element src(srcPos) into dst(dstPos). */
  override def copyElement(src: Array[Int], srcPos: Int, dst: Array[Int], dstPos: Int): Unit =
    dst(dstPos) = src(srcPos)

  /** Copies `length` elements starting at src(srcPos) into dst, starting at dstPos. */
  override def copyRange(
      src: Array[Int],
      srcPos: Int,
      dst: Array[Int],
      dstPos: Int,
      length: Int): Unit =
    System.arraycopy(src, srcPos, dst, dstPos, length)

  /** Allocates a new Array[Int] of exactly `length` elements. */
  override def allocate(length: Int): Array[Int] = new Array[Int](length)
}
178+
179+
/**
 * Mutable holder for an Int key, ordered by that key, so that a single instance can be
 * reused for successive keys.
 */
class IntWrapper(var key: Int = 0) extends Ordered[IntWrapper] {

  /** Compares the two wrapped keys; negative, zero or positive as for any `Ordered`. */
  override def compare(that: IntWrapper): Int = Integer.compare(this.key, that.key)
}
186+
187+
/**
 * SortDataFormat for Array[Int] whose keys are [[IntWrapper]] instances that can be
 * overwritten in place instead of being allocated for every lookup.
 */
class KeyReuseIntArraySortDataFormat extends AbstractIntArraySortDataFormat[IntWrapper] {

  /** Creates an empty wrapper (key 0) that can later be passed as `reuse`. */
  override def newKey(): IntWrapper = new IntWrapper()

  /**
   * Returns the key at `pos`. When `reuse` is non-null it is overwritten with the value
   * and returned; otherwise a new wrapper is allocated.
   */
  override def getKey(data: Array[Int], pos: Int, reuse: IntWrapper): IntWrapper = {
    if (reuse != null) {
      reuse.key = data(pos)
      reuse
    } else {
      new IntWrapper(data(pos))
    }
  }

  /** Key lookup without a reusable wrapper: always yields a newly allocated key. */
  override protected def getKey(data: Array[Int], pos: Int): IntWrapper =
    getKey(data, pos, null)
}
207+
}

0 commit comments

Comments
 (0)