
Commit b043e5c

HIVE-7023 : Bucket mapjoin is broken when the number of small aliases is two or more (Navis via Ashutosh Chauhan)
git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1593648 13f79535-47bb-0310-9956-ffa450edef68
Parent: ce19ef1

4 files changed (15 additions, 52 deletions)

.gitattributes

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@
 *.js text
 *.sql text
 *.q text
+*.q.out text diff

 *.sh text eol=lf

The added attribute tells git to treat .q.out files as text and to produce textual diffs for them.

ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java

Lines changed: 10 additions & 52 deletions
@@ -19,7 +19,6 @@

 import java.io.File;
 import java.io.IOException;
-import java.io.ObjectOutputStream;
 import java.io.OutputStream;
 import java.io.Serializable;
 import java.lang.management.ManagementFactory;
@@ -46,15 +45,13 @@
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.BucketMatcher;
 import org.apache.hadoop.hive.ql.exec.FetchOperator;
-import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.SecureCmdDoAs;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
 import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionException;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
@@ -340,6 +337,12 @@ public void startForward(String bigTableBucket) throws Exception {

   private void startForward(boolean inputFileChangeSenstive, String bigTableBucket)
       throws Exception {
+    for (Operator<?> source : work.getAliasToWork().values()) {
+      source.reset();
+    }
+    if (inputFileChangeSenstive) {
+      execContext.setCurrentBigBucketFile(bigTableBucket);
+    }
     for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
       String alias = entry.getKey();
       FetchOperator fetchOp = entry.getValue();
@@ -351,13 +354,6 @@ private void startForward(boolean inputFileChangeSenstive, String bigTableBucket

       // get the root operator
       Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
-      if (fetchOp.isEmptyTable()) {
-        //generate empty hashtable for empty table
-        this.generateDummyHashTable(alias, bigTableBucket);
-        forwardOp.close(false);
-        continue;
-      }
-
       // walk through the operator tree
       while (!forwardOp.getDone()) {
         InspectableObject row = fetchOp.getNextRow();
@@ -366,11 +362,10 @@ private void startForward(boolean inputFileChangeSenstive, String bigTableBucket
         }
         forwardOp.processOp(row.o, 0);
       }
-      if (inputFileChangeSenstive) {
-        execContext.setCurrentBigBucketFile(bigTableBucket);
-        forwardOp.reset();
-      }
-      forwardOp.close(false);
+      forwardOp.flush();
+    }
+    for (Operator<?> source : work.getAliasToWork().values()) {
+      source.close(false);
     }
   }

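Taken together, the three startForward hunks above leave the method looking roughly as follows. This is a sketch stitched from the diff, not verbatim source: the unchanged lines between the hunks are elided or filled in from context (the null-row check, for instance, is implied by the closing brace at the top of the previous hunk).

  // Reconstruction of startForward() after this commit, stitched from the hunks above.
  private void startForward(boolean inputFileChangeSenstive, String bigTableBucket)
      throws Exception {
    // Reset every alias's operator tree once, before any rows flow, so shared
    // downstream operators are not wiped between aliases.
    for (Operator<?> source : work.getAliasToWork().values()) {
      source.reset();
    }
    if (inputFileChangeSenstive) {
      execContext.setCurrentBigBucketFile(bigTableBucket);
    }
    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
      String alias = entry.getKey();
      FetchOperator fetchOp = entry.getValue();
      // ... per-alias fetch setup elided (unchanged lines between the hunks) ...
      Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
      while (!forwardOp.getDone()) {
        InspectableObject row = fetchOp.getNextRow();
        if (row == null) {
          break;
        }
        forwardOp.processOp(row.o, 0);
      }
      // flush() instead of close(): keep the shared tree alive for later aliases.
      forwardOp.flush();
    }
    // Close every tree exactly once, after all small aliases are drained.
    for (Operator<?> source : work.getAliasToWork().values()) {
      source.close(false);
    }
  }

The essential change is ordering. Previously each loop iteration reset and closed its own tree after processing; because every small alias's tree feeds the same hash table sink (the helper removed in the next hunk walks down to a shared HashTableSinkOperator with multiple parents), a later alias's reset could wipe state an earlier alias had already produced. With reset() hoisted before the loop, flush() used between aliases, and close(false) issued once at the end, the shared sink survives all aliases. This also removes the need for the per-alias empty-table shortcut, which is why generateDummyHashTable becomes dead code below.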
@@ -421,43 +416,6 @@ private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap)
     }
   }

-  private void generateDummyHashTable(String alias, String bigBucketFileName)
-      throws HiveException,IOException {
-    LOG.debug("generating dummy for " + alias);
-    // find the (byte)tag for the map join(HashTableSinkOperator)
-    Operator<? extends OperatorDesc> parentOp = work.getAliasToWork().get(alias);
-    Operator<? extends OperatorDesc> childOp = parentOp.getChildOperators().get(0);
-    while ((childOp != null) && (!(childOp instanceof HashTableSinkOperator))) {
-      parentOp = childOp;
-      assert parentOp.getChildOperators().size() == 1;
-      childOp = parentOp.getChildOperators().get(0);
-    }
-    if (childOp == null) {
-      throw new HiveException(
-          "Cannot find HashTableSink op by tracing down the table scan operator tree");
-    }
-    byte tag = (byte) childOp.getParentOperators().indexOf(parentOp);
-
-    // generate empty hashtable for this (byte)tag
-    Path tmpPath = this.getWork().getTmpPath();
-
-    String fileName = work.getBucketFileName(bigBucketFileName);
-
-    HashTableSinkOperator htso = (HashTableSinkOperator)childOp;
-    Path path = Utilities.generatePath(tmpPath, htso.getConf().getDumpFilePrefix(),
-        tag, fileName);
-    console.printInfo(Utilities.now() + "\tDump the hashtable into file: " + path);
-    FileSystem fs = path.getFileSystem(job);
-    ObjectOutputStream out = new ObjectOutputStream(fs.create(path));
-    try {
-      MapJoinTableContainerSerDe.persistDummyTable(out);
-    } finally {
-      out.close();
-    }
-    console.printInfo(Utilities.now() + "\tUpload 1 File to: " + path + " File size: "
-        + fs.getFileStatus(path).getLen());
-  }
-
   private void setUpFetchOpContext(FetchOperator fetchOp, String alias, String currentInputFile)
       throws Exception {

ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q

Lines changed: 4 additions & 0 deletions
@@ -34,3 +34,7 @@ select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
 -- The join is converted to a bucketed mapjoin with a mapjoin hint
 explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
 select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+-- HIVE-7023
+explain extended select /* + MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key;
+select /* + MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key;
The fourth changed file is not rendered in the diff view ("Binary file not shown"); presumably it is the updated auto_sortmerge_join_11.q.out golden output, which the *.q.out text diff attribute added above lets git diff as text going forward.
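The added queries pass two aliases (a and b) to the map-join hint, with c streamed as the big table, reproducing the two-or-more-small-aliases case from the commit title. As a schematic, self-contained illustration of the suspected failure mode (hypothetical names, not Hive code), consider a sink shared by two small aliases:

import java.util.ArrayList;
import java.util.List;

// Toy stand-in for a sink shared by several small-alias operator trees.
class SharedSink {
  final List<String> rows = new ArrayList<>();
  void reset() { rows.clear(); }
  void process(String row) { rows.add(row); }
}

public class ResetOrderingDemo {
  public static void main(String[] args) {
    SharedSink sink = new SharedSink();

    // Old ordering: each alias resets the tree after it finishes, so the
    // reset that follows alias a wipes a's rows before alias b runs.
    sink.process("a:key1");
    sink.reset();                     // per-alias reset, as in the old loop
    sink.process("b:key1");
    System.out.println("old flow: " + sink.rows);  // [b:key1] (a's rows lost)

    // New ordering: reset once up front, close once at the end.
    sink.reset();
    sink.process("a:key1");
    sink.process("b:key1");
    System.out.println("new flow: " + sink.rows);  // [a:key1, b:key1]
  }
}

With a single small alias the two orderings are indistinguishable, which is presumably why the existing single-alias tests above kept passing.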
