-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-20889: Support timestamp-micros in AvroSerDe #5779
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
ce3758f
3602cfc
8fdeb5a
578f94a
dba1d0d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,10 +162,22 @@ public long toEpochMilli() { | |
return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli(); | ||
} | ||
|
||
public long toEpochMicro() { | ||
return localDateTime.toEpochSecond(ZoneOffset.UTC) * 1_000_000 | ||
+ localDateTime.getNano() / 1000; | ||
} | ||
|
||
public long toEpochMilli(ZoneId id) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't zone conversion also applicable for micros? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I could not find any code changes related to zones, and hence did not add unnecessary changes. That said, adding a similar method for future use does make sense. |
||
return localDateTime.atZone(id).toInstant().toEpochMilli(); | ||
} | ||
|
||
public long toEpochMicro(ZoneId id) { | ||
return localDateTime.atZone(id) | ||
.toInstant() | ||
.getEpochSecond() * 1_000_000L + | ||
localDateTime.getNano() / 1000; | ||
} | ||
|
||
public void setTimeInMillis(long epochMilli) { | ||
localDateTime = LocalDateTime.ofInstant( | ||
Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC); | ||
|
@@ -236,6 +248,18 @@ public static Timestamp ofEpochMilli(long epochMilli, int nanos) { | |
.withNano(nanos)); | ||
} | ||
|
||
public static Timestamp ofEpochMicro(long epochMicro) { | ||
int nanos = (int) ((epochMicro % 1000000) * 1000); | ||
epochMicro -= nanos / 1_000_000; | ||
|
||
Instant instant = Instant.ofEpochSecond( | ||
epochMicro / 1_000_000, | ||
nanos | ||
); | ||
|
||
return new Timestamp(LocalDateTime.ofInstant(instant, ZoneOffset.UTC)); | ||
} | ||
|
||
/**
 * Replaces the nanosecond-of-second field of the wrapped
 * {@code localDateTime} with the given value.
 *
 * @param nanos value passed straight to {@code withNano}
 */
public void setNanos(int nanos) { | ||
localDateTime = localDateTime.withNano(nanos); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
2012-02-21 07:08:09.123|foo:1980-12-16 07:08:09.123,bar:1998-05-07 07:08:09.123|2011-09-04 07:08:09.123,2011-09-05 07:08:09.123 | ||
2014-02-11 07:08:09.123|baz:1981-12-16 07:08:09.123|2011-09-05 07:08:09.123 | ||
1947-02-11 07:08:09.123|baz:1921-12-16 07:08:09.123|2011-09-05 07:08:09.123 | ||
8200-02-11 07:08:09.123|baz:6981-12-16 07:08:09.123|1039-09-05 07:08:09.123 | ||
1412-02-21 07:08:09.123|foo:0980-12-16 07:08:09.123,bar:0998-05-07 07:08:09.123|0011-09-04 07:08:09.123,0011-09-05 07:08:09.123 | ||
1214-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0011-09-05 07:08:09.123 | ||
0847-02-11 07:08:09.123|baz:0921-12-16 07:08:09.123|0011-09-05 07:08:09.123 | ||
0600-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0039-09-05 07:08:09.123 | ||
2012-02-21 07:08:09.123|foo:1980-12-16 07:08:09.123456,bar:1998-05-07 07:08:09.123456|2011-09-04 07:08:09.123456,2011-09-05 07:08:09.123456 | ||
2014-02-11 07:08:09.123456|baz:1981-12-16 07:08:09.123456|2011-09-05 07:08:09.123456 | ||
1947-02-11 07:08:09.123|baz:1921-12-16 07:08:09.123|2011-09-05 07:08:09.123456 | ||
8200-02-11 07:08:09.123456|baz:6981-12-16 07:08:09.123456|1039-09-05 07:08:09.123456 | ||
1412-02-21 07:08:09.123456|foo:0980-12-16 07:08:09.123456,bar:0998-05-07 07:08:09.123|0011-09-04 07:08:09.123456,0011-09-05 07:08:09.123 | ||
1214-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123456|0011-09-05 07:08:09.123456 | ||
0847-02-11 07:08:09.123456|baz:0921-12-16 07:08:09.123456|0011-09-05 07:08:09.123456 | ||
0600-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123456|0039-09-05 07:08:09.123 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,13 +5,13 @@ stored as avro; | |
|
||
INSERT INTO hybrid_table VALUES | ||
('2012-02-21 07:08:09.123'), | ||
('2014-02-11 07:08:09.123'), | ||
('2014-02-11 07:08:09.123456'), | ||
('1947-02-11 07:08:09.123'), | ||
('8200-02-11 07:08:09.123'), | ||
('1012-02-21 07:15:11.123'), | ||
('1014-02-11 07:15:11.123'), | ||
('8200-02-11 07:08:09.123456'), | ||
('1012-02-21 07:15:11.12345'), | ||
('1014-02-11 07:15:11.1234'), | ||
('0947-02-11 07:15:11.123'), | ||
('0200-02-11 07:15:11.123'); | ||
('0200-02-11 07:15:11.1234'); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Always refrain from modifying existing tests. Consider adding new tests. |
||
|
||
select * from hybrid_table; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,13 +7,13 @@ stored as avro; | |
|
||
INSERT INTO hybrid_table VALUES | ||
('2012-02-21 07:08:09.123'), | ||
('2014-02-11 07:08:09.123'), | ||
('1947-02-11 07:08:09.123'), | ||
('2014-02-11 07:08:09.123456'), | ||
('1947-02-11 07:08:09.1234'), | ||
('8200-02-11 07:08:09.123'), | ||
('1012-02-21 07:15:11.123'), | ||
('1012-02-21 07:15:11.12345'), | ||
('1014-02-11 07:15:11.123'), | ||
('0947-02-11 07:15:11.123'), | ||
('0200-02-11 07:15:11.123'); | ||
('0947-02-11 07:15:11.12345'), | ||
('0200-02-11 07:15:11.123456'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Refrain from changing existing tests. Add a new test for timestamp-micros explicitly mentioning its column type. |
||
|
||
select * from hybrid_table; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Writes timestamps with differing fractional-second precision into an
-- Avro-backed table and reads them back.
CREATE EXTERNAL TABLE micros_table(`dt` timestamp) | ||
STORED AS AVRO; | ||
|
||
-- Three rows: six fractional digits (explicit cast), then three and five
-- fractional digits supplied as bare string literals.
INSERT INTO micros_table VALUES | ||
(cast('2024-08-09 14:08:26.326107' as timestamp)), | ||
('2012-02-21 07:08:09.123'), | ||
('1014-02-11 07:15:11.12345'); | ||
|
||
-- NOTE(review): no ORDER BY here, so the golden output presumably relies on insertion order; confirm.
SELECT * FROM micros_table; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider adding a test with timestamp-micros as a column and timestamp-millis as a column. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
PREHOOK: query: CREATE EXTERNAL TABLE micros_table(`dt` timestamp) | ||
STORED AS AVRO | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@micros_table | ||
POSTHOOK: query: CREATE EXTERNAL TABLE micros_table(`dt` timestamp) | ||
STORED AS AVRO | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@micros_table | ||
PREHOOK: query: INSERT INTO micros_table VALUES | ||
(cast('2024-08-09 14:08:26.326107' as timestamp)), | ||
('2012-02-21 07:08:09.123'), | ||
('1014-02-11 07:15:11.12345') | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: _dummy_database@_dummy_table | ||
PREHOOK: Output: default@micros_table | ||
POSTHOOK: query: INSERT INTO micros_table VALUES | ||
(cast('2024-08-09 14:08:26.326107' as timestamp)), | ||
('2012-02-21 07:08:09.123'), | ||
('1014-02-11 07:15:11.12345') | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: _dummy_database@_dummy_table | ||
POSTHOOK: Output: default@micros_table | ||
POSTHOOK: Lineage: micros_table.dt SCRIPT [] | ||
PREHOOK: query: SELECT * FROM micros_table | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@micros_table | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT * FROM micros_table | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@micros_table | ||
#### A masked pattern was here #### | ||
2024-08-09 14:08:26.326107 | ||
2012-02-21 07:08:09.123 | ||
1014-02-11 07:15:11.12345 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use similar semantics as `toEpochMilli()`?
`return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli() * 1000 + localDateTime.getNano() / 1000;`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doing so might lead to loss of precision due to integer overflow. Hence we are parsing the value to seconds here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli()` returns a long value, and because these values are added together the implicit datatype of the whole expression is long; hence there should be no chance of integer overflow.