Skip to content

Commit e4ec9a8

Browse files
committed
readyset-data: Delete Collation::default
It makes no sense at all to have a default when the two databases we support have different defaults. The existence of this function invites sloppy engineering, so it has to go. Change-Id: I2a8f8ce744f27d28433c3e95c655e4984bd6cafb Reviewed-on: https://gerrit.readyset.name/c/readyset/+/9242 Reviewed-by: Michael Zink <[email protected]> Tested-by: Buildkite CI
1 parent efec745 commit e4ec9a8

File tree

15 files changed

+68
-84
lines changed

15 files changed

+68
-84
lines changed

benchmarks/src/utils/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::time::Duration;
44

55
use database_utils::{DatabaseURL, QueryableConnection};
66
use readyset_client::status::CurrentStatus;
7-
use readyset_data::DfValue;
7+
use readyset_data::{Collation, DfValue, Dialect};
88
use readyset_sql::{ast::ShowStatement, DialectDisplay};
99
use tracing::info;
1010

@@ -47,7 +47,7 @@ pub async fn readyset_ready(target: &str) -> anyhow::Result<()> {
4747
let snapshot_status = rows.into_iter().find(|r| r[0] == b"Status".to_vec().into());
4848
let snapshot_status: String = if let Some(s) = snapshot_status {
4949
s[1].coerce_to(
50-
&readyset_data::DfType::Text(Default::default()),
50+
&readyset_data::DfType::Text(Collation::default_for(Dialect::DEFAULT_MYSQL)),
5151
&readyset_data::DfType::Unknown,
5252
)
5353
.unwrap()

dataflow-expression/src/eval/builtins.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use chrono_tz::Tz;
1212
use itertools::Either;
1313
use mysql_time::MySqlTime;
1414
use readyset_data::dialect::SqlEngine;
15-
use readyset_data::{Array, DfType, DfValue, TimestampTz};
15+
use readyset_data::{Array, Collation, DfType, DfValue, TimestampTz};
1616
use readyset_errors::{internal, invalid_query_err, unsupported, ReadySetError, ReadySetResult};
1717
use readyset_sql::ast::TimestampField;
1818
use readyset_util::math::integer_rnd;
@@ -1034,7 +1034,7 @@ impl BuiltinFunction {
10341034
if i % 2 == 0 {
10351035
keys.push(
10361036
arg.eval(record)?
1037-
.coerce_to(&DfType::Text(Default::default()), &DfType::Unknown)?,
1037+
.coerce_to(&DfType::Text(Collation::Utf8), &DfType::Unknown)?,
10381038
);
10391039
} else {
10401040
values.push(arg.eval(record)?);

dataflow-expression/src/lower.rs

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -377,13 +377,15 @@ impl BuiltinFunction {
377377
"concat" => {
378378
let arg1 = next_arg()?;
379379
let rest_args = args.by_ref().collect::<Vec<_>>();
380-
let collation = iter::once(&arg1)
381-
.chain(&rest_args)
382-
.find_map(|expr| match expr.ty() {
383-
DfType::Text(c) => Some(*c),
384-
_ => None,
385-
})
386-
.unwrap_or_default();
380+
let collation = Collation::unwrap_or_default(
381+
iter::once(&arg1)
382+
.chain(&rest_args)
383+
.find_map(|expr| match expr.ty() {
384+
DfType::Text(c) => Some(*c),
385+
_ => None,
386+
}),
387+
dialect,
388+
);
387389
let ty = DfType::Text(collation);
388390
(
389391
Self::Concat(
@@ -524,14 +526,14 @@ impl BuiltinFunction {
524526
unsupported!("Function {name} does not exist in {}", dialect.engine());
525527
}
526528
let expr = next_arg()?;
527-
(Self::Hex(expr), DfType::Text(Default::default()))
529+
(Self::Hex(expr), DfType::Text(Collation::Utf8))
528530
}
529531
"st_astext" => {
530532
// Note: `ST_AsText` is supported by both MySQL and PostGIS,
531533
let expr = next_arg()?;
532534
(
533535
Self::SpatialAsText { expr, dialect },
534-
DfType::Text(Default::default()),
536+
DfType::Text(Collation::Utf8),
535537
)
536538
}
537539
"st_aswkt" => {
@@ -541,7 +543,7 @@ impl BuiltinFunction {
541543
let expr = next_arg()?;
542544
(
543545
Self::SpatialAsText { expr, dialect },
544-
DfType::Text(Default::default()),
546+
DfType::Text(Collation::Utf8),
545547
)
546548
}
547549
SqlEngine::PostgreSQL => {
@@ -554,10 +556,7 @@ impl BuiltinFunction {
554556
match dialect.engine() {
555557
SqlEngine::PostgreSQL => {
556558
let expr = next_arg()?;
557-
(
558-
Self::SpatialAsEWKT { expr },
559-
DfType::Text(Default::default()),
560-
)
559+
(Self::SpatialAsEWKT { expr }, DfType::Text(Collation::Utf8))
561560
}
562561
SqlEngine::MySQL => {
563562
unsupported!("Function {name} does not exist in {}", dialect.engine());
@@ -656,21 +655,26 @@ fn get_text_type_max_length(ty: &DfType) -> Option<u16> {
656655

657656
fn mysql_text_type_cvt(left: &DfType, right: &DfType) -> Option<DfType> {
658657
if (left.is_any_text() || left.is_binary()) && (right.is_any_text() || right.is_binary()) {
659-
if matches!(left, DfType::Text(..)) || matches!(right, DfType::Text(..)) {
660-
Some(DfType::DEFAULT_TEXT)
661-
} else {
662-
let left_len = get_text_type_max_length(left)?;
663-
let right_len = get_text_type_max_length(right)?;
664-
match (left, right) {
665-
(DfType::Char(..), DfType::Char(..)) => Some(DfType::Char(
666-
cmp::max(left_len, right_len),
667-
Collation::default(),
668-
)),
669-
(_, _) => Some(DfType::VarChar(
670-
cmp::max(left_len, right_len),
671-
Collation::default(),
672-
)),
658+
match (left, right) {
659+
(DfType::Text(collation), _) | (_, DfType::Text(collation)) => {
660+
return Some(DfType::Text(*collation));
673661
}
662+
_ => (),
663+
}
664+
665+
let left_len = get_text_type_max_length(left)?;
666+
let right_len = get_text_type_max_length(right)?;
667+
match (left, right) {
668+
(DfType::Char(_, collation), DfType::Char(..)) => {
669+
Some(DfType::Char(cmp::max(left_len, right_len), *collation))
670+
}
671+
(DfType::VarChar(_, collation), _) | (_, DfType::VarChar(_, collation)) => {
672+
Some(DfType::VarChar(cmp::max(left_len, right_len), *collation))
673+
}
674+
(_, _) => Some(DfType::VarChar(
675+
cmp::max(left_len, right_len),
676+
Collation::Utf8,
677+
)),
674678
}
675679
} else {
676680
None

logictests/join_predicates_in_where_clause.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ select a.id, c.other from a join (select * from b) as c where a.b_id = c.id;
4747
50
4848

4949
statement ok
50-
create table c(id int, name text);
50+
create table c(id int, name text collate utf8mb4_0900_as_cs);
5151

5252
statement ok
53-
create table d(id int, name text);
53+
create table d(id int, name text collate utf8mb4_0900_as_cs);
5454

5555
statement ok
5656
insert into c values (1, 'hello'), (2, 'WORLD'), (3, 'HELLO');

readyset-data/src/array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ mod parse {
588588
// SAFETY: The input was a valid utf8 `str` when passed into `Array::from_str`, and
589589
// we aren't slicing a string on any bytes that are used as combining characters.
590590
let s = unsafe { str::from_utf8_unchecked(v.as_bytes()) };
591-
DfValue::from_str_and_collation(s, Collation::default())
591+
DfValue::from_str_and_collation(s, Collation::Utf8)
592592
},
593593
),
594594
))(i)

readyset-data/src/collation.rs

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,14 @@ thread_local! {
3636
/// [mysql]: https://dev.mysql.com/doc/refman/8.0/en/charset-mysql.html
3737
/// [postgres]: https://www.postgresql.org/docs/current/collation.html
3838
#[derive(
39-
Clone,
40-
Copy,
41-
Default,
42-
Hash,
43-
Serialize,
44-
Deserialize,
45-
Debug,
46-
PartialEq,
47-
Eq,
48-
EnumCount,
49-
FromRepr,
50-
Arbitrary,
39+
Clone, Copy, Hash, Serialize, Deserialize, Debug, PartialEq, Eq, EnumCount, FromRepr, Arbitrary,
5140
)]
5241
#[repr(u8)]
5342
pub enum Collation {
5443
/// The UTF-8 collation.
5544
///
5645
/// This collation, which is the default for PostgreSQL (and the [`Default`] for this type),
5746
/// corresponds to the *default* behavior of rust's [`String`] type.
58-
#[default]
5947
Utf8,
6048

6149
/// The case-insensitive text collation.
@@ -106,7 +94,7 @@ impl Collation {
10694
}
10795

10896
/// The default collation for a dialect.
109-
fn default_for(dialect: Dialect) -> Self {
97+
pub fn default_for(dialect: Dialect) -> Self {
11098
match dialect.engine() {
11199
SqlEngine::PostgreSQL => Self::Utf8,
112100
SqlEngine::MySQL => Self::Utf8AiCi,
@@ -130,6 +118,11 @@ impl Collation {
130118
Self::default_for(dialect)
131119
})
132120
}
121+
122+
/// Analogous to [`Option::unwrap_or_default`], but specific to the dialect.
123+
pub fn unwrap_or_default(collation: Option<Collation>, dialect: Dialect) -> Collation {
124+
collation.unwrap_or_else(|| Self::default_for(dialect))
125+
}
133126
}
134127

135128
fn collator_options(strength: Option<Strength>) -> CollatorOptions {

readyset-data/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,7 @@ impl DfValue {
597597
| DfType::Enum { .. }
598598
| DfType::Array(..) => {
599599
let s = std::str::from_utf8(bytes.as_ref())?;
600-
DfValue::from_str_and_collation(s, Default::default()).coerce_to(to_ty, from_ty)
600+
DfValue::from_str_and_collation(s, Collation::Utf8).coerce_to(to_ty, from_ty)
601601
}
602602
DfType::Text(collation)
603603
| DfType::Char(_, collation)
@@ -1759,7 +1759,7 @@ impl TryFrom<DfValue> for String {
17591759

17601760
impl<'a> From<&'a str> for DfValue {
17611761
fn from(s: &'a str) -> Self {
1762-
Self::from_str_and_collation(s, Default::default())
1762+
Self::from_str_and_collation(s, Collation::Utf8)
17631763
}
17641764
}
17651765

readyset-data/src/serde.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ impl Serialize for TextRef<'_> {
9393
serialize_variant(
9494
serializer,
9595
Variant::Text,
96-
&(Collation::default(), Bytes::new(self.0.as_bytes())),
96+
&(Collation::Utf8, Bytes::new(self.0.as_bytes())),
9797
)
9898
}
9999
}

readyset-data/src/text.rs

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -226,16 +226,6 @@ impl Text {
226226
unsafe { Self::new(true, collation, s.as_bytes()) }
227227
}
228228

229-
/// Create a new `Text` by copying a byte slice, which must contain valid UTF-8
230-
///
231-
/// # Safety
232-
///
233-
/// The caller must ensure that the argument passed to this function contains valid UTF-8
234-
#[inline]
235-
pub unsafe fn from_slice_unchecked(v: &[u8]) -> Self {
236-
Self::new(true, Default::default(), v)
237-
}
238-
239229
/// SAFETY: If `header.valid` is true, `v` must contain valid UTF-8
240230
unsafe fn new(valid: bool, collation: Collation, v: &[u8]) -> Self {
241231
Self {
@@ -266,7 +256,7 @@ impl TryFrom<&[u8]> for Text {
266256
impl From<&str> for Text {
267257
fn from(t: &str) -> Self {
268258
// SAFETY: `t` is guaranteed to contain valid UTF-8
269-
unsafe { Self::new(true, Default::default(), t.as_bytes()) }
259+
unsafe { Self::new(true, Collation::Utf8, t.as_bytes()) }
270260
}
271261
}
272262

@@ -293,23 +283,15 @@ impl Eq for Text {}
293283
impl fmt::Debug for Text {
294284
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295285
write!(f, "{:?}", self.as_str())?;
296-
297-
if self.collation() != Collation::default() {
298-
write!(f, " ({:?})", self.collation())?;
299-
}
300-
286+
write!(f, " ({:?})", self.collation())?;
301287
Ok(())
302288
}
303289
}
304290

305291
impl fmt::Debug for TinyText {
306292
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
307293
write!(f, "{:?}", self.as_str())?;
308-
309-
if self.collation() != Collation::default() {
310-
write!(f, " ({:?})", self.collation())?;
311-
}
312-
294+
write!(f, " ({:?})", self.collation())?;
313295
Ok(())
314296
}
315297
}

readyset-data/src/type.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,10 +303,15 @@ impl DfType {
303303
//
304304
// `varchar` by itself is an error in MySQL but synonymous with `text` in PostgreSQL.
305305
Text | TinyText | MediumText | LongText | VarChar(None) => {
306-
Self::Text(collation.unwrap_or_default())
306+
Self::Text(Collation::unwrap_or_default(collation, dialect))
307307
}
308-
VarChar(Some(len)) => Self::VarChar(len, collation.unwrap_or_default()),
309-
Char(len) => Self::Char(len.unwrap_or(1), collation.unwrap_or_default()),
308+
VarChar(Some(len)) => {
309+
Self::VarChar(len, Collation::unwrap_or_default(collation, dialect))
310+
}
311+
Char(len) => Self::Char(
312+
len.unwrap_or(1),
313+
Collation::unwrap_or_default(collation, dialect),
314+
),
310315
QuotedChar => Self::TinyInt,
311316
Blob | TinyBlob | MediumBlob | LongBlob | ByteArray => Self::Blob,
312317
VarBinary(len) => Self::VarBinary(len),
@@ -761,7 +766,7 @@ impl TryFrom<&PGType> for DfType {
761766
fn try_from(value: &PGType) -> Result<Self, Self::Error> {
762767
match value {
763768
&PGType::BOOL => Ok(Self::Bool),
764-
&PGType::CHAR => Ok(Self::Char(1, Collation::default())),
769+
&PGType::CHAR => Ok(Self::Char(1, Collation::Utf8)),
765770
&PGType::TEXT | &PGType::VARCHAR => Ok(Self::DEFAULT_TEXT),
766771
&PGType::INT2 => Ok(Self::SmallInt),
767772
&PGType::INT4 => Ok(Self::Int),

readyset-dataflow/src/ops/grouped/aggregate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ impl Aggregation {
7777
DfType::DEFAULT_NUMERIC
7878
}
7979
}
80-
Aggregation::GroupConcat { .. } => DfType::Text(/* TODO */ Collation::default()),
81-
Aggregation::JsonObjectAgg { .. } => DfType::Text(Collation::default()),
80+
Aggregation::GroupConcat { .. } => DfType::Text(/* TODO */ Collation::Utf8),
81+
Aggregation::JsonObjectAgg { .. } => DfType::Text(Collation::Utf8),
8282
};
8383

8484
Ok(GroupedOperator::new(

readyset-dataflow/src/ops/grouped/concat.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ impl GroupedOperation for GroupConcat {
201201
}
202202

203203
fn output_col_type(&self) -> DfType {
204-
DfType::Text(/* TODO */ Collation::default())
204+
DfType::Text(/* TODO */ Collation::Utf8)
205205
}
206206

207207
fn empty_value(&self) -> Option<DfValue> {

readyset-mysql/src/query_handler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,7 @@ impl QueryHandler for MySqlQueryHandler {
900900
name: field_name.clone(),
901901
table: None,
902902
},
903-
column_type: DfType::Char(8_u16, Collation::default()),
903+
column_type: DfType::Char(8_u16, Collation::Utf8),
904904
base: None,
905905
}]),
906906
columns: Cow::Owned(vec![field_name]),

readyset-server/src/controller/mir_to_flow.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ fn make_grouped_node(
602602
// aggregation before we pattern match for a generic aggregation.
603603
GroupedNodeType::Aggregation(Aggregation::GroupConcat { separator: sep }) => {
604604
let gc = GroupConcat::new(parent_na.address(), over_col_indx, group_col_indx, sep)?;
605-
let agg_col = make_agg_col(DfType::Text(/* TODO */ Collation::default()));
605+
let agg_col = make_agg_col(DfType::Text(/* TODO */ Collation::Utf8));
606606
cols.push(agg_col);
607607
set_names(&column_names(columns), &mut cols)?;
608608
mig.add_ingredient(name, cols, gc)

readyset-server/src/integration.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use itertools::Itertools;
2929
use readyset_client::consensus::{Authority, LocalAuthority, LocalAuthorityStore};
3030
use readyset_client::recipe::changelist::{Change, ChangeList, CreateCache};
3131
use readyset_client::{KeyComparison, Modification, SchemaType, ViewPlaceholder, ViewQuery};
32-
use readyset_data::{Bound, DfType, DfValue, Dialect, IntoBoundedRange};
32+
use readyset_data::{Bound, Collation, DfType, DfValue, Dialect, IntoBoundedRange};
3333
use readyset_errors::ReadySetError::{self, RpcFailed, SelectQueryCreationFailed};
3434
use readyset_sql::ast;
3535
use readyset_sql::ast::{OrderType, Relation, SqlQuery};
@@ -4196,7 +4196,7 @@ async fn correct_nested_view_schema() {
41964196

41974197
let expected_schema = vec![
41984198
("swvc.id".into(), DfType::Int),
4199-
("swvc.content".into(), DfType::DEFAULT_TEXT),
4199+
("swvc.content".into(), DfType::Text(Collation::Utf8AiCi)),
42004200
("swvc.vc".into(), DfType::BigInt),
42014201
("swvc.story".into(), DfType::Int),
42024202
];

0 commit comments

Comments
 (0)