diff --git a/docs/advanced/column-types.md b/docs/advanced/column-types.md new file mode 100644 index 0000000000..46650e6683 --- /dev/null +++ b/docs/advanced/column-types.md @@ -0,0 +1,153 @@ +# Column Types + +In the tutorial, we stored scalar data types in our tables, like strings, numbers and timestamps. In practice, we often work with more complicated types that need to be converted to a data type our database supports. + +## Customising String Field Lengths + +As we discussed in [`TEXT` or `VARCHAR`](../tutorial/create-db-and-table.md#text-or-varchar), a `str` field type will be created as a `VARCHAR`, whose maximum length varies depending on the database engine you are using. + +For cases where you know you only need to store a certain length of text, you can reduce the maximum length of a string field using the `max_length` validation argument to `Field()`: + +```Python hl_lines="11" +{!./docs_src/advanced/column_types/tutorial001.py[ln:1-12]!} +``` + +/// details | 👀 Full file preview + +```Python +{!./docs_src/advanced/column_types/tutorial001.py!} +``` + +/// + +/// warning + +Database engines behave differently when you attempt to store longer text than the character length of the `VARCHAR` column. Notably: + +* SQLite does not enforce the length of a `VARCHAR`. It will happily store up to 500-million characters of text. +* MySQL will emit a warning, but will also truncate your text to fit the size of the `VARCHAR`. +* PostgreSQL will respond with an error code, and your query will not be executed. + +/// + +However, if you need to store much longer strings than `VARCHAR` can allow, databases provide `TEXT` or `CLOB` (**c**haracter **l**arge **ob**ject) column types. 
We can use these by specifying an SQLAlchemy column type to the field with the `sa_type` keyword argument: + +```Python hl_lines="12" +{!./docs_src/advanced/column_types/tutorial001.py[ln:5-45]!} +``` + +/// tip + +`Text` also accepts a character length argument, which databases use to optimise the storage of a particular field. Some databases support `TINYTEXT`, `TEXT`, `MEDIUMTEXT` and `LONGTEXT` column types - ranging from 255 bytes to 4 gigabytes. If you know the maximum length of data, specifying it like `Text(1000)` will automatically select the best-suited, supported type for your database engine. + +/// + + +With this approach, we can use [any kind of SQLAlchemy type](https://docs.sqlalchemy.org/en/20/core/type_basics.html). For example, we can store pickled objects in the database: + +```Python +{!./docs_src/advanced/column_types/tutorial002.py!} +``` + +## Supported Types + +Python types are mapped to column types like so: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
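+<table>
+<tr>
+<th>Python type</th><th>SQLAlchemy type</th><th>Database column types</th>
+</tr>
+<tr>
+<td>str</td><td>String</td><td>VARCHAR</td>
+</tr>
+<tr>
+<td>int</td><td>Integer</td><td>INTEGER</td>
+</tr>
+<tr>
+<td>float</td><td>Float</td><td>FLOAT, REAL, DOUBLE</td>
+</tr>
+<tr>
+<td>bool</td><td>Boolean</td><td>BOOL or TINYINT</td>
+</tr>
+<tr>
+<td>datetime.datetime</td><td>DateTime</td><td>DATETIME, TIMESTAMP, DATE</td>
+</tr>
+<tr>
+<td>datetime.date</td><td>Date</td><td>DATE</td>
+</tr>
+<tr>
+<td>datetime.timedelta</td><td>Interval</td><td>INTERVAL, INT</td>
+</tr>
+<tr>
+<td>datetime.time</td><td>Time</td><td>TIME, DATETIME</td>
+</tr>
+<tr>
+<td>bytes</td><td>LargeBinary</td><td>BLOB, BYTEA</td>
+</tr>
+<tr>
+<td>Decimal</td><td>Numeric</td><td>DECIMAL, FLOAT</td>
+</tr>
+<tr>
+<td>enum.Enum</td><td>Enum</td><td>ENUM, VARCHAR</td>
+</tr>
+<tr>
+<td>uuid.UUID</td><td>GUID</td><td>UUID, CHAR(32)</td>
+</tr>
+</table>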
+ +In addition, the following types are stored as `VARCHAR`: + +* ipaddress.IPv4Address +* ipaddress.IPv4Network +* ipaddress.IPv6Address +* ipaddress.IPv6Network +* pathlib.Path +* pydantic.networks.IPvAnyAddress +* pydantic.networks.IPvAnyInterface +* pydantic.networks.IPvAnyNetwork +* pydantic.EmailStr + +Note that while the column types for these are `VARCHAR`, values are not converted to and from strings. + +### IP Addresses + +IP Addresses from the [Python `ipaddress` module](https://docs.python.org/3/library/ipaddress.html){.external-link target=_blank} are stored as text. + +```Python hl_lines="5 11" +{!./docs_src/advanced/column_types/tutorial003.py[ln:1-15]!} +``` + +### Filesystem Paths + +Paths to files and directories using the [Python `pathlib` module](https://docs.python.org/3/library/pathlib.html){.external-link target=_blank} are stored as text. + +```Python hl_lines="2 12" +{!./docs_src/advanced/column_types/tutorial003.py[ln:1-15]!} +``` + +/// tip + +The stored value of a Path is the basic string value: `str(Path('../path/to/file'))`. If you need to store the full path ensure you call `absolute()` on the path before setting it in your model. + +/// + +### UUIDs + +UUIDs from the [Python `uuid` module](https://docs.python.org/3/library/uuid.html){.external-link target=_blank} are stored as native `UUID` types in supported databases (just PostgreSQL at the moment), otherwise as a `CHAR(32)`. + +```Python hl_lines="3 10" +{!./docs_src/advanced/column_types/tutorial003.py[ln:1-15]!} +``` + +### Email Addresses + +Email addresses using [Pydantic's `EmailStr` type](https://docs.pydantic.dev/latest/api/networks/#pydantic.networks.EmailStr){.external-link target=_blank} are stored as strings. + +```Python hl_lines="5 14" +{!./docs_src/advanced/column_types/tutorial003.py[ln:1-15]!} +``` + + +## Custom Pydantic types + +As SQLModel is built on Pydantic, you can use any custom type as long as it would work in a Pydantic model. 
However, if the type is not a subclass of [a type from the table above](#supported-types), you will need to specify an SQLAlchemy type to use. diff --git a/docs/tutorial/create-db-and-table.md b/docs/tutorial/create-db-and-table.md index 0d8a9a21ce..5dfb757ae1 100644 --- a/docs/tutorial/create-db-and-table.md +++ b/docs/tutorial/create-db-and-table.md @@ -500,7 +500,7 @@ To make it easier to start using **SQLModel** right away independent of the data /// tip -You will learn how to change the maximum length of string columns later in the Advanced Tutorial - User Guide. +You can learn how to change the maximum length of string columns later in the [Advanced Tutorial - User Guide](../advanced/column-types.md){.internal-link target=_blank}. /// diff --git a/docs_src/advanced/column_types/__init__.py b/docs_src/advanced/column_types/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs_src/advanced/column_types/tutorial001.py b/docs_src/advanced/column_types/tutorial001.py new file mode 100644 index 0000000000..a5270959db --- /dev/null +++ b/docs_src/advanced/column_types/tutorial001.py @@ -0,0 +1,80 @@ +from typing import Optional + +from sqlalchemy import Text +from sqlmodel import Field, Session, SQLModel, create_engine, select +from wonderwords import RandomWord + + +class Villian(SQLModel, table=True): + id: Optional[int] = Field(default=None, primary_key=True) + name: str = Field(index=True) + country_code: str = Field(max_length=2) + backstory: str = Field(sa_type=Text()) + + +sqlite_file_name = "database.db" +sqlite_url = f"sqlite:///{sqlite_file_name}" + +engine = create_engine(sqlite_url, echo=True) + + +def create_db_and_tables(): + SQLModel.metadata.create_all(engine) + + +def generate_backstory(words: int) -> str: + return " ".join(RandomWord().random_words(words, regex=r"\S+")) + + +def create_villains(): + villian_1 = Villian( + name="Green Gobbler", country_code="US", backstory=generate_backstory(500) + ) + villian_2 = Villian( 
+ name="Arnim Zozza", country_code="DE", backstory=generate_backstory(500) + ) + villian_3 = Villian( + name="Low-key", country_code="AS", backstory=generate_backstory(500) + ) + + with Session(engine) as session: + session.add(villian_1) + session.add(villian_2) + session.add(villian_3) + + session.commit() + + +def count_words(sentence: str) -> int: + return sentence.count(" ") + 1 + + +def select_villians(): + with Session(engine) as session: + statement = select(Villian).where(Villian.name == "Green Gobbler") + results = session.exec(statement) + villian_1 = results.one() + print( + "Villian 1:", + {"name": villian_1.name, "country_code": villian_1.country_code}, + count_words(villian_1.backstory), + ) + + statement = select(Villian).where(Villian.name == "Low-key") + results = session.exec(statement) + villian_2 = results.one() + print( + "Villian 2:", + {"name": villian_2.name, "country_code": villian_2.country_code}, + count_words(villian_2.backstory), + ) + + +def main(): + create_db_and_tables() + create_villains() + select_villians() + + +if __name__ == "__main__": + main() diff --git a/docs_src/advanced/column_types/tutorial002.py b/docs_src/advanced/column_types/tutorial002.py new file mode 100644 index 0000000000..2971ac3955 --- /dev/null +++ b/docs_src/advanced/column_types/tutorial002.py @@ -0,0 +1,80 @@ +from datetime import datetime + +try: + from datetime import UTC +except ImportError: + UTC = None +from sqlalchemy import PickleType +from sqlmodel import Field, Session, SQLModel, create_engine, select +from typing_extensions import TypedDict + + +class ModelOutput(TypedDict): + model_checkpoint: datetime + score: float + + +class ModelResult(SQLModel, table=True): + id: int = Field(default=None, primary_key=True) + output: ModelOutput = Field(sa_type=PickleType()) + + +sqlite_file_name = "database.db" +sqlite_url = f"sqlite:///{sqlite_file_name}" + +engine = create_engine(sqlite_url, echo=True) + + +def create_db_and_tables(): + 
+ SQLModel.metadata.create_all(engine) + + +def create_model_results(): + checkpoint = datetime.now(tz=UTC) + + result_1 = ModelResult( + output={ + "model_checkpoint": checkpoint, + "score": 0.9123, + } + ) + result_2 = ModelResult( + output={ + "model_checkpoint": checkpoint, + "score": 0.1294, + } + ) + result_3 = ModelResult( + output={ + "model_checkpoint": checkpoint, + "score": 0.4821, + } + ) + + with Session(engine) as session: + session.add(result_1) + session.add(result_2) + session.add(result_3) + + session.commit() + + +def get_average_score(): + with Session(engine) as session: + statement = select(ModelResult) + result = session.exec(statement) + model_results = result.all() + + scores = [model_result.output["score"] for model_result in model_results] + + print("Average score:", sum(scores) / len(scores)) + + +def main(): + create_db_and_tables() + create_model_results() + get_average_score() + + +if __name__ == "__main__": + main() diff --git a/docs_src/advanced/column_types/tutorial003.py b/docs_src/advanced/column_types/tutorial003.py new file mode 100644 index 0000000000..2e94d3270c --- /dev/null +++ b/docs_src/advanced/column_types/tutorial003.py @@ -0,0 +1,78 @@ +from datetime import datetime + +try: + from datetime import UTC +except ImportError: + UTC = None +from pathlib import Path +from uuid import UUID, uuid4 + +from pydantic import EmailStr, IPvAnyAddress +from sqlmodel import Field, Session, SQLModel, create_engine, select + + +class Avatar(SQLModel, table=True): + id: UUID = Field(default_factory=uuid4, primary_key=True) + source_ip_address: IPvAnyAddress + upload_location: Path + uploaded_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC)) + author_email: EmailStr + + +sqlite_file_name = "database.db" +sqlite_url = f"sqlite:///{sqlite_file_name}" + +engine = create_engine(sqlite_url, echo=True) + + +def create_db_and_tables(): + SQLModel.metadata.create_all(engine) + + +def create_avatars(): + avatar_1 = Avatar( + 
source_ip_address="127.0.0.1", + upload_location="/uploads/1/123456789.jpg", + author_email="tiangolo@example.com", + ) + + avatar_2 = Avatar( + source_ip_address="192.168.0.1", + upload_location="/uploads/9/987654321.png", + author_email="rmasters@example.com", + ) + + with Session(engine) as session: + session.add(avatar_1) + session.add(avatar_2) + + session.commit() + + +def read_avatars(): + with Session(engine) as session: + statement = select(Avatar).where(Avatar.author_email == "tiangolo@example.com") + result = session.exec(statement) + avatar_1: Avatar = result.one() + + print( + "Avatar 1:", + { + "email": avatar_1.author_email, + "email_type": type(avatar_1.author_email), + "ip_address": avatar_1.source_ip_address, + "ip_address_type": type(avatar_1.source_ip_address), + "upload_location": avatar_1.upload_location, + "upload_location_type": type(avatar_1.upload_location), + }, + ) + + +def main(): + create_db_and_tables() + create_avatars() + read_avatars() + + +if __name__ == "__main__": + main() diff --git a/mkdocs.yml b/mkdocs.yml index ce98f1524e..22a82b1657 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -98,6 +98,7 @@ nav: - Advanced User Guide: - advanced/index.md - advanced/decimal.md + - advanced/column-types.md - alternatives.md - help.md - contributing.md diff --git a/pyproject.toml b/pyproject.toml index 10d73793d2..98159cd908 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,9 @@ httpx = "0.24.1" dirty-equals = "^0.6.0" typer-cli = "^0.0.13" mkdocs-markdownextradata-plugin = ">=0.1.7,<0.3.0" +# For column type tests +wonderwords = "^2.2.0" +pydantic = {extras = ["email"], version = ">=1.10.13,<3.0.0"} [build-system] requires = ["poetry-core"] diff --git a/sqlmodel/main.py b/sqlmodel/main.py index 10064c7116..4f86a239d4 100644 --- a/sqlmodel/main.py +++ b/sqlmodel/main.py @@ -26,6 +26,7 @@ from pydantic import BaseModel from pydantic.fields import FieldInfo as PydanticFieldInfo +from pydantic.networks import EmailStr, IPvAnyAddress, 
IPvAnyInterface, IPvAnyNetwork from sqlalchemy import ( Boolean, Column, @@ -600,8 +601,16 @@ def get_sqlalchemy_type(field: Any) -> Any: return AutoString if issubclass(type_, ipaddress.IPv6Network): return AutoString + if issubclass(type_, IPvAnyAddress): + return AutoString + if issubclass(type_, IPvAnyInterface): + return AutoString + if issubclass(type_, IPvAnyNetwork): + return AutoString if issubclass(type_, Path): return AutoString + if issubclass(type_, EmailStr): + return AutoString if issubclass(type_, uuid.UUID): return GUID raise ValueError(f"{type_} has no matching SQLAlchemy type") diff --git a/tests/test_advanced/test_column_types/__init__.py b/tests/test_advanced/test_column_types/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_advanced/test_column_types/test_tutorial001.py b/tests/test_advanced/test_column_types/test_tutorial001.py new file mode 100644 index 0000000000..e3439feee2 --- /dev/null +++ b/tests/test_advanced/test_column_types/test_tutorial001.py @@ -0,0 +1,44 @@ +from unittest.mock import patch + +from sqlmodel import create_engine + +from ...conftest import get_testing_print_function + +expected_calls = [ + [ + "Villian 1:", + { + "name": "Green Gobbler", + "country_code": "US", + }, + 500, + ], + [ + "Villian 2:", + { + "name": "Low-key", + "country_code": "AS", + }, + 500, + ], +] + + +def test_tutorial(clear_sqlmodel): + """ + Unfortunately, SQLite does not enforce varchar lengths, so we can't test an oversize case without spinning up a + database engine. 
+ + """ + + from docs_src.advanced.column_types import tutorial001 as mod + + mod.sqlite_url = "sqlite://" + mod.engine = create_engine(mod.sqlite_url) + calls = [] + + new_print = get_testing_print_function(calls) + + with patch("builtins.print", new=new_print): + mod.main() + assert calls == expected_calls diff --git a/tests/test_advanced/test_column_types/test_tutorial002.py b/tests/test_advanced/test_column_types/test_tutorial002.py new file mode 100644 index 0000000000..90e90c707b --- /dev/null +++ b/tests/test_advanced/test_column_types/test_tutorial002.py @@ -0,0 +1,25 @@ +from unittest.mock import patch + +import pytest +from sqlmodel import create_engine + +from ...conftest import get_testing_print_function, needs_pydanticv2 + +expected_calls = [ + ["Average score:", pytest.approx(0.5079, abs=0.0001)], +] + + +@needs_pydanticv2 +def test_tutorial(clear_sqlmodel): + from docs_src.advanced.column_types import tutorial002 as mod + + mod.sqlite_url = "sqlite://" + mod.engine = create_engine(mod.sqlite_url) + calls = [] + + new_print = get_testing_print_function(calls) + + with patch("builtins.print", new=new_print): + mod.main() + assert calls == expected_calls diff --git a/tests/test_advanced/test_column_types/test_tutorial003.py b/tests/test_advanced/test_column_types/test_tutorial003.py new file mode 100644 index 0000000000..c778946fce --- /dev/null +++ b/tests/test_advanced/test_column_types/test_tutorial003.py @@ -0,0 +1,34 @@ +from unittest.mock import patch + +from sqlmodel import create_engine + +from ...conftest import get_testing_print_function, needs_pydanticv2 + +expected_calls = [ + [ + "Avatar 1:", + { + "email": "tiangolo@example.com", + "email_type": str, + "ip_address": "127.0.0.1", + "ip_address_type": str, + "upload_location": "/uploads/1/123456789.jpg", + "upload_location_type": str, + }, + ], +] + + +@needs_pydanticv2 +def test_tutorial(clear_sqlmodel): + from docs_src.advanced.column_types import tutorial003 as mod + + mod.sqlite_url = 
"sqlite://" + mod.engine = create_engine(mod.sqlite_url) + calls = [] + + new_print = get_testing_print_function(calls) + + with patch("builtins.print", new=new_print): + mod.main() + assert calls == expected_calls