From 8c05fd816b4826002be64c8be16f170b1a0d6181 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 11:57:06 -0400 Subject: [PATCH 01/25] =?UTF-8?q?Rename=20types.py=20=E2=86=92=5Ftypes.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Necessary because it breaks some namespace imports in SQLAlchemy "types.py" is a pretty common package name. Other dialects appear to do the same underscore prefix approach Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 2 +- src/databricks/sqlalchemy/{types.py => _types.py} | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) rename src/databricks/sqlalchemy/{types.py => _types.py} (91%) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index d1d4782d9..6eb9d7a2b 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -13,7 +13,7 @@ from databricks import sql # This import is required to process our @compiles decorators -import databricks.sqlalchemy.types +import databricks.sqlalchemy._types from databricks.sqlalchemy.base import ( diff --git a/src/databricks/sqlalchemy/types.py b/src/databricks/sqlalchemy/_types.py similarity index 91% rename from src/databricks/sqlalchemy/types.py rename to src/databricks/sqlalchemy/_types.py index 4b10fc6f1..032fa3f3f 100644 --- a/src/databricks/sqlalchemy/types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -1,6 +1,7 @@ import sqlalchemy from sqlalchemy.ext.compiler import compiles +import databricks.sql @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @@ -78,3 +79,15 @@ def compile_array_databricks(type_, compiler, **kw): inner = compiler.process(type_.item_type, **kw) return f"ARRAY<{inner}>" + + +class DatabricksBinaryType(sqlalchemy.types.TypeDecorator): + """ + """ + + impl = sqlalchemy.types.BINARY + + cache_ok = True + + def process_bind_param(self, value, dialect): + return databricks.sql.BINARY(value) From 68159352cfd7284bb3a1157fb971e31b60727ea6 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 13:25:58 -0400 Subject: [PATCH 02/25] Mark BINARY sqlalchemy tests as skip because connector doesn't support them Neither inline nor bound parameter implementations can handle BINARY test_suite.py::BinaryTest_databricks+databricks::test_binary_roundtrip[7\xe7\x9f] SKIPPED (pysql doesn't support binding of BINARY type parameters) test_suite.py::BinaryTest_databricks+databricks::test_binary_roundtrip[this is binary] SKIPPED (pysql doesn't support binding of BINARY type parameters) Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 7a840404b..10452bf95 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -24,20 +24,9 @@ # See further: https://github.com/sqlalchemy/sqlalchemy/blob/rel_1_4_48/README.dialects.rst +@pytest.mark.skip(reason="pysql doesn't support binding of BINARY type parameters") class BinaryTest(BinaryTest): - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_binary_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ - - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_pickle_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ + pass class DateHistoricTest(DateHistoricTest): From 75368a50e609dfaf36c315644bd15c3d693a2172 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 13:34:27 -0400 Subject: [PATCH 03/25] Stop skipping DateHistoric tests This test requires date_historic and datetime_literals requirements be marked open. test_suite.py::DateHistoricTest_databricks+databricks::test_literal PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_null PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 19 ++++++++++++++++++- src/databricks/sqlalchemy/test/test_suite.py | 20 -------------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 7da460057..37a7a9791 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -31,4 +31,21 @@ def __some_example_requirement(self): class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - pass + + @property + def date_historic(self): + """target dialect supports representation of Python + datetime.datetime() objects with historic (pre 1970) values.""" + + return sqlalchemy.testing.exclusions.open() + + @property + def datetime_literals(self): + """target dialect supports rendering of a date, time, or datetime as a + literal string, e.g. via the TypeEngine.literal_processor() method. + + """ + + return sqlalchemy.testing.exclusions.open() + + diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 10452bf95..0f5f9d27f 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,26 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateHistoricTest(DateHistoricTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(1727, 4, 1)" with datatype DATE - """ - - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '1727-04-01' != datetime.date(1727, 4, 1) - """ - - class DateTest(DateTest): @pytest.mark.skip( reason="Date type implementation needs work. Cannot render literal values." From da9b9f24ea3c0a09328e25c8f0a22cbf3dc943ca Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 13:38:28 -0400 Subject: [PATCH 04/25] Stop skipping DateTest tests No changes to requirements.py were needed test_suite.py::DateTest_databricks+databricks::test_literal PASSED test_suite.py::DateTest_databricks+databricks::test_null PASSED test_suite.py::DateTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 0f5f9d27f..b5865f6e8 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,26 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateTest(DateTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(2012, 10, 15)" with datatype DATE - """ - - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15' != datetime.date(2012, 10, 15) - """ - - class DateTimeHistoricTest(DateTimeHistoricTest): @pytest.mark.skip(reason="Date type implementation needs work") def test_literal(self): From b7afd1b56257377e6249cfbeb2d0315129a42110 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 20:31:42 -0400 Subject: [PATCH 05/25] Stop skipping DateTimeHistoric tests This is the first case where implement our own process_result_value method. For this to work, we need to update the dialect.colspecs map so that SQLAlchemy knows to use our custom implementation rather than its default test_suite.py::DateTimeHistoricTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 6 +++- src/databricks/sqlalchemy/_types.py | 23 +++++++++----- src/databricks/sqlalchemy/requirements.py | 7 +++++ src/databricks/sqlalchemy/test/test_suite.py | 30 ------------------- .../sqlalchemy/test_local/test_types.py | 4 +-- 5 files changed, 30 insertions(+), 40 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 6eb9d7a2b..6d61b5eb9 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -13,7 +13,7 @@ from databricks import sql # This import is required to process our @compiles decorators -import databricks.sqlalchemy._types +import databricks.sqlalchemy._types as dialect_type_impl from databricks.sqlalchemy.base import ( @@ -48,6 +48,10 @@ class DatabricksDialect(default.DefaultDialect): non_native_boolean_check_constraint: bool = False paramstyle: str = "named" + colspecs = { + sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType + } + @classmethod def dbapi(cls): return sql diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 032fa3f3f..163f1ca2b 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -1,7 +1,9 @@ import sqlalchemy from sqlalchemy.ext.compiler import compiles -import databricks.sql +from typing import Union + +from datetime import datetime @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @@ -60,7 +62,7 @@ def compile_datetime_databricks(type_, compiler, **kw): """ We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP" instead of "DATETIME" """ - return "TIMESTAMP" + return "TIMESTAMP_NTZ" @compiles(sqlalchemy.types.ARRAY, "databricks") @@ -81,13 +83,20 @@ def compile_array_databricks(type_, compiler, **kw): return f"ARRAY<{inner}>" -class DatabricksBinaryType(sqlalchemy.types.TypeDecorator): - """ +class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator): + """The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ + includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that + the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any + timezone set, we need to strip the timezone off the value received from pysql. + + It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. """ - impl = sqlalchemy.types.BINARY + impl = sqlalchemy.types.DateTime cache_ok = True - def process_bind_param(self, value, dialect): - return databricks.sql.BINARY(value) + def process_result_value(self, value: Union[None, datetime], dialect): + if value is None: + return None + return value.replace(tzinfo=None) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 37a7a9791..c2f0de026 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -39,6 +39,13 @@ def date_historic(self): return sqlalchemy.testing.exclusions.open() + @property + def datetime_historic(self): + """target dialect supports representation of Python + datetime.datetime() objects with historic (pre 1970) values.""" + + return sqlalchemy.testing.exclusions.open() + @property def datetime_literals(self): """target dialect supports rendering of a date, time, or datetime as a diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index b5865f6e8..5e62e9a7a 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,36 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateTimeHistoricTest(DateTimeHistoricTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(1850, 11, 10, 11, 52, 35)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '1850-11-10 11:52:35.000000' != datetime.datetime(1850, 11, 10, 11, 52, 35) - """ - - class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): @pytest.mark.skip(reason="Date type implementation needs work") def test_literal(self): diff --git a/src/databricks/sqlalchemy/test_local/test_types.py b/src/databricks/sqlalchemy/test_local/test_types.py index 91f11e17e..f7423f697 100644 --- a/src/databricks/sqlalchemy/test_local/test_types.py +++ b/src/databricks/sqlalchemy/test_local/test_types.py @@ -36,12 +36,12 @@ class DatabricksDataType(enum.Enum): sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY, sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN, sqlalchemy.types.Date: DatabricksDataType.DATE, - sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Double: DatabricksDataType.DOUBLE, sqlalchemy.types.Enum: DatabricksDataType.STRING, sqlalchemy.types.Float: DatabricksDataType.FLOAT, sqlalchemy.types.Integer: DatabricksDataType.INT, - sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL, sqlalchemy.types.PickleType: DatabricksDataType.BINARY, sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT, From f4a59f51631c02f88dcc2f65689640ce388b4560 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 20:35:26 -0400 Subject: [PATCH 06/25] Stop skipping DateTimeTest These were fixed by updating our @compiles directives and dialect.colspecs test_suite.py::DateTimeTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTimeTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 30 -------------------- 1 file changed, 30 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 5e62e9a7a..95d323290 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -59,36 +59,6 @@ def test_select_direct(self): """ -class DateTimeTest(DateTimeTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - """ - - class FetchLimitOffsetTest(FetchLimitOffsetTest): @pytest.mark.skip( reason="Dialect should advertise which offset rules Databricks supports. Offset handling needs work." From c817509969725bb4b988b8a98c65a86f26e5c77b Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:16:10 -0400 Subject: [PATCH 07/25] Stop skipping TimeTest This fix follows the same pattern used to make DateTimeHistoricTest work test_suite.py::TimeTest_databricks+databricks::test_literal PASSED test_suite.py::TimeTest_databricks+databricks::test_null PASSED test_suite.py::TimeTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::TimeTest_databricks+databricks::test_round_trip PASSED test_suite.py::TimeTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::TimeTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 3 +- src/databricks/sqlalchemy/_types.py | 32 +++++++++++++ src/databricks/sqlalchemy/test/test_suite.py | 47 -------------------- 3 files changed, 34 insertions(+), 48 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 6d61b5eb9..7bb90c74d 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -49,7 +49,8 @@ class DatabricksDialect(default.DefaultDialect): paramstyle: str = "named" colspecs = { - sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType + sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, + sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, } @classmethod diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 163f1ca2b..2c32e0da8 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -100,3 +100,35 @@ def process_result_value(self, value: Union[None, datetime], dialect): if value is None: return None return value.replace(tzinfo=None) + +class DatabricksTimeType(sqlalchemy.types.TypeDecorator): + """Databricks has no native TIME type. So we store it as a string. + """ + + impl = sqlalchemy.types.Time + cache_ok = True + + + def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: + """Values sent to the database are converted to %:H:%M:%S strings. + """ + if value is None: + return None + return value.strftime("%H:%M:%S") + + def process_literal_param(self, value, dialect) -> datetime.time: + """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails + because the string literal renderer receives a str() object and calls .isoformat() on it. + + Whereas this method receives a datetime.time() object which is subsequently passed to that + same renderer. And that works. + """ + return value + + + def process_result_value(self, value: Union[None, str], dialect) -> Union[datetime.time, None]: + """Values received from the database are parsed into datetime.time() objects + """ + if value is None: + return None + return datetime.strptime(value, "%H:%M:%S").time() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 95d323290..ecfbfbde1 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -355,53 +355,6 @@ def test_select_direct(self): """ -class TimeTest(TimeTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - class TimestampMicrosecondsTest(TimestampMicrosecondsTest): @pytest.mark.skip( reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." From d3a76a493f9cdf324e1b3476cc699897c6bb0472 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:18:20 -0400 Subject: [PATCH 08/25] Stop skipping DateTimeCoercedToDateTimeTest The fixes for DateTimes in the past few commits have enabled these tests to pass. test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 52 -------------------- 1 file changed, 52 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index ecfbfbde1..bbbcc019a 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -393,58 +393,6 @@ def test_select_direct(self): """ -class DateTimeCoercedToDateTimeTest(DateTimeCoercedToDateTimeTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Literal values not coerced properly." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - assert '2012-10-15 12:57:18.000000' == datetime.datetime(2012, 10, 15, 12, 57, 18) - """ - - @pytest.mark.skip(reason="Forthcoming deprecated feature.") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null_bound_comparison(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip_decorated(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): From d4afb6080114bd6652c13bec7a9502a10f352d20 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:20:58 -0400 Subject: [PATCH 09/25] Stop skipping TimestampMicrosecondsTest I needed to add an entry to requirements.py otherwise these tests were skipped. test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_literal PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_null PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_round_trip PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 8 +++++ src/databricks/sqlalchemy/test/test_suite.py | 35 -------------------- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index c2f0de026..cc76a47e7 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -54,5 +54,13 @@ def datetime_literals(self): """ return sqlalchemy.testing.exclusions.open() + + @property + def timestamp_microseconds(self): + """target dialect supports representation of Python + datetime.datetime() with microsecond objects but only + if TIMESTAMP is used.""" + + return sqlalchemy.testing.exclusions.open() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index bbbcc019a..38798abde 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -355,42 +355,7 @@ def test_select_direct(self): """ -class TimestampMicrosecondsTest(TimestampMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype TIMESTAMP - """ - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ class ExceptionTest(ExceptionTest): From ab8e28fedaf7a1639db7bd64ecdfc12232827c1d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:22:19 -0400 Subject: [PATCH 10/25] Stop skipping DateTimeMicrosecondsTest Changes in earlier commits around DateTime types have enabled these to pass. test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_round_trip_decorated PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 30 -------------------- 1 file changed, 30 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 38798abde..f925be259 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,36 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ - - class FetchLimitOffsetTest(FetchLimitOffsetTest): @pytest.mark.skip( reason="Dialect should advertise which offset rules Databricks supports. Offset handling needs work." From af1a49e85bf505e1f10f94c8a28b036cacac7f7f Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 16:13:21 -0400 Subject: [PATCH 11/25] Stop skipping StringTest I've found that test_dont_truncate_rightside test is flaky because sometimes DBR returns the correct data but in the wrong order. It's supposed to check that a query returns ["AB", "BC"] but sometimes it returns ["BC", "AB"] which is the right data in the wrong order. Python list comparison doesn't evaluate that ["AB", "BC"] == ["BC", "AB"]. We could reimplement the test using collections.Counter in stdlib or check with DBR team about why this order is sometimes different. Signed-off-by: Jesse Whitehouse --- Also, we had to break with SQLAlchemy's advice to never implement the TypeDecorator.literal_processor method because otherwise our strings end up double-escaped and raise a syntax error. test_suite.py::StringTest_databricks+databricks::test_concatenate_binary PASSED test_suite.py::StringTest_databricks+databricks::test_concatenate_clauselist PASSED test_suite.py::StringTest_databricks+databricks::test_dont_truncate_rightside[%B%-expected0] PASSED test_suite.py::StringTest_databricks+databricks::test_dont_truncate_rightside[A%C%Z-expected2] PASSED test_suite.py::StringTest_databricks+databricks::test_dont_truncate_rightside[A%C-expected1] PASSED test_suite.py::StringTest_databricks+databricks::test_literal PASSED test_suite.py::StringTest_databricks+databricks::test_literal_backslashes PASSED test_suite.py::StringTest_databricks+databricks::test_literal_non_ascii PASSED test_suite.py::StringTest_databricks+databricks::test_literal_quoting PASSED test_suite.py::StringTest_databricks+databricks::test_nolength_string PASSED --- src/databricks/sqlalchemy/__init__.py | 1 + src/databricks/sqlalchemy/_types.py | 62 ++++++++++++++++++++ src/databricks/sqlalchemy/test/test_suite.py | 19 ------ 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 7bb90c74d..a369a7d07 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -51,6 +51,7 @@ class DatabricksDialect(default.DefaultDialect): colspecs = { sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, + sqlalchemy.types.String: dialect_type_impl.DatabricksStringType } @classmethod diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 2c32e0da8..a9d72be2c 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -5,6 +5,9 @@ from datetime import datetime + +from databricks.sql.utils import ParamEscaper + @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @compiles(sqlalchemy.types.Text, "databricks") @@ -132,3 +135,62 @@ def process_result_value(self, value: Union[None, str], dialect) -> Union[dateti if value is None: return None return datetime.strptime(value, "%H:%M:%S").time() + +class DatabricksStringType(sqlalchemy.types.TypeDecorator): + """We have to implement our own String() type because SQLAlchemy's default implementation + wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for + escaping of literal strings. And SQLAlchemy's default escaping breaks Databricks SQL. + """ + + impl = sqlalchemy.types.String + cache_ok = True + pe = ParamEscaper() + + def process_literal_param(self, value, dialect) -> str: + """SQLAlchemy's default string escaping for backslashes doesn't work for databricks. The logic here + implements the same logic as our legacy inline escaping logic. + """ + + return self.pe.escape_string(value) + + def literal_processor(self, dialect): + """We manually override this method to prevent further processing of the string literal beyond + what happens in the process_literal_param() method. + + The SQLAlchemy docs _specifically_ say to not override this method. + + It appears that any processing that happens from TypeEngine.process_literal_param happens _before_ + and _in addition to_ whatever the class's impl.literal_processor() method does. The String.literal_processor() + method performs a string replacement that doubles any single-quote in the contained string. This raises a syntax + error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need. + + We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong. + + See type_api.py::TypeEngine.literal_processor: + + ```python + def process(value: Any) -> str: + return fixed_impl_processor( + fixed_process_literal_param(value, dialect) + ) + ``` + + That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the + process_literal_param defined in our Databricks dialect) + + https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor + """ + def process(value): + """This is a copy of the default String.literal_processor() method but stripping away + its double-escaping behaviour for single-quotes. + """ + + _step1 = self.process_literal_param(value, dialect="databricks") + if dialect.identifier_preparer._double_percents: + _step2 = _step1.replace("%", "%%") + else: + _step2 = _step1 + + return "%s" % _step2 + + return process \ No newline at end of file diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index f925be259..86f28f83c 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -236,25 +236,6 @@ def test_row_w_scalar_select(self): """ -class StringTest(StringTest): - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - class TextTest(TextTest): """Fixing StringTest should fix these failures also.""" From 53eef60765997285165e1cfed2d29a3b14375faa Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 16:15:50 -0400 Subject: [PATCH 12/25] Stop skipping TextTest The fixes from fixing StringTest also fixed these tests test_suite.py::TextTest_databricks+databricks::test_literal PASSED test_suite.py::TextTest_databricks+databricks::test_literal_backslashes PASSED test_suite.py::TextTest_databricks+databricks::test_literal_non_ascii PASSED test_suite.py::TextTest_databricks+databricks::test_literal_percentsigns PASSED test_suite.py::TextTest_databricks+databricks::test_literal_quoting PASSED test_suite.py::TextTest_databricks+databricks::test_text_empty_strings PASSED test_suite.py::TextTest_databricks+databricks::test_text_null_strings PASSED test_suite.py::TextTest_databricks+databricks::test_text_roundtrip PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 86f28f83c..9ccac6d7f 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -237,28 +237,6 @@ def test_row_w_scalar_select(self): -class TextTest(TextTest): - """Fixing StringTest should fix these failures also.""" - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - - class TimeMicrosecondsTest(TimeMicrosecondsTest): @pytest.mark.skip( reason="Time type implementation needs work. Microseconds are not handled at all." From a56b0836da22b35ff07f69355a4795dae7183f19 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 16:34:36 -0400 Subject: [PATCH 13/25] Stop skipping TimeMicrosecondsTest I had to update our custom Time() override to cope with both non-microseconds timestrings and microseconds time strings. So I reran the TimeTest() as well to confirm both types still work. test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_literal PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_null PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_round_trip PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_types.py | 19 +++++++- src/databricks/sqlalchemy/requirements.py | 15 ++++++ src/databricks/sqlalchemy/test/test_suite.py | 50 -------------------- 3 files changed, 32 insertions(+), 52 deletions(-) diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index a9d72be2c..b4ddf6d0d 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -111,13 +111,15 @@ class DatabricksTimeType(sqlalchemy.types.TypeDecorator): impl = sqlalchemy.types.Time cache_ok = True + TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f" + TIME_NO_MICROSECONDS_FMT = "%H:%M:%S" def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: """Values sent to the database are converted to %:H:%M:%S strings. """ if value is None: return None - return value.strftime("%H:%M:%S") + return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) def process_literal_param(self, value, dialect) -> datetime.time: """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails @@ -125,6 +127,12 @@ def process_literal_param(self, value, dialect) -> datetime.time: Whereas this method receives a datetime.time() object which is subsequently passed to that same renderer. And that works. + + UPDATE: After coping with the literal_processor override in DatabricksStringType, I suspect a similar + mechanism is at play. Two different processors are are called in sequence. This is likely a byproduct + of Databricks not having a true TIME type. I think the string representation of Time() types is + somehow affecting the literal rendering process. But as long as this passes the tests, I'm not + worried about it. """ return value @@ -134,7 +142,14 @@ def process_result_value(self, value: Union[None, str], dialect) -> Union[dateti """ if value is None: return None - return datetime.strptime(value, "%H:%M:%S").time() + + try: + _parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT) + except ValueError: + # If the string doesn't have microseconds, try parsing it without them + _parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT) + + return _parsed.time() class DatabricksStringType(sqlalchemy.types.TypeDecorator): """We have to implement our own String() type because SQLAlchemy's default implementation diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index cc76a47e7..ba4149bc6 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -62,5 +62,20 @@ def timestamp_microseconds(self): if TIMESTAMP is used.""" return sqlalchemy.testing.exclusions.open() + + @property + def time_microseconds(self): + """target dialect supports representation of Python + datetime.time() with microsecond objects. + + This requirement declaration isn't needed but I've included it here for completeness. + Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns + as STRING Databricks data types. And we use a custom time type to render those strings + between str() and time.time() representations. Therefore we can store _any_ precision + that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects + except mssql, mysql, mariadb, and oracle. + """ + + return sqlalchemy.testing.exclusions.open() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 9ccac6d7f..cf5814b79 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -237,56 +237,6 @@ def test_row_w_scalar_select(self): -class TimeMicrosecondsTest(TimeMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18, 396)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - - - - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): From aa1cae3ebf9694b600dd4d718892afae5384bfbc Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:06:54 -0400 Subject: [PATCH 14/25] Stop skipping NumericTest I added exclusions to requirements.py for features which Databricks doesn't natively support. These tests apply to the generic (camelcase) Numeric() type. Therefore the tests are applicable to the DECIMAL Databricks data type. Some of these features _will_ work for Databricks' FLOAT type. Those tests marked as skipped in the below listing are skipped because of the exclusions called out in requirements.py test_suite.py::NumericTest_databricks+databricks::test_decimal_coerce_round_trip PASSED test_suite.py::NumericTest_databricks+databricks::test_decimal_coerce_round_trip_w_cast PASSED test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal_large SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal_large (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_float_as_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_float_as_float PASSED test_suite.py::NumericTest_databricks+databricks::test_float_coerce_round_trip SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_float_coerce_round_trip (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_float_custom_scale SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_float_custom_scale (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_float_is_not_numeric[Double] PASSED test_suite.py::NumericTest_databricks+databricks::test_float_is_not_numeric[Float] PASSED test_suite.py::NumericTest_databricks+databricks::test_infinity_floats PASSED test_suite.py::NumericTest_databricks+databricks::test_many_significant_digits SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_many_significant_digits (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_numeric_as_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_as_float PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_no_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_null_as_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_null_as_float PASSED test_suite.py::NumericTest_databricks+databricks::test_precision_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_render_literal_float PASSED test_suite.py::NumericTest_databricks+databricks::test_render_literal_numeric PASSED test_suite.py::NumericTest_databricks+databricks::test_render_literal_numeric_asfloat PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 51 ++++++++++++++ src/databricks/sqlalchemy/test/test_suite.py | 72 -------------------- 2 files changed, 51 insertions(+), 72 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index ba4149bc6..e12041f4a 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -77,5 +77,56 @@ def time_microseconds(self): """ return sqlalchemy.testing.exclusions.open() + + @property + def precision_generic_float_type(self): + """target backend will return native floating point numbers with at + least seven decimal places when using the generic Float type. + + Databricks sometimes only returns six digits of precision for the generic Float type + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def literal_float_coercion(self): + """target backend will return the exact float value 15.7563 + with only four significant digits from this statement: + + SELECT :param + + where :param is the Python float 15.7563 + + i.e. it does not return 15.75629997253418 + + Without additional work, Databricks returns 15.75629997253418 + This is a potential area where we could override the Float literal processor. + Will leave to a PM to decide if we should do so. + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def precision_numerics_enotation_large(self): + """target backend supports Decimal() objects using E notation + to represent very large values. + + Databricks supports E notation for FLOAT data types but not for DECIMAL types, + which is the underlying data type SQLAlchemy uses for Numeric() types. + + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def infinity_floats(self): + """The Float type can persist and load float('inf'), float('-inf').""" + + return sqlalchemy.testing.exclusions.open() + + @property + def precision_numerics_retains_significant_digits(self): + """A precision numeric type will return empty significant digits, + i.e. a value such as 10.000 will come back in Decimal form with + the .000 maintained.""" + + return sqlalchemy.testing.exclusions.open() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index cf5814b79..16b9ead86 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -151,78 +151,6 @@ def test_long_convention_name(self): """ -class NumericTest(NumericTest): - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_decimal_coerce_round_trip_w_cast(self): - """ - Exception: - AssertionError: Decimal('16') != Decimal('15.7563') - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('1')} != {Decimal('0.70000000000696'), Decimal('1E-7'), Decimal('0.00001'), Decimal('6.96E-12'), Decimal('0.001'), Decimal('5.940696E-8'), Decimal('0.01000005940696'), Decimal('1E-8'), Decimal('0.01'), Decimal('0.000001'), Decimal('0.0001'), Decimal('6.96E-10')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal_large(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DOUBLE" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_float_custom_scale(self): - """ - Exception: - AssertionError: {Decimal('15.7563829')} != {Decimal('15.7563827')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_many_significant_digits(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DECIMAL(22,2)" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_decimal(self): - """ - Exception: - AssertionError: {Decimal('16')} != {Decimal('15.7563')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_float(self): - """ - Exception: - AssertionError: {16.0} != {15.7563} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_precision_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('900'), Decimal('54')} != {Decimal('0.004354'), Decimal('900.0'), Decimal('54.234246451650')} - """ class RowFetchTest(RowFetchTest): From 4671ff90debae95c620db1ec25ad31fc60d88056 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:08:53 -0400 Subject: [PATCH 15/25] Stop skipping Boolean test These tests appear to be fixed by fixing our @compiles decorators test_suite.py::BooleanTest_databricks+databricks::test_null PASSED test_suite.py::BooleanTest_databricks+databricks::test_render_literal_bool PASSED test_suite.py::BooleanTest_databricks+databricks::test_round_trip PASSED test_suite.py::BooleanTest_databricks+databricks::test_whereclause PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 35 -------------------- 1 file changed, 35 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 16b9ead86..2afea9dca 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -298,41 +298,6 @@ def test_numeric_reflection(self): """ -class BooleanTest(BooleanTest): - @pytest.mark.skip(reason="Boolean type needs work.") - def test_null(self): - """ - This failure appears to infrastructure based. Should attempt a re-run. - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_render_literal_bool(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - _ ERROR at setup of BooleanTest_databricks+databricks.test_render_literal_bool _ - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_whereclause(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - """ - pass - class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. Requires investigation.") From 77221c2f60ec5327a2662284be67d4173004d2e3 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:10:31 -0400 Subject: [PATCH 16/25] Fix: must add array_type exclusion to this file else the entire test suite fails. This appears to be a programming error in SQLAlchemy Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index e12041f4a..c6877def0 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -129,4 +129,9 @@ def precision_numerics_retains_significant_digits(self): return sqlalchemy.testing.exclusions.open() + @property + def array_type(self): + """While Databricks does support ARRAY types, pysql cannot bind them. So + we cannot use them with SQLAlchemy""" + return sqlalchemy.testing.exclusions.closed() From bc0495dfca0785935a7f9357a0cbb339eb0f1a78 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:10:44 -0400 Subject: [PATCH 17/25] Black all files changed in this PR Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 3 +- src/databricks/sqlalchemy/_types.py | 48 ++++++++++---------- src/databricks/sqlalchemy/requirements.py | 25 +++++----- src/databricks/sqlalchemy/test/test_suite.py | 4 -- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index a369a7d07..95b6c5169 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -51,7 +51,7 @@ class DatabricksDialect(default.DefaultDialect): colspecs = { sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, - sqlalchemy.types.String: dialect_type_impl.DatabricksStringType + sqlalchemy.types.String: dialect_type_impl.DatabricksStringType, } @classmethod @@ -136,7 +136,6 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): columns = [] for col in resp: - # Taken from PyHive. This removes added type info from decimals and maps _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0) this_column = { diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index b4ddf6d0d..3fdfef74b 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -8,6 +8,7 @@ from databricks.sql.utils import ParamEscaper + @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @compiles(sqlalchemy.types.Text, "databricks") @@ -88,11 +89,11 @@ def compile_array_databricks(type_, compiler, **kw): class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator): """The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ - includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that - the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any - timezone set, we need to strip the timezone off the value received from pysql. + includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that + the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any + timezone set, we need to strip the timezone off the value received from pysql. - It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. + It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. """ impl = sqlalchemy.types.DateTime @@ -103,10 +104,10 @@ def process_result_value(self, value: Union[None, datetime], dialect): if value is None: return None return value.replace(tzinfo=None) - + + class DatabricksTimeType(sqlalchemy.types.TypeDecorator): - """Databricks has no native TIME type. So we store it as a string. - """ + """Databricks has no native TIME type. So we store it as a string.""" impl = sqlalchemy.types.Time cache_ok = True @@ -115,12 +116,11 @@ class DatabricksTimeType(sqlalchemy.types.TypeDecorator): TIME_NO_MICROSECONDS_FMT = "%H:%M:%S" def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: - """Values sent to the database are converted to %:H:%M:%S strings. - """ + """Values sent to the database are converted to %:H:%M:%S strings.""" if value is None: return None return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) - + def process_literal_param(self, value, dialect) -> datetime.time: """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails because the string literal renderer receives a str() object and calls .isoformat() on it. @@ -136,21 +136,22 @@ def process_literal_param(self, value, dialect) -> datetime.time: """ return value - - def process_result_value(self, value: Union[None, str], dialect) -> Union[datetime.time, None]: - """Values received from the database are parsed into datetime.time() objects - """ + def process_result_value( + self, value: Union[None, str], dialect + ) -> Union[datetime.time, None]: + """Values received from the database are parsed into datetime.time() objects""" if value is None: return None - + try: _parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT) except ValueError: # If the string doesn't have microseconds, try parsing it without them _parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT) - + return _parsed.time() - + + class DatabricksStringType(sqlalchemy.types.TypeDecorator): """We have to implement our own String() type because SQLAlchemy's default implementation wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for @@ -160,18 +161,18 @@ class DatabricksStringType(sqlalchemy.types.TypeDecorator): impl = sqlalchemy.types.String cache_ok = True pe = ParamEscaper() - + def process_literal_param(self, value, dialect) -> str: """SQLAlchemy's default string escaping for backslashes doesn't work for databricks. The logic here implements the same logic as our legacy inline escaping logic. """ return self.pe.escape_string(value) - + def literal_processor(self, dialect): """We manually override this method to prevent further processing of the string literal beyond what happens in the process_literal_param() method. - + The SQLAlchemy docs _specifically_ say to not override this method. It appears that any processing that happens from TypeEngine.process_literal_param happens _before_ @@ -180,7 +181,7 @@ def literal_processor(self, dialect): error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need. We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong. - + See type_api.py::TypeEngine.literal_processor: ```python @@ -192,9 +193,10 @@ def process(value: Any) -> str: That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the process_literal_param defined in our Databricks dialect) - + https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor """ + def process(value): """This is a copy of the default String.literal_processor() method but stripping away its double-escaping behaviour for single-quotes. @@ -208,4 +210,4 @@ def process(value): return "%s" % _step2 - return process \ No newline at end of file + return process diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index c6877def0..9ae07572f 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -31,21 +31,20 @@ def __some_example_requirement(self): class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - @property def date_historic(self): """target dialect supports representation of Python datetime.datetime() objects with historic (pre 1970) values.""" return sqlalchemy.testing.exclusions.open() - + @property def datetime_historic(self): """target dialect supports representation of Python datetime.datetime() objects with historic (pre 1970) values.""" return sqlalchemy.testing.exclusions.open() - + @property def datetime_literals(self): """target dialect supports rendering of a date, time, or datetime as a @@ -54,7 +53,7 @@ def datetime_literals(self): """ return sqlalchemy.testing.exclusions.open() - + @property def timestamp_microseconds(self): """target dialect supports representation of Python @@ -62,22 +61,22 @@ def timestamp_microseconds(self): if TIMESTAMP is used.""" return sqlalchemy.testing.exclusions.open() - + @property def time_microseconds(self): """target dialect supports representation of Python datetime.time() with microsecond objects. - + This requirement declaration isn't needed but I've included it here for completeness. Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns as STRING Databricks data types. And we use a custom time type to render those strings between str() and time.time() representations. Therefore we can store _any_ precision that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects - except mssql, mysql, mariadb, and oracle. + except mssql, mysql, mariadb, and oracle. """ return sqlalchemy.testing.exclusions.open() - + @property def precision_generic_float_type(self): """target backend will return native floating point numbers with at @@ -86,7 +85,7 @@ def precision_generic_float_type(self): Databricks sometimes only returns six digits of precision for the generic Float type """ return sqlalchemy.testing.exclusions.closed() - + @property def literal_float_coercion(self): """target backend will return the exact float value 15.7563 @@ -103,24 +102,24 @@ def literal_float_coercion(self): Will leave to a PM to decide if we should do so. """ return sqlalchemy.testing.exclusions.closed() - + @property def precision_numerics_enotation_large(self): """target backend supports Decimal() objects using E notation to represent very large values. - + Databricks supports E notation for FLOAT data types but not for DECIMAL types, which is the underlying data type SQLAlchemy uses for Numeric() types. """ return sqlalchemy.testing.exclusions.closed() - + @property def infinity_floats(self): """The Float type can persist and load float('inf'), float('-inf').""" return sqlalchemy.testing.exclusions.open() - + @property def precision_numerics_retains_significant_digits(self): """A precision numeric type will return empty significant digits, diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 2afea9dca..055720ac2 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -151,8 +151,6 @@ def test_long_convention_name(self): """ - - class RowFetchTest(RowFetchTest): @pytest.mark.skip( reason="Date type implementation needs work. Timezone information not preserved." @@ -164,7 +162,6 @@ def test_row_w_scalar_select(self): """ - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): @@ -298,7 +295,6 @@ def test_numeric_reflection(self): """ - class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. Requires investigation.") def test_round_trip_same_named_column(self): From e2330580385174a1cb6268fbf356bed24ff82756 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:13:52 -0400 Subject: [PATCH 18/25] Update test running instructions Signed-off-by: Jesse Whitehouse --- CONTRIBUTING.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4efaba0c5..f4ec73e77 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,16 +148,14 @@ The suites marked `[not documented]` require additional configuration which will SQLAlchemy provides reusable tests for testing dialect implementations. -To run these tests, assuming the environment variables needed for e2e tests are set, do the following: - ``` -cd src/databricks/sqlalchemy -poetry run python -m pytest test/sqlalchemy_dialect_compliance.py --dburi \ +poetry shell +cd src/databricks/sqlalchemy/test +python -m pytest test_suite.py --dburi \ "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema" ``` -Some of these of these tests fail currently. We're working on getting -relavent tests passing and others skipped. +Some of these of these tests fail currently. We're working on getting relevant tests passing and others skipped. ### Code formatting From 214523bdec4b657dd90a05885497ac3f266e202a Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:30:54 -0400 Subject: [PATCH 19/25] Remove outdated comment now that we better understand how to use this file Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 9ae07572f..0e3d2ffb9 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -1,20 +1,4 @@ """ -This module is supposedly used by the compliance tests to control which tests are run based on database capabilities. -However, based on some experimentation that does not appear to be consistently the case. Until we better understand -when these requirements are and are not implemented, we prefer to manually capture the exact nature of the failures -and errors. - -Once we better understand how to use requirements.py, an example exclusion will look like this: - - import sqlalchemy.testing.requirements - import sqlalchemy.testing.exclusions - - class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - @property - def __some_example_requirement(self): - return sqlalchemy.testing.exclusions.closed - - The complete list of requirements is provided by SQLAlchemy here: https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py @@ -23,13 +7,6 @@ def __some_example_requirement(self): import sqlalchemy.testing.requirements import sqlalchemy.testing.exclusions -import logging - -logger = logging.getLogger(__name__) - -logger.warning("requirements.py is not currently employed by Databricks dialect") - - class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): @property def date_historic(self): From 80ae0dddf4b59855d8ae0ce470bb8d5d03b29e77 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 9 Oct 2023 12:13:54 -0400 Subject: [PATCH 20/25] Add marks for the type tests that we've reviewed. This way we can re-run all of the reviewed tests with pytest test_suite.py -m reviewed This allows us to check for regressions in a single test command rather than running each block of types individually. Signed-off-by: Jesse Whitehouse --- CONTRIBUTING.md | 3 +- src/databricks/sqlalchemy/requirements.py | 1 + src/databricks/sqlalchemy/test/test_suite.py | 65 ++++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f4ec73e77..c3e64a4a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -155,7 +155,8 @@ python -m pytest test_suite.py --dburi \ "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema" ``` -Some of these of these tests fail currently. We're working on getting relevant tests passing and others skipped. +Some of these of these tests fail currently. We're working on getting relevant tests passing and others skipped. The tests that we've already reviewed and verified +are decorated with a pytest marker called `reviewed`. To only run these tests and check for regressions, you can add `-m reviewed` to the invocation command above. ### Code formatting diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 0e3d2ffb9..fc4910e62 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -7,6 +7,7 @@ import sqlalchemy.testing.requirements import sqlalchemy.testing.exclusions + class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): @property def date_historic(self): diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 055720ac2..e9b36e457 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,6 +29,71 @@ class BinaryTest(BinaryTest): pass +@pytest.mark.reviewed +class BooleanTest(BooleanTest): + pass + + +@pytest.mark.reviewed +class NumericTest(NumericTest): + pass + + +@pytest.mark.reviewed +class TimeMicrosecondsTest(TimeMicrosecondsTest): + pass + + +@pytest.mark.reviewed +class TextTest(TextTest): + pass + + +@pytest.mark.reviewed +class StringTest(StringTest): + pass + + +@pytest.mark.reviewed +class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): + pass + + +@pytest.mark.reviewed +class TimestampMicrosecondsTest(TimestampMicrosecondsTest): + pass + + +@pytest.mark.reviewed +class DateTimeCoercedToDateTimeTest(DateTimeCoercedToDateTimeTest): + pass + + +@pytest.mark.reviewed +class TimeTest(TimeTest): + pass + + +@pytest.mark.reviewed +class DateTimeTest(DateTimeTest): + pass + + +@pytest.mark.reviewed +class DateTimeHistoricTest(DateTimeHistoricTest): + pass + + +@pytest.mark.reviewed +class DateTest(DateTest): + pass + + +@pytest.mark.reviewed +class DateHistoricTest(DateHistoricTest): + pass + + class FetchLimitOffsetTest(FetchLimitOffsetTest): @pytest.mark.skip( reason="Dialect should advertise which offset rules Databricks supports. Offset handling needs work." From b31eef45bb56e9d02253e0924896e999b8c5ab38 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 10 Oct 2023 15:29:22 -0400 Subject: [PATCH 21/25] (1/x) Remove .closed() exclusions and replace with explicit skips E notation for decimals Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 11 ----------- src/databricks/sqlalchemy/test/test_suite.py | 10 +++++++++- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index fc4910e62..e1705f023 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -81,17 +81,6 @@ def literal_float_coercion(self): """ return sqlalchemy.testing.exclusions.closed() - @property - def precision_numerics_enotation_large(self): - """target backend supports Decimal() objects using E notation - to represent very large values. - - Databricks supports E notation for FLOAT data types but not for DECIMAL types, - which is the underlying data type SQLAlchemy uses for Numeric() types. - - """ - return sqlalchemy.testing.exclusions.closed() - @property def infinity_floats(self): """The Float type can persist and load float('inf'), float('-inf').""" diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index e9b36e457..1598d7ae5 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -36,7 +36,15 @@ class BooleanTest(BooleanTest): @pytest.mark.reviewed class NumericTest(NumericTest): - pass + + @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") + def test_enotation_decimal(self): + pass + + @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") + def test_enotation_decimal_large(self): + pass + @pytest.mark.reviewed From c8332da3ef6140ec2f88b4d8ce2c9f20c63155db Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 10 Oct 2023 15:32:27 -0400 Subject: [PATCH 22/25] Add an open exclusion to allow test_many_significant_digits to run. Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index e1705f023..3fb80b2cb 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -94,6 +94,14 @@ def precision_numerics_retains_significant_digits(self): the .000 maintained.""" return sqlalchemy.testing.exclusions.open() + + @property + def precision_numerics_many_significant_digits(self): + """target backend supports values with many digits on both sides, + such as 319438950232418390.273596, 87673.594069654243 + + """ + return sqlalchemy.testing.exclusions.open() @property def array_type(self): From 8200e23159d3b65e9e92320133135a39fabce93d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 10 Oct 2023 15:43:18 -0400 Subject: [PATCH 23/25] (2/x) Remove .closed() exclusions and replace with explicit skips Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 39 ++++++-------------- src/databricks/sqlalchemy/test/test_suite.py | 21 +++++++++++ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 3fb80b2cb..39edb31c5 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -2,6 +2,13 @@ The complete list of requirements is provided by SQLAlchemy here: https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py + +When SQLAlchemy skips a test because a requirement is closed() it gives a generic skip message. +To make these failures more actionable, we only define requirements in this file that we wish to +force to be open(). If a test should be skipped on Databricks, it will be specifically marked skip +in test_suite.py with a Databricks-specific reason. + +See the special note about the array_type exclusion below. """ import sqlalchemy.testing.requirements @@ -55,32 +62,6 @@ def time_microseconds(self): return sqlalchemy.testing.exclusions.open() - @property - def precision_generic_float_type(self): - """target backend will return native floating point numbers with at - least seven decimal places when using the generic Float type. - - Databricks sometimes only returns six digits of precision for the generic Float type - """ - return sqlalchemy.testing.exclusions.closed() - - @property - def literal_float_coercion(self): - """target backend will return the exact float value 15.7563 - with only four significant digits from this statement: - - SELECT :param - - where :param is the Python float 15.7563 - - i.e. it does not return 15.75629997253418 - - Without additional work, Databricks returns 15.75629997253418 - This is a potential area where we could override the Float literal processor. - Will leave to a PM to decide if we should do so. - """ - return sqlalchemy.testing.exclusions.closed() - @property def infinity_floats(self): """The Float type can persist and load float('inf'), float('-inf').""" @@ -106,6 +87,10 @@ def precision_numerics_many_significant_digits(self): @property def array_type(self): """While Databricks does support ARRAY types, pysql cannot bind them. So - we cannot use them with SQLAlchemy""" + we cannot use them with SQLAlchemy + + Due to a bug in SQLAlchemy, we _must_ define this exclusion as closed() here or else the + test runner will crash the pytest process due to an AttributeError + """ return sqlalchemy.testing.exclusions.closed() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 1598d7ae5..81eb1d81a 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -39,10 +39,31 @@ class NumericTest(NumericTest): @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") def test_enotation_decimal(self): + """This test automatically runs if requirements.precision_numerics_enotation_large is open() + """ pass @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") def test_enotation_decimal_large(self): + """This test automatically runs if requirements.precision_numerics_enotation_large is open() + """ + pass + + @pytest.mark.skip(reason="Without a specific CAST, Databricks doesn't return floats with same precision that was selected.") + def test_float_coerce_round_trip(self): + """ + This automatically runs if requirements.literal_float_coercion is open() + + Without additional work, Databricks returns 15.75629997253418 when you SELECT 15.7563. + This is a potential area where we could override the Float literal processor to add a CAST. + Will leave to a PM to decide if we should do so. + """ + pass + + @pytest.mark.skip(reason="Databricks sometimes only returns six digits of precision for the generic Float type") + def test_float_custom_scale(self): + """This test automatically runs if requirements.precision_generic_float_type is open() + """ pass From 12d081bbdd33d1994d05ac569a911110fe0ca7a3 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 10 Oct 2023 15:46:59 -0400 Subject: [PATCH 24/25] Fix linting Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_types.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 3fdfef74b..5a93493ea 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -3,7 +3,7 @@ from typing import Union -from datetime import datetime +from datetime import datetime, time from databricks.sql.utils import ParamEscaper @@ -115,13 +115,14 @@ class DatabricksTimeType(sqlalchemy.types.TypeDecorator): TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f" TIME_NO_MICROSECONDS_FMT = "%H:%M:%S" - def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: + def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]: """Values sent to the database are converted to %:H:%M:%S strings.""" if value is None: return None return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) - def process_literal_param(self, value, dialect) -> datetime.time: + # mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string + def process_literal_param(self, value, dialect) -> time: # type: ignore """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails because the string literal renderer receives a str() object and calls .isoformat() on it. @@ -138,7 +139,7 @@ def process_literal_param(self, value, dialect) -> datetime.time: def process_result_value( self, value: Union[None, str], dialect - ) -> Union[datetime.time, None]: + ) -> Union[time, None]: """Values received from the database are parsed into datetime.time() objects""" if value is None: return None From 0d5be1cb79f83d4086e5eac3f4704eaf32e3c946 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 10 Oct 2023 15:49:25 -0400 Subject: [PATCH 25/25] Fix black linting Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_types.py | 2 +- src/databricks/sqlalchemy/requirements.py | 4 ++-- src/databricks/sqlalchemy/test/test_suite.py | 21 ++++++++++---------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 5a93493ea..d2ea8c083 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -122,7 +122,7 @@ def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, s return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) # mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string - def process_literal_param(self, value, dialect) -> time: # type: ignore + def process_literal_param(self, value, dialect) -> time: # type: ignore """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails because the string literal renderer receives a str() object and calls .isoformat() on it. diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 39edb31c5..e639d19b7 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -75,7 +75,7 @@ def precision_numerics_retains_significant_digits(self): the .000 maintained.""" return sqlalchemy.testing.exclusions.open() - + @property def precision_numerics_many_significant_digits(self): """target backend supports values with many digits on both sides, @@ -88,7 +88,7 @@ def precision_numerics_many_significant_digits(self): def array_type(self): """While Databricks does support ARRAY types, pysql cannot bind them. So we cannot use them with SQLAlchemy - + Due to a bug in SQLAlchemy, we _must_ define this exclusion as closed() here or else the test runner will crash the pytest process due to an AttributeError """ diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 81eb1d81a..c9ba48b42 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -36,38 +36,37 @@ class BooleanTest(BooleanTest): @pytest.mark.reviewed class NumericTest(NumericTest): - @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") def test_enotation_decimal(self): - """This test automatically runs if requirements.precision_numerics_enotation_large is open() - """ + """This test automatically runs if requirements.precision_numerics_enotation_large is open()""" pass @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") def test_enotation_decimal_large(self): - """This test automatically runs if requirements.precision_numerics_enotation_large is open() - """ + """This test automatically runs if requirements.precision_numerics_enotation_large is open()""" pass - @pytest.mark.skip(reason="Without a specific CAST, Databricks doesn't return floats with same precision that was selected.") + @pytest.mark.skip( + reason="Without a specific CAST, Databricks doesn't return floats with same precision that was selected." + ) def test_float_coerce_round_trip(self): """ This automatically runs if requirements.literal_float_coercion is open() - + Without additional work, Databricks returns 15.75629997253418 when you SELECT 15.7563. This is a potential area where we could override the Float literal processor to add a CAST. Will leave to a PM to decide if we should do so. """ pass - @pytest.mark.skip(reason="Databricks sometimes only returns six digits of precision for the generic Float type") + @pytest.mark.skip( + reason="Databricks sometimes only returns six digits of precision for the generic Float type" + ) def test_float_custom_scale(self): - """This test automatically runs if requirements.precision_generic_float_type is open() - """ + """This test automatically runs if requirements.precision_generic_float_type is open()""" pass - @pytest.mark.reviewed class TimeMicrosecondsTest(TimeMicrosecondsTest): pass