diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4efaba0c5..c3e64a4a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,16 +148,15 @@ The suites marked `[not documented]` require additional configuration which will SQLAlchemy provides reusable tests for testing dialect implementations. -To run these tests, assuming the environment variables needed for e2e tests are set, do the following: - ``` -cd src/databricks/sqlalchemy -poetry run python -m pytest test/sqlalchemy_dialect_compliance.py --dburi \ +poetry shell +cd src/databricks/sqlalchemy/test +python -m pytest test_suite.py --dburi \ "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema" ``` -Some of these of these tests fail currently. We're working on getting -relavent tests passing and others skipped. +Some of these tests fail currently. We're working on getting relevant tests passing and others skipped. The tests that we've already reviewed and verified +are decorated with a pytest marker called `reviewed`. To only run these tests and check for regressions, you can add `-m reviewed` to the invocation command above. 
### Code formatting diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index d1d4782d9..95b6c5169 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -13,7 +13,7 @@ from databricks import sql # This import is required to process our @compiles decorators -import databricks.sqlalchemy.types +import databricks.sqlalchemy._types as dialect_type_impl from databricks.sqlalchemy.base import ( @@ -48,6 +48,12 @@ class DatabricksDialect(default.DefaultDialect): non_native_boolean_check_constraint: bool = False paramstyle: str = "named" + colspecs = { + sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, + sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, + sqlalchemy.types.String: dialect_type_impl.DatabricksStringType, + } + @classmethod def dbapi(cls): return sql @@ -130,7 +136,6 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): columns = [] for col in resp: - # Taken from PyHive. 
This removes added type info from decimals and maps _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0) this_column = { diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py new file mode 100644 index 000000000..d2ea8c083 --- /dev/null +++ b/src/databricks/sqlalchemy/_types.py @@ -0,0 +1,214 @@ +import sqlalchemy +from sqlalchemy.ext.compiler import compiles + +from typing import Union + +from datetime import datetime, time + + +from databricks.sql.utils import ParamEscaper + + +@compiles(sqlalchemy.types.Enum, "databricks") +@compiles(sqlalchemy.types.String, "databricks") +@compiles(sqlalchemy.types.Text, "databricks") +@compiles(sqlalchemy.types.Time, "databricks") +@compiles(sqlalchemy.types.Unicode, "databricks") +@compiles(sqlalchemy.types.UnicodeText, "databricks") +@compiles(sqlalchemy.types.Uuid, "databricks") +def compile_string_databricks(type_, compiler, **kw): + """ + We override the default compilation for Enum(), String(), Text(), and Time() because SQLAlchemy + defaults to incompatible / abnormal compiled names + + Enum -> VARCHAR + String -> VARCHAR[LENGTH] + Text -> VARCHAR[LENGTH] + Time -> TIME + Unicode -> VARCHAR[LENGTH] + UnicodeText -> TEXT + Uuid -> CHAR[32] + + But all of these types will be compiled to STRING in Databricks SQL + """ + return "STRING" + + +@compiles(sqlalchemy.types.Integer, "databricks") +def compile_integer_databricks(type_, compiler, **kw): + """ + We need to override the default Integer compilation rendering because Databricks uses "INT" instead of "INTEGER" + """ + return "INT" + + +@compiles(sqlalchemy.types.LargeBinary, "databricks") +def compile_binary_databricks(type_, compiler, **kw): + """ + We need to override the default LargeBinary compilation rendering because Databricks uses "BINARY" instead of "BLOB" + """ + return "BINARY" + + +@compiles(sqlalchemy.types.Numeric, "databricks") +def compile_numeric_databricks(type_, compiler, **kw): + """ + We need to override the 
default Numeric compilation rendering because Databricks uses "DECIMAL" instead of "NUMERIC" + + The built-in visit_DECIMAL behaviour captures the precision and scale. Here we're just mapping calls to compile Numeric + to the SQLAlchemy Decimal() implementation + """ + return compiler.visit_DECIMAL(type_, **kw) + + +@compiles(sqlalchemy.types.DateTime, "databricks") +def compile_datetime_databricks(type_, compiler, **kw): + """ + We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP_NTZ" instead of "DATETIME" + """ + return "TIMESTAMP_NTZ" + + +@compiles(sqlalchemy.types.ARRAY, "databricks") +def compile_array_databricks(type_, compiler, **kw): + """ + SQLAlchemy's default ARRAY can't compile as it's only implemented for Postgresql. + The Postgres implementation works for Databricks SQL, so we duplicate that here. + + :type_: + This is an instance of sqlalchemy.types.ARRAY which always includes an item_type attribute + which is itself an instance of TypeEngine + + https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY + """ + + inner = compiler.process(type_.item_type, **kw) + + return f"ARRAY<{inner}>" + + +class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator): + """The datetime that pysql creates when it receives the contents of a TIMESTAMP_NTZ + includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that + the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any + timezone set, we need to strip the timezone off the value received from pysql. + + It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. 
+ """ + + impl = sqlalchemy.types.DateTime + + cache_ok = True + + def process_result_value(self, value: Union[None, datetime], dialect): + if value is None: + return None + return value.replace(tzinfo=None) + + +class DatabricksTimeType(sqlalchemy.types.TypeDecorator): + """Databricks has no native TIME type. So we store it as a string.""" + + impl = sqlalchemy.types.Time + cache_ok = True + + TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f" + TIME_NO_MICROSECONDS_FMT = "%H:%M:%S" + + def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]: + """Values sent to the database are converted to %H:%M:%S.%f strings.""" + if value is None: + return None + return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) + + # mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string + def process_literal_param(self, value, dialect) -> time: # type: ignore + """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails + because the string literal renderer receives a str() object and calls .isoformat() on it. + + Whereas this method receives a datetime.time() object which is subsequently passed to that + same renderer. And that works. + + UPDATE: After coping with the literal_processor override in DatabricksStringType, I suspect a similar + mechanism is at play. Two different processors are called in sequence. This is likely a byproduct + of Databricks not having a true TIME type. I think the string representation of Time() types is + somehow affecting the literal rendering process. But as long as this passes the tests, I'm not + worried about it. 
+ """ + return value + + def process_result_value( + self, value: Union[None, str], dialect + ) -> Union[time, None]: + """Values received from the database are parsed into datetime.time() objects""" + if value is None: + return None + + try: + _parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT) + except ValueError: + # If the string doesn't have microseconds, try parsing it without them + _parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT) + + return _parsed.time() + + +class DatabricksStringType(sqlalchemy.types.TypeDecorator): + """We have to implement our own String() type because SQLAlchemy's default implementation + wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for + escaping of literal strings. And SQLAlchemy's default escaping breaks Databricks SQL. + """ + + impl = sqlalchemy.types.String + cache_ok = True + pe = ParamEscaper() + + def process_literal_param(self, value, dialect) -> str: + """SQLAlchemy's default string escaping for backslashes doesn't work for databricks. The logic here + implements the same logic as our legacy inline escaping logic. + """ + + return self.pe.escape_string(value) + + def literal_processor(self, dialect): + """We manually override this method to prevent further processing of the string literal beyond + what happens in the process_literal_param() method. + + The SQLAlchemy docs _specifically_ say to not override this method. + + It appears that any processing that happens from TypeEngine.process_literal_param happens _before_ + and _in addition to_ whatever the class's impl.literal_processor() method does. The String.literal_processor() + method performs a string replacement that doubles any single-quote in the contained string. This raises a syntax + error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need. + + We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong. 
+ + See type_api.py::TypeEngine.literal_processor: + + ```python + def process(value: Any) -> str: + return fixed_impl_processor( + fixed_process_literal_param(value, dialect) + ) + ``` + + That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the + process_literal_param defined in our Databricks dialect) + + https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor + """ + + def process(value): + """This is a copy of the default String.literal_processor() method but stripping away + its double-escaping behaviour for single-quotes. + """ + + _step1 = self.process_literal_param(value, dialect="databricks") + if dialect.identifier_preparer._double_percents: + _step2 = _step1.replace("%", "%%") + else: + _step2 = _step1 + + return "%s" % _step2 + + return process diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 7da460057..e639d19b7 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -1,34 +1,96 @@ """ -This module is supposedly used by the compliance tests to control which tests are run based on database capabilities. -However, based on some experimentation that does not appear to be consistently the case. Until we better understand -when these requirements are and are not implemented, we prefer to manually capture the exact nature of the failures -and errors. 
- -Once we better understand how to use requirements.py, an example exclusion will look like this: - - import sqlalchemy.testing.requirements - import sqlalchemy.testing.exclusions - - class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - @property - def __some_example_requirement(self): - return sqlalchemy.testing.exclusions.closed - - The complete list of requirements is provided by SQLAlchemy here: https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py + +When SQLAlchemy skips a test because a requirement is closed() it gives a generic skip message. +To make these failures more actionable, we only define requirements in this file that we wish to +force to be open(). If a test should be skipped on Databricks, it will be specifically marked skip +in test_suite.py with a Databricks-specific reason. + +See the special note about the array_type exclusion below. """ import sqlalchemy.testing.requirements import sqlalchemy.testing.exclusions -import logging -logger = logging.getLogger(__name__) +class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): + @property + def date_historic(self): + """target dialect supports representation of Python + datetime.datetime() objects with historic (pre 1970) values.""" -logger.warning("requirements.py is not currently employed by Databricks dialect") + return sqlalchemy.testing.exclusions.open() + @property + def datetime_historic(self): + """target dialect supports representation of Python + datetime.datetime() objects with historic (pre 1970) values.""" -class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - pass + return sqlalchemy.testing.exclusions.open() + + @property + def datetime_literals(self): + """target dialect supports rendering of a date, time, or datetime as a + literal string, e.g. via the TypeEngine.literal_processor() method. 
+ + """ + + return sqlalchemy.testing.exclusions.open() + + @property + def timestamp_microseconds(self): + """target dialect supports representation of Python + datetime.datetime() with microsecond objects but only + if TIMESTAMP is used.""" + + return sqlalchemy.testing.exclusions.open() + + @property + def time_microseconds(self): + """target dialect supports representation of Python + datetime.time() with microsecond objects. + + This requirement declaration isn't needed but I've included it here for completeness. + Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns + as STRING Databricks data types. And we use a custom time type to render those strings + between str() and datetime.time() representations. Therefore we can store _any_ precision + that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects + except mssql, mysql, mariadb, and oracle. + """ + + return sqlalchemy.testing.exclusions.open() + + @property + def infinity_floats(self): + """The Float type can persist and load float('inf'), float('-inf').""" + + return sqlalchemy.testing.exclusions.open() + + @property + def precision_numerics_retains_significant_digits(self): + """A precision numeric type will return empty significant digits, + i.e. a value such as 10.000 will come back in Decimal form with + the .000 maintained.""" + + return sqlalchemy.testing.exclusions.open() + + @property + def precision_numerics_many_significant_digits(self): + """target backend supports values with many digits on both sides, + such as 319438950232418390.273596, 87673.594069654243 + + """ + return sqlalchemy.testing.exclusions.open() + + @property + def array_type(self): + """While Databricks does support ARRAY types, pysql cannot bind them. 
So + we cannot use them with SQLAlchemy + + Due to a bug in SQLAlchemy, we _must_ define this exclusion as closed() here or else the + test runner will crash the pytest process due to an AttributeError + """ + + return sqlalchemy.testing.exclusions.closed() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 7a840404b..c9ba48b42 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -24,150 +24,102 @@ # See further: https://github.com/sqlalchemy/sqlalchemy/blob/rel_1_4_48/README.dialects.rst +@pytest.mark.skip(reason="pysql doesn't support binding of BINARY type parameters") class BinaryTest(BinaryTest): - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_binary_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ + pass - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_pickle_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ +@pytest.mark.reviewed +class BooleanTest(BooleanTest): + pass -class DateHistoricTest(DateHistoricTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(1727, 4, 1)" with datatype DATE - """ - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." 
- ) - def test_select_direct(self): - """ - Exception: - AssertionError: '1727-04-01' != datetime.date(1727, 4, 1) - """ +@pytest.mark.reviewed +class NumericTest(NumericTest): + @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") + def test_enotation_decimal(self): + """This test automatically runs if requirements.precision_numerics_enotation_large is open()""" + pass + @pytest.mark.skip(reason="Databricks doesn't support E notation for DECIMAL types") + def test_enotation_decimal_large(self): + """This test automatically runs if requirements.precision_numerics_enotation_large is open()""" + pass -class DateTest(DateTest): @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." + reason="Without a specific CAST, Databricks doesn't return floats with same precision that was selected." ) - def test_literal(self): + def test_float_coerce_round_trip(self): """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(2012, 10, 15)" with datatype DATE + This automatically runs if requirements.literal_float_coercion is open() + + Without additional work, Databricks returns 15.75629997253418 when you SELECT 15.7563. + This is a potential area where we could override the Float literal processor to add a CAST. + Will leave to a PM to decide if we should do so. """ + pass @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." 
+ reason="Databricks sometimes only returns six digits of precision for the generic Float type" ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15' != datetime.date(2012, 10, 15) - """ + def test_float_custom_scale(self): + """This test automatically runs if requirements.precision_generic_float_type is open()""" + pass -class DateTimeHistoricTest(DateTimeHistoricTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(1850, 11, 10, 11, 52, 35)" with datatype DATETIME - """ +@pytest.mark.reviewed +class TimeMicrosecondsTest(TimeMicrosecondsTest): + pass - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ +@pytest.mark.reviewed +class TextTest(TextTest): + pass - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '1850-11-10 11:52:35.000000' != datetime.datetime(1850, 11, 10, 11, 52, 35) - """ + +@pytest.mark.reviewed +class StringTest(StringTest): + pass +@pytest.mark.reviewed class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype DATETIME - """ + pass - @pytest.mark.skip(reason="Date type 
implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ +@pytest.mark.reviewed +class TimestampMicrosecondsTest(TimestampMicrosecondsTest): + pass - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ + +@pytest.mark.reviewed +class DateTimeCoercedToDateTimeTest(DateTimeCoercedToDateTimeTest): + pass +@pytest.mark.reviewed +class TimeTest(TimeTest): + pass + + +@pytest.mark.reviewed class DateTimeTest(DateTimeTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18)" with datatype DATETIME - """ + pass - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ +@pytest.mark.reviewed +class DateTimeHistoricTest(DateTimeHistoricTest): + pass + + +@pytest.mark.reviewed +class DateTest(DateTest): + pass - @pytest.mark.skip(reason="Date type implementation needs work") 
- def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - """ + +@pytest.mark.reviewed +class DateHistoricTest(DateHistoricTest): + pass class FetchLimitOffsetTest(FetchLimitOffsetTest): @@ -292,80 +244,6 @@ def test_long_convention_name(self): """ -class NumericTest(NumericTest): - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_decimal_coerce_round_trip_w_cast(self): - """ - Exception: - AssertionError: Decimal('16') != Decimal('15.7563') - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('1')} != {Decimal('0.70000000000696'), Decimal('1E-7'), Decimal('0.00001'), Decimal('6.96E-12'), Decimal('0.001'), Decimal('5.940696E-8'), Decimal('0.01000005940696'), Decimal('1E-8'), Decimal('0.01'), Decimal('0.000001'), Decimal('0.0001'), Decimal('6.96E-10')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal_large(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DOUBLE" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_float_custom_scale(self): - """ - Exception: - AssertionError: {Decimal('15.7563829')} != {Decimal('15.7563827')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." 
- ) - def test_many_significant_digits(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DECIMAL(22,2)" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_decimal(self): - """ - Exception: - AssertionError: {Decimal('16')} != {Decimal('15.7563')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_float(self): - """ - Exception: - AssertionError: {16.0} != {15.7563} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_precision_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('900'), Decimal('54')} != {Decimal('0.004354'), Decimal('900.0'), Decimal('54.234246451650')} - """ - - class RowFetchTest(RowFetchTest): @pytest.mark.skip( reason="Date type implementation needs work. Timezone information not preserved." @@ -377,232 +255,6 @@ def test_row_w_scalar_select(self): """ -class StringTest(StringTest): - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." 
- ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - - -class TextTest(TextTest): - """Fixing StringTest should fix these failures also.""" - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - - -class TimeMicrosecondsTest(TimeMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18, 396)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." 
- ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - -class TimeTest(TimeTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." 
- ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - -class TimestampMicrosecondsTest(TimestampMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype TIMESTAMP - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ - - -class DateTimeCoercedToDateTimeTest(DateTimeCoercedToDateTimeTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Literal values not coerced properly." 
- ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - assert '2012-10-15 12:57:18.000000' == datetime.datetime(2012, 10, 15, 12, 57, 18) - """ - - @pytest.mark.skip(reason="Forthcoming deprecated feature.") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null_bound_comparison(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip_decorated(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): @@ -736,42 +388,6 @@ def 
test_numeric_reflection(self): """ -class BooleanTest(BooleanTest): - @pytest.mark.skip(reason="Boolean type needs work.") - def test_null(self): - """ - This failure appears to infrastructure based. Should attempt a re-run. - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_render_literal_bool(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - _ ERROR at setup of BooleanTest_databricks+databricks.test_render_literal_bool _ - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_whereclause(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. 
(Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - """ - pass - - class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. Requires investigation.") def test_round_trip_same_named_column(self): diff --git a/src/databricks/sqlalchemy/test_local/test_types.py b/src/databricks/sqlalchemy/test_local/test_types.py index 91f11e17e..f7423f697 100644 --- a/src/databricks/sqlalchemy/test_local/test_types.py +++ b/src/databricks/sqlalchemy/test_local/test_types.py @@ -36,12 +36,12 @@ class DatabricksDataType(enum.Enum): sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY, sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN, sqlalchemy.types.Date: DatabricksDataType.DATE, - sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Double: DatabricksDataType.DOUBLE, sqlalchemy.types.Enum: DatabricksDataType.STRING, sqlalchemy.types.Float: DatabricksDataType.FLOAT, sqlalchemy.types.Integer: DatabricksDataType.INT, - sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL, sqlalchemy.types.PickleType: DatabricksDataType.BINARY, sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT, diff --git a/src/databricks/sqlalchemy/types.py b/src/databricks/sqlalchemy/types.py deleted file mode 100644 index 4b10fc6f1..000000000 --- a/src/databricks/sqlalchemy/types.py +++ /dev/null @@ -1,80 +0,0 @@ -import sqlalchemy -from sqlalchemy.ext.compiler import compiles - - -@compiles(sqlalchemy.types.Enum, "databricks") -@compiles(sqlalchemy.types.String, "databricks") -@compiles(sqlalchemy.types.Text, "databricks") -@compiles(sqlalchemy.types.Time, "databricks") -@compiles(sqlalchemy.types.Unicode, "databricks") -@compiles(sqlalchemy.types.UnicodeText, "databricks") -@compiles(sqlalchemy.types.Uuid, "databricks") -def 
compile_string_databricks(type_, compiler, **kw): - """ - We override the default compilation for Enum(), String(), Text(), and Time() because SQLAlchemy - defaults to incompatible / abnormal compiled names - - Enum -> VARCHAR - String -> VARCHAR[LENGTH] - Text -> VARCHAR[LENGTH] - Time -> TIME - Unicode -> VARCHAR[LENGTH] - UnicodeText -> TEXT - Uuid -> CHAR[32] - - But all of these types will be compiled to STRING in Databricks SQL - """ - return "STRING" - - -@compiles(sqlalchemy.types.Integer, "databricks") -def compile_integer_databricks(type_, compiler, **kw): - """ - We need to override the default Integer compilation rendering because Databricks uses "INT" instead of "INTEGER" - """ - return "INT" - - -@compiles(sqlalchemy.types.LargeBinary, "databricks") -def compile_binary_databricks(type_, compiler, **kw): - """ - We need to override the default LargeBinary compilation rendering because Databricks uses "BINARY" instead of "BLOB" - """ - return "BINARY" - - -@compiles(sqlalchemy.types.Numeric, "databricks") -def compile_numeric_databricks(type_, compiler, **kw): - """ - We need to override the default Numeric compilation rendering because Databricks uses "DECIMAL" instead of "NUMERIC" - - The built-in visit_DECIMAL behaviour captures the precision and scale. Here we're just mapping calls to compile Numeric - to the SQLAlchemy Decimal() implementation - """ - return compiler.visit_DECIMAL(type_, **kw) - - -@compiles(sqlalchemy.types.DateTime, "databricks") -def compile_datetime_databricks(type_, compiler, **kw): - """ - We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP" instead of "DATETIME" - """ - return "TIMESTAMP" - - -@compiles(sqlalchemy.types.ARRAY, "databricks") -def compile_array_databricks(type_, compiler, **kw): - """ - SQLAlchemy's default ARRAY can't compile as it's only implemented for Postgresql. - The Postgres implementation works for Databricks SQL, so we duplicate that here. 
- - :type_: - This is an instance of sqlalchemy.types.ARRAY which always includes an item_type attribute - which is itself an instance of TypeEngine - - https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY - """ - - inner = compiler.process(type_.item_type, **kw) - - return f"ARRAY<{inner}>"