diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90d65327ea980..31c926233d5b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -164,6 +164,7 @@ repos: rev: v1.2.2 hooks: - id: yesqa + additional_dependencies: [flake8==3.8.4] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.3.0 hooks: diff --git a/Makefile b/Makefile index 2c968234749f5..ae1b082626629 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY : develop build clean clean_pyc doc lint-diff black +.PHONY : develop build clean clean_pyc doc lint-diff black test-scripts all: develop @@ -38,3 +38,6 @@ check: --included-file-extensions="py" \ --excluded-file-paths=pandas/tests,asv_bench/,doc/ pandas/ + +test-scripts: + pytest scripts diff --git a/doc/source/_static/css/pandas.css b/doc/source/_static/css/pandas.css index 403d182e3d3e5..87357fd8ae716 100644 --- a/doc/source/_static/css/pandas.css +++ b/doc/source/_static/css/pandas.css @@ -2,7 +2,7 @@ :root { /* Use softer blue from bootstrap's default info color */ - --color-info: 23, 162, 184; + --pst-color-info: 23, 162, 184; } /* Getting started index page */ diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index c9c31b408fb7e..8739694c20e33 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -16,6 +16,7 @@ Version 1.2 .. toctree:: :maxdepth: 2 + v1.2.4 v1.2.3 v1.2.2 v1.2.1 diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst index c94491df474ab..dec2d061504b4 100644 --- a/doc/source/whatsnew/v1.2.3.rst +++ b/doc/source/whatsnew/v1.2.3.rst @@ -29,4 +29,4 @@ Fixed regressions Contributors ~~~~~~~~~~~~ -.. contributors:: v1.2.2..v1.2.3|HEAD +.. contributors:: v1.2.2..v1.2.3 diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst new file mode 100644 index 0000000000000..dd74091b64014 --- /dev/null +++ b/doc/source/whatsnew/v1.2.4.rst @@ -0,0 +1,33 @@ +.. _whatsnew_124: + +What's new in 1.2.4 (April 12, 2021) +------------------------------------ + +These are the changes in pandas 1.2.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_124.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`) +- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`) +- Fixed regression in (in)equality comparison of ``pd.NaT`` with a non-datetimelike numpy array returning a scalar instead of an array (:issue:`40722`) +- Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`) +- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`) +- Fixed regression in repr of floats in an ``object`` column not respecting ``float_format`` when printed in the console or outputted through :meth:`DataFrame.to_string`, :meth:`DataFrame.to_html`, and :meth:`DataFrame.to_latex` (:issue:`40024`) +- Fixed regression in NumPy ufuncs such as ``np.add`` not passing through all arguments for :class:`DataFrame` (:issue:`40662`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_124.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v1.2.3..v1.2.4|HEAD diff --git a/environment.yml b/environment.yml index bc5bfcd162500..a369f656cb575 100644 --- a/environment.yml +++ b/environment.yml @@ -113,5 +113,5 @@ dependencies: - tabulate>=0.8.3 # DataFrame.to_markdown - natsort # DataFrame.sort_values - pip: - - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103 + - git+https://github.com/pydata/pydata-sphinx-theme.git@master - numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 59298522d86d1..5a3cccdbfea7e 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -272,18 +272,6 @@ static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { return ret; } -static int is_simple_frame(PyObject *obj) { - PyObject *check = get_sub_attr(obj, "_mgr", "is_mixed_type"); - int ret = (check == Py_False); - - if (!check) { - return 0; - } - - Py_DECREF(check); - return ret; -} - static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { PyObject *tmp = PyObject_GetAttrString(obj, attr); Py_ssize_t ret; @@ -301,6 +289,17 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } +static int is_simple_frame(PyObject *obj) { + PyObject *mgr = PyObject_GetAttrString(obj, "_mgr"); + if (!mgr) { + return 0; + } + int ret = (get_attr_length(mgr, "blocks") <= 1); + + Py_DECREF(mgr); + return ret; +} + static npy_int64 get_long_attr(PyObject *o, const char *attr) { npy_int64 long_val; PyObject *value = PyObject_GetAttrString(o, attr); diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 3a61de62daf39..51863f0749790 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -124,6 +124,10 @@ cdef class _NaT(datetime): result.fill(_nat_scalar_rules[op]) elif other.dtype.kind == "O": result = np.array([PyObject_RichCompare(self, x, op) for x in other]) + elif op == Py_EQ: + result = np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + result = np.ones(other.shape, dtype=bool) else: return NotImplemented return result diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index cb185dcf78f63..8d02ddef29593 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -351,15 +351,17 @@ def reconstruct(result): # * len(inputs) > 1 is doable when we know that we have # aligned blocks / dtypes. inputs = tuple(np.asarray(x) for x in inputs) - result = getattr(ufunc, method)(*inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) elif self.ndim == 1: # ufunc(series, ...) inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) result = getattr(ufunc, method)(*inputs, **kwargs) else: # ufunc(dataframe) - if method == "__call__": + if method == "__call__" and not kwargs: # for np.(..) 
calls + # kwargs cannot necessarily be handled block-by-block, so only + # take this path if there are no kwargs mgr = inputs[0]._mgr result = mgr.apply(getattr(ufunc, method)) else: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0094ebc744a34..4c156d7470364 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8786,6 +8786,7 @@ def _reduce( **kwds, ): + min_count = kwds.get("min_count", 0) assert filter_type is None or filter_type == "bool", filter_type out_dtype = "bool" if filter_type == "bool" else None @@ -8830,7 +8831,7 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - if numeric_only is not None or axis == 0: + if (numeric_only is not None or axis == 0) and min_count == 0: # For numeric_only non-None and axis non-None, we know # which blocks to use and no try/except is needed. # For numeric_only=None only the case with axis==0 and no object @@ -8847,7 +8848,7 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce - res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) out = df._constructor(res).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) @@ -8875,14 +8876,15 @@ def _get_data() -> DataFrame: with np.errstate(all="ignore"): result = func(values) - if filter_type == "bool" and notna(result).all(): - result = result.astype(np.bool_) - elif filter_type is None and is_object_dtype(result.dtype): - try: - result = result.astype(np.float64) - except (ValueError, TypeError): - # try to coerce to the original dtypes item by item if we can - pass + if hasattr(result, "dtype"): + if filter_type == "bool" and notna(result).all(): + result = result.astype(np.bool_) + elif filter_type is None and is_object_dtype(result.dtype): + try: + result = result.astype(np.float64) + except (ValueError, TypeError): + # try to coerce to the original dtypes item by item if we can + pass result = self._constructor_sliced(result, index=labels) return result diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 99218cebc37e1..b6bca855a9f05 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -894,10 +894,20 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: rb = [self if inplace else self.copy()] for i, (src, dest) in enumerate(pairs): + convert = i == src_len # only convert once at the end new_rb: List["Block"] = [] - for blk in rb: - m = masks[i] - convert = i == src_len # only convert once at the end + + # GH-39338: _replace_coerce can split a block into + # single-column blocks, so track the index so we know + # where to index into the mask + for blk_num, blk in enumerate(rb): + if len(rb) == 1: + m = masks[i] + else: + mib = masks[i] + assert not isinstance(mib, bool) + m = mib[blk_num : blk_num + 1] + result = blk._replace_coerce( to_replace=src, value=dest, @@ -1458,7 +1468,7 @@ def where( raise ValueError("where must have a condition that is ndarray like") if cond.ravel("K").all(): - result = values + result = values.copy() else: # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 88662a4fabed8..edc1b1e96509e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import 
itertools import operator @@ -1368,7 +1370,7 @@ def _maybe_null_out( mask: Optional[np.ndarray], shape: Tuple[int, ...], min_count: int = 1, -) -> float: +) -> Union[np.ndarray, float]: """ Returns ------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d0b821a3679bb..ee0600cfa16e0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1254,7 +1254,9 @@ def _format_strings(self) -> List[str]: float_format = get_option("display.float_format") if float_format is None: precision = get_option("display.precision") - float_format = lambda x: f"{x: .{precision:d}f}" + float_format = lambda x: _trim_zeros_single_float( + f"{x: .{precision:d}f}" + ) else: float_format = self.float_format @@ -1305,7 +1307,7 @@ def _format(x): if not is_float_type[i] and leading_space: fmt_values.append(f" {_format(v)}") elif is_float_type[i]: - fmt_values.append(_trim_zeros_single_float(float_format(v))) + fmt_values.append(float_format(v)) else: if leading_space is False: # False specifically, so that the default is diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 02b06b164a2a1..b93f097b93441 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -5,6 +5,7 @@ from contextlib import contextmanager from datetime import date, datetime, time +from distutils.version import LooseVersion from functools import partial import re from typing import Iterator, List, Optional, Union, overload @@ -55,6 +56,16 @@ def _is_sqlalchemy_connectable(con): return False +def _gt14() -> bool: + """ + Check if sqlalchemy.__version__ is at least 1.4.0, when several + deprecations were made. + """ + import sqlalchemy + + return LooseVersion(sqlalchemy.__version__) >= LooseVersion("1.4.0") + + def _convert_params(sql, params): """Convert SQL and params args to DBAPI2.0 compliant format.""" args = [sql] @@ -715,7 +726,10 @@ def sql_schema(self): def _execute_create(self): # Inserting table into database, add to MetaData object - self.table = self.table.tometadata(self.pd_sql.meta) + if _gt14(): + self.table = self.table.to_metadata(self.pd_sql.meta) + else: + self.table = self.table.tometadata(self.pd_sql.meta) self.table.create() def create(self): @@ -1409,9 +1423,17 @@ def to_sql( # Only check when name is not a number and name is not lower case engine = self.connectable.engine with self.connectable.connect() as conn: - table_names = engine.table_names( - schema=schema or self.meta.schema, connection=conn - ) + if _gt14(): + from sqlalchemy import inspect + + insp = inspect(conn) + table_names = insp.get_table_names( + schema=schema or self.meta.schema + ) + else: + table_names = engine.table_names( + schema=schema or self.meta.schema, connection=conn + ) if name not in table_names: msg = ( f"The provided table name '{name}' is not found exactly as " @@ -1426,9 +1448,15 @@ def tables(self): return self.meta.tables def has_table(self, name, schema=None): - return self.connectable.run_callable( - self.connectable.dialect.has_table, name, schema or self.meta.schema - ) + if _gt14(): + import sqlalchemy as sa + + insp = sa.inspect(self.connectable) + return insp.has_table(name, schema or self.meta.schema) + else: + return self.connectable.run_callable( + self.connectable.dialect.has_table, name, schema or self.meta.schema + ) def get_table(self, table_name, schema=None): schema = schema or self.meta.schema diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py index 964596d9b6319..729d2bf1f019a 100644 --- a/pandas/plotting/_matplotlib/compat.py +++ 
b/pandas/plotting/_matplotlib/compat.py @@ -22,3 +22,4 @@ def inner(): mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge) +mpl_ge_3_4_0 = _mpl_version("3.4.0", operator.ge) diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index a1c62f9fce23c..f519d1e96f5b0 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -144,7 +144,9 @@ def normalize(series): df = frame.drop(class_column, axis=1).apply(normalize) if ax is None: - ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + ax = plt.gca() + ax.set_xlim(-1, 1) + ax.set_ylim(-1, 1) to_plot: Dict[Label, List[List]] = {} colors = get_standard_colors( @@ -260,7 +262,8 @@ def f(t): ) colors = dict(zip(classes, color_values)) if ax is None: - ax = plt.gca(xlim=(-np.pi, np.pi)) + ax = plt.gca() + ax.set_xlim(-np.pi, np.pi) for i in range(n): row = df.iloc[i].values f = function(row) @@ -440,7 +443,9 @@ def autocorrelation_plot( n = len(series) data = np.asarray(series) if ax is None: - ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + ax = plt.gca() + ax.set_xlim(1, n) + ax.set_ylim(-1.0, 1.0) mean = np.mean(data) c0 = np.sum((data - mean) ** 2) / float(n) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 955a057000c41..da7b451137c94 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -366,6 +366,11 @@ def handle_shared_axes( row_num = lambda x: x.rowNum col_num = lambda x: x.colNum + if compat.mpl_ge_3_4_0(): + is_first_col = lambda x: x.get_subplotspec().is_first_col() + else: + is_first_col = lambda x: x.is_first_col() + if nrows > 1: try: # first find out the ax layout, @@ -397,7 +402,7 @@ def handle_shared_axes( # only the first column should get y labels -> set all other to # off as we only have labels in the first column and we always # have a subplot there, we can skip the layout test - if ax.is_first_col(): + if is_first_col(ax): continue if sharey or _has_externally_shared_axis(ax, "y"): _remove_labels_from_axis(ax.yaxis) diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index 01de64568a011..9b854a81f2def 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -66,7 +66,10 @@ def test_div(left_array, right_array): @pytest.mark.parametrize( "opname", [ - "floordiv", + pytest.param( + "floordiv", + marks=pytest.mark.xfail(reason="NumpyDev GH#40874", strict=False), + ), "mod", pytest.param( "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? 
GH34686") diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 148b7092abb56..34686f6052131 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.compat.numpy import is_numpy_dev + import pandas as pd import pandas._testing as tm from pandas.core.arrays import ExtensionArray @@ -49,6 +51,8 @@ def test_array_scalar_like_equivalence(data, all_arithmetic_operators): def test_array_NA(data, all_arithmetic_operators): if "truediv" in all_arithmetic_operators: pytest.skip("division with pd.NA raises") + if "floordiv" in all_arithmetic_operators and is_numpy_dev: + pytest.skip("NumpyDev behavior GH#40874") data, _ = data op = tm.get_op_from_name(all_arithmetic_operators) check_skip(data, all_arithmetic_operators) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 86a0bc9213256..d15c822f22c14 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -16,6 +16,8 @@ import numpy as np import pytest +from pandas.compat.numpy import is_numpy_dev + import pandas as pd import pandas._testing as tm from pandas.core.arrays.boolean import BooleanDtype @@ -139,6 +141,21 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): with pytest.raises(exc): op(s, other) + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if "floordiv" in all_arithmetic_operators and is_numpy_dev: + pytest.skip("NumpyDev behavior GH#40874") + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if "floordiv" in all_arithmetic_operators and is_numpy_dev: + pytest.skip("NumpyDev behavior GH#40874") + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_divmod_series_array(self, data, data_for_twos): + if is_numpy_dev: + pytest.skip("NumpyDev behavior GH#40874") + super().test_divmod_series_array(data, data_for_twos) + def _check_divmod_op(self, s, op, other, exc=None): # override to not raise an error super()._check_divmod_op(s, op, other, None) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index acdb5726e4adb..2b3c6010bf633 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -653,3 +653,20 @@ def test_where_categorical_filtering(self): expected.loc[0, :] = np.nan tm.assert_equal(result, expected) + + +def test_where_copies_with_noop(frame_or_series): + # GH-39595 + result = frame_or_series([1, 2, 3, 4]) + expected = result.copy() + col = result[0] if frame_or_series is DataFrame else result + + where_res = result.where(col < 5) + where_res *= 2 + + tm.assert_equal(result, expected) + + where_res = result.where(col > 5, [1, 2, 3, 4]) + where_res *= 2 + + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 1b570028964df..c4f2e09911b34 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -644,6 +644,28 @@ def test_regex_replace_numeric_to_object_conversion(self, mix_abc): tm.assert_frame_equal(res, expec) assert res.a.dtype == np.object_ + @pytest.mark.parametrize( + "to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}] + ) + def 
test_joint_simple_replace_and_regex_replace(self, to_replace): + # GH-39338 + df = DataFrame( + { + "col1": ["1,000", "a", "3"], + "col2": ["a", "", "b"], + "col3": ["a", "b", "c"], + } + ) + result = df.replace(regex=to_replace) + expected = DataFrame( + { + "col1": ["1000", "a", "3"], + "col2": ["a", np.nan, "b"], + "col3": ["a", "b", "c"], + } + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"]) def test_replace_regex_metachar(self, metachar): df = DataFrame({"a": [metachar, "else"]}) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d843d4b0e9504..cb481613eb97f 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -835,6 +835,13 @@ def test_sum_nanops_timedelta(self): expected = Series([0, 0, np.nan], dtype="m8[ns]", index=idx) tm.assert_series_equal(result, expected) + def test_sum_nanops_min_count(self): + # https://github.com/pandas-dev/pandas/issues/39738 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + result = df.sum(min_count=10) + expected = Series([np.nan, np.nan], index=["x", "y"]) + tm.assert_series_equal(result, expected) + def test_sum_object(self, float_frame): values = float_frame.values.astype(int) frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns) diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index 83fd3db72a90c..19ebae449ecc3 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -1,3 +1,5 @@ +from functools import partial + import numpy as np import pytest @@ -55,6 +57,42 @@ def test_binary_input_dispatch_binop(dtype): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "func,arg,expected", + [ + (np.add, 1, [2, 3, 4, 5]), + ( + partial(np.add, where=[[False, True], [True, False]]), + np.array([[1, 1], [1, 1]]), + [0, 3, 4, 0], + ), + (np.power, np.array([[1, 1], [2, 2]]), [1, 2, 9, 16]), + (np.subtract, 2, [-1, 0, 1, 2]), + ( + partial(np.negative, where=np.array([[False, True], [True, False]])), + None, + [0, -2, -3, 0], + ), + ], +) +def test_ufunc_passes_args(func, arg, expected, request): + # GH#40662 + arr = np.array([[1, 2], [3, 4]]) + df = pd.DataFrame(arr) + result_inplace = np.zeros_like(arr) + # 1-argument ufunc + if arg is None: + result = func(df, out=result_inplace) + else: + result = func(df, arg, out=result_inplace) + + expected = np.array(expected).reshape(2, 2) + tm.assert_numpy_array_equal(result_inplace, expected) + + expected = pd.DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype_a", dtypes) @pytest.mark.parametrize("dtype_b", dtypes) def test_binary_input_aligns_columns(dtype_a, dtype_b): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b0b07045a9156..d879808790277 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2021,6 +2021,21 @@ def test_repr_str_float_truncation(self, data, expected): result = repr(series) assert result == expected + @pytest.mark.parametrize( + "float_format,expected", + [ + ("{:,.0f}".format, "0 1,000\n1 test\ndtype: object"), + ("{:.4f}".format, "0 1000.0000\n1 test\ndtype: object"), + ], + ) + def test_repr_float_format_in_object_col(self, float_format, expected): + # GH#40024 + df = Series([1000.0, "test"]) + with option_context("display.float_format", float_format): + result = repr(df) + + assert 
result == expected + def test_dict_entries(self): df = DataFrame({"A": [{"a": 1, "b": 2}]}) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index a88dec84bd693..2e428453dfc93 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -878,3 +878,29 @@ def test_to_html_na_rep_and_float_format(na_rep): """ assert result == expected + + +def test_to_html_float_format_object_col(): + # GH#40024 + df = DataFrame(data={"x": [1000.0, "test"]}) + result = df.to_html(float_format=lambda x: f"{x:,.0f}") + expected = """ + + + + + + + + + + + + + + + + +
+      <th>x</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>0</th>
+      <td>1,000</td>
+    </tr>
+    <tr>
+      <th>1</th>
+      <td>test</td>
+    </tr>
+  </tbody>
+</table>
""" + + assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index ba6d7c010613b..b8dc5b688160d 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -121,6 +121,24 @@ def test_to_latex_column_format(self): ) assert result == expected + def test_to_latex_float_format_object_col(self): + # GH#40024 + ser = Series([1000.0, "test"]) + result = ser.to_latex(float_format="{:,.0f}".format) + expected = _dedent( + r""" + \begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & 1,000 \\ + 1 & test \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + def test_to_latex_empty_tabular(self): df = DataFrame() result = df.to_latex() diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index aed1aaedf2fa3..aedb87eb44797 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -66,6 +66,7 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): @td.skip_if_no("bs4") +@td.skip_if_no("html5lib") def test_bs4_version_fails(monkeypatch, datapath): import bs4 @@ -85,6 +86,7 @@ def test_invalid_flavor(): @td.skip_if_no("bs4") @td.skip_if_no("lxml") +@td.skip_if_no("html5lib") def test_same_ordering(datapath): filename = datapath("io", "data", "html", "valid_markup.html") dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"]) @@ -95,7 +97,7 @@ def test_same_ordering(datapath): @pytest.mark.parametrize( "flavor", [ - pytest.param("bs4", marks=td.skip_if_no("bs4")), + pytest.param("bs4", marks=[td.skip_if_no("bs4"), td.skip_if_no("html5lib")]), pytest.param("lxml", marks=td.skip_if_no("lxml")), ], scope="class", diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 16d4bc65094f8..f8a8b662f2652 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -44,10 +44,11 @@ import pandas._testing as tm import pandas.io.sql as sql -from pandas.io.sql import read_sql_query, read_sql_table +from pandas.io.sql import _gt14, read_sql_query, read_sql_table try: import sqlalchemy + from sqlalchemy import inspect from sqlalchemy.ext import declarative from sqlalchemy.orm import session as sa_session import sqlalchemy.schema @@ -1331,7 +1332,11 @@ def test_create_table(self): pandasSQL = sql.SQLDatabase(temp_conn) pandasSQL.to_sql(temp_frame, "temp_frame") - assert temp_conn.has_table("temp_frame") + if _gt14(): + insp = inspect(temp_conn) + assert insp.has_table("temp_frame") + else: + assert temp_conn.has_table("temp_frame") def test_drop_table(self): temp_conn = self.connect() @@ -1343,11 +1348,18 @@ def test_drop_table(self): pandasSQL = sql.SQLDatabase(temp_conn) pandasSQL.to_sql(temp_frame, "temp_frame") - assert temp_conn.has_table("temp_frame") + if _gt14(): + insp = inspect(temp_conn) + assert insp.has_table("temp_frame") + else: + assert temp_conn.has_table("temp_frame") pandasSQL.drop_table("temp_frame") - assert not temp_conn.has_table("temp_frame") + if _gt14(): + assert not insp.has_table("temp_frame") + else: + assert not temp_conn.has_table("temp_frame") def test_roundtrip(self): self._roundtrip() @@ -1689,9 +1701,10 @@ def test_nan_string(self): tm.assert_frame_equal(result, df) def _get_index_columns(self, tbl_name): - from sqlalchemy.engine import reflection + from sqlalchemy import inspect + + insp = inspect(self.conn) - insp = reflection.Inspector.from_engine(self.conn) ixs = insp.get_indexes(tbl_name) ixs = [i["column_names"] for i in ixs] return ixs diff --git 
a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 68e693cdb85e2..baed44e09b581 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -697,7 +697,9 @@ def test_plot_scatter_with_categorical_data(self, x, y): _check_plot_works(df.plot.scatter, x=x, y=y) - def test_plot_scatter_with_c(self): + def test_plot_scatter_with_c(self, request): + from pandas.plotting._matplotlib.compat import mpl_ge_3_4_0 + df = DataFrame( np.random.randn(6, 4), index=list(string.ascii_letters[:6]), @@ -709,9 +711,10 @@ def test_plot_scatter_with_c(self): # default to Greys assert ax.collections[0].cmap.name == "Greys" - # n.b. there appears to be no public method - # to get the colorbar label - assert ax.collections[0].colorbar._label == "z" + if mpl_ge_3_4_0(): + assert ax.collections[0].colorbar.ax.get_ylabel() == "z" + else: + assert ax.collections[0].colorbar._label == "z" cm = "cubehelix" ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 1208100ed2dce..ba774e8d13cf1 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -71,10 +71,12 @@ def setup_method(self, method): def test_autocorrelation_plot(self): from pandas.plotting import autocorrelation_plot - _check_plot_works(autocorrelation_plot, series=self.ts) - _check_plot_works(autocorrelation_plot, series=self.ts.values) + # Ensure no UserWarning when making plot + with tm.assert_produces_warning(None): + _check_plot_works(autocorrelation_plot, series=self.ts) + _check_plot_works(autocorrelation_plot, series=self.ts.values) - ax = autocorrelation_plot(self.ts, label="Test") + ax = autocorrelation_plot(self.ts, label="Test") self._check_legend_labels(ax, labels=["Test"]) def test_lag_plot(self): @@ -132,8 +134,9 @@ def test_andrews_curves(self, iris): from pandas.plotting import andrews_curves df = iris - - _check_plot_works(andrews_curves, frame=df, class_column="Name") + # Ensure no UserWarning when making plot + with tm.assert_produces_warning(None): + _check_plot_works(andrews_curves, frame=df, class_column="Name") rgba = ("#556270", "#4ECDC4", "#C7F464") ax = _check_plot_works( @@ -280,7 +283,9 @@ def test_radviz(self, iris): from pandas.plotting import radviz df = iris - _check_plot_works(radviz, frame=df, class_column="Name") + # Ensure no UserWarning when making plot + with tm.assert_produces_warning(None): + _check_plot_works(radviz, frame=df, class_column="Name") rgba = ("#556270", "#4ECDC4", "#C7F464") ax = _check_plot_works(radviz, frame=df, class_column="Name", color=rgba) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 20de0effc30e1..84b9c51b7d387 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -575,6 +575,47 @@ def test_nat_comparisons_invalid(other, op): op(other, NaT) +@pytest.mark.parametrize( + "other", + [ + np.array(["foo"] * 2, dtype=object), + np.array([2, 3], dtype="int64"), + np.array([2.0, 3.5], dtype="float64"), + ], + ids=["str", "int", "float"], +) +def test_nat_comparisons_invalid_ndarray(other): + # GH#40722 + expected = np.array([False, False]) + result = NaT == other + tm.assert_numpy_array_equal(result, expected) + result = other == NaT + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([True, True]) + result = NaT != other + tm.assert_numpy_array_equal(result, expected) + result = other != NaT + 
tm.assert_numpy_array_equal(result, expected) + + for symbol, op in [ + ("<=", operator.le), + ("<", operator.lt), + (">=", operator.ge), + (">", operator.gt), + ]: + msg = f"'{symbol}' not supported between" + + with pytest.raises(TypeError, match=msg): + op(NaT, other) + + if other.dtype == np.dtype("object"): + # uses the reverse operator, so symbol changes + msg = None + with pytest.raises(TypeError, match=msg): + op(other, NaT) + + def test_compare_date(): # GH#39151 comparing NaT with date object is deprecated # See also: tests.scalar.timestamps.test_comparisons::test_compare_date diff --git a/requirements-dev.txt b/requirements-dev.txt index 98a149eb5d412..595b2ee537e63 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -76,5 +76,5 @@ cftime pyreadstat tabulate>=0.8.3 natsort -git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103 +git+https://github.com/pydata/pydata-sphinx-theme.git@master numpydoc < 1.2 diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 37e6d288d9341..10cb3042dfacb 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -2,7 +2,7 @@ import pytest -from scripts.check_for_inconsistent_pandas_namespace import main +from ..check_for_inconsistent_pandas_namespace import main BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 74819db7b878c..7e4c68ddc183b 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -2,7 +2,8 @@ import textwrap import pytest -import validate_docstrings + +from .. import validate_docstrings class BadDocstrings: @@ -162,7 +163,9 @@ def test_bad_class(self, capsys): ( "BadDocstrings", "indentation_is_not_a_multiple_of_four", - ("flake8 error: E111 indentation is not a multiple of four",), + # with flake8 3.9.0, the message ends with four spaces, + # whereas in earlier versions, it ended with "four" + ("flake8 error: E111 indentation is not a multiple of ",), ), ( "BadDocstrings", diff --git a/scripts/tests/test_validate_unwanted_patterns.py b/scripts/tests/test_validate_unwanted_patterns.py index 947666a730ee9..ef93fd1d21981 100644 --- a/scripts/tests/test_validate_unwanted_patterns.py +++ b/scripts/tests/test_validate_unwanted_patterns.py @@ -1,7 +1,8 @@ import io import pytest -import validate_unwanted_patterns + +from .. import validate_unwanted_patterns class TestBarePytestRaises: