Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

fix: series.(to_csv|to_json) leverages bq export #452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions 12 bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2588,16 +2588,16 @@ def to_json(
if "*" not in path_or_buf:
raise NotImplementedError(ERROR_IO_REQUIRES_WILDCARD)

if lines is True and orient != "records":
raise ValueError(
"'lines' keyword is only valid when 'orient' is 'records'."
)

# TODO(ashleyxu) Support lines=False for small tables with arrays and TO_JSON_STRING.
# See: https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#to_json_string
if lines is False:
raise NotImplementedError(
f"Only newline delimited JSON format is supported. {constants.FEEDBACK_LINK}"
f"Only newline-delimited JSON is supported. Add `lines=True` to your function call. {constants.FEEDBACK_LINK}"
)

if lines is True and orient != "records":
raise ValueError(
"'lines' keyword is only valid when 'orient' is 'records'."
)

result_table = self._run_io_query(
Expand Down
20 changes: 12 additions & 8 deletions 20 bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1390,9 +1390,10 @@ def to_frame(self, name: blocks.Label = None) -> bigframes.dataframe.DataFrame:
)
return bigframes.dataframe.DataFrame(block)

def to_csv(self, path_or_buf=None, **kwargs) -> typing.Optional[str]:
# TODO(b/280651142): Implement version that leverages bq export native csv support to bypass local pandas step.
return self.to_pandas().to_csv(path_or_buf, **kwargs)
def to_csv(
self, path_or_buf: str, sep=",", *, header: bool = True, index: bool = True
) -> None:
return self.to_frame().to_csv(path_or_buf, sep=sep, header=header, index=index)

def to_dict(self, into: type[dict] = dict) -> typing.Mapping:
return typing.cast(dict, self.to_pandas().to_dict(into)) # type: ignore
Expand All @@ -1402,14 +1403,17 @@ def to_excel(self, excel_writer, sheet_name="Sheet1", **kwargs) -> None:

def to_json(
self,
path_or_buf=None,
path_or_buf: str,
orient: typing.Literal[
"split", "records", "index", "columns", "values", "table"
] = "columns",
**kwargs,
) -> typing.Optional[str]:
# TODO(b/280651142): Implement version that leverages bq export native csv support to bypass local pandas step.
return self.to_pandas().to_json(path_or_buf, **kwargs)
*,
lines: bool = False,
index: bool = True,
) -> None:
return self.to_frame().to_json(
path_or_buf=path_or_buf, orient=orient, lines=lines, index=index
)

def to_latex(
self, buf=None, columns=None, header=True, index=True, **kwargs
Expand Down
28 changes: 20 additions & 8 deletions 28 tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2384,18 +2384,30 @@ def test_to_frame(scalars_dfs):
assert_pandas_df_equal(bf_result, pd_result)


def test_to_json(scalars_df_index, scalars_pandas_df_index):
bf_result = scalars_df_index["int64_col"].to_json()
pd_result = scalars_pandas_df_index["int64_col"].to_json()
def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
path = gcs_folder + "test_series_to_json*.jsonl"
scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
gcs_df = pd.read_json(path, lines=True)

assert bf_result == pd_result
pd.testing.assert_series_equal(
gcs_df["int64_col"].astype(pd.Int64Dtype()),
scalars_pandas_df_index["int64_col"],
check_dtype=False,
check_index=False,
)


def test_to_csv(scalars_df_index, scalars_pandas_df_index):
bf_result = scalars_df_index["int64_col"].to_csv()
pd_result = scalars_pandas_df_index["int64_col"].to_csv()
def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
path = gcs_folder + "test_series_to_csv*.csv"
scalars_df_index["int64_col"].to_csv(path)
gcs_df = pd.read_csv(path)

assert bf_result == pd_result
pd.testing.assert_series_equal(
gcs_df["int64_col"].astype(pd.Int64Dtype()),
scalars_pandas_df_index["int64_col"],
check_dtype=False,
check_index=False,
)


def test_to_latex(scalars_df_index, scalars_pandas_df_index):
Expand Down
4 changes: 2 additions & 2 deletions 4 third_party/bigframes_vendored/pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def to_json(
*,
index: bool = True,
lines: bool = False,
) -> str | None:
) -> None:
"""Convert the object to a JSON string, written to Cloud Storage.

Note NaN's and None will be converted to null and datetime objects
Expand Down Expand Up @@ -241,7 +241,7 @@ def to_json(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_csv(self, path_or_buf: str, *, index: bool = True) -> str | None:
def to_csv(self, path_or_buf: str, *, index: bool = True) -> None:
"""Write object to a comma-separated values (csv) file on Cloud Storage.

Args:
Expand Down
53 changes: 0 additions & 53 deletions 53 third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,59 +535,6 @@ def to_xarray(self):
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_json(
self,
path_or_buf=None,
orient: Literal[
"split", "records", "index", "columns", "values", "table"
] = "columns",
**kwarg,
) -> str | None:
"""
Convert the object to a JSON string.

Note NaN's and None will be converted to null and datetime objects
will be converted to UNIX timestamps.

Args:
path_or_buf (str, path object, file-like object, or None, default None):
String, path object (implementing os.PathLike[str]), or file-like
object implementing a write() function. If None, the result is
returned as a string.
orient ({"split", "records", "index", "columns", "values", "table"}, default "columns"):
Indication of expected JSON string format.
'split' : dict like {{'index' -> [index], 'columns' -> [columns],'data' -> [values]}}
'records' : list like [{{column -> value}}, ... , {{column -> value}}]
'index' : dict like {{index -> {{column -> value}}}}
'columns' : dict like {{column -> {{index -> value}}}}
'values' : just the values array
'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}}
Describing the data, where data component is like ``orient='records'``.

Returns:
None or str: If path_or_buf is None, returns the resulting json format as a
string. Otherwise returns None.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_csv(self, path_or_buf: str, *, index: bool = True) -> str | None:
"""
Write object to a comma-separated values (csv) file.

Args:
path_or_buf (str, path object, file-like object, or None, default None):
String, path object (implementing os.PathLike[str]), or file-like
object implementing a write() function. If None, the result is
returned as a string. If a non-binary file object is passed, it should
be opened with `newline=''`, disabling universal newlines. If a binary
file object is passed, `mode` might need to contain a `'b'`.

Returns:
None or str: If path_or_buf is None, returns the resulting csv format
as a string. Otherwise returns None.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def agg(self, func):
"""
Aggregate using one or more operations over the specified axis.
Expand Down
Morty Proxy: This is a proxied and sanitized view of the page; visit the original site.