Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

feat: add bbq.json_query_array and warn bbq.json_extract_array deprecated #1811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions 2 bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
json_extract_array,
json_extract_string_array,
json_query,
json_query_array,
json_set,
json_value,
parse_json,
Expand Down Expand Up @@ -67,6 +68,7 @@
"json_extract_array",
"json_extract_string_array",
"json_query",
"json_query_array",
"json_set",
"json_value",
"parse_json",
Expand Down
59 changes: 59 additions & 0 deletions 59 bigframes/bigquery/_operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ def json_extract_array(
`STRING` or `JSON` values. This function uses single quotes and brackets to
escape invalid JSONPath characters in JSON keys.

.. deprecated:: 2.5.0
The ``json_extract_array`` is deprecated and will be removed in a future version.
Use ``json_query_array`` instead.

**Examples:**

>>> import bigframes.pandas as bpd
Expand Down Expand Up @@ -172,6 +176,11 @@ def json_extract_array(
Returns:
bigframes.series.Series: A new Series with the parsed arrays from the input.
"""
msg = (
"The `json_extract_array` is deprecated and will be removed in a future version. "
"Use `json_query_array` instead."
)
warnings.warn(bfe.format_message(msg), category=UserWarning)
return input._apply_unary_op(ops.JSONExtractArray(json_path=json_path))


Expand Down Expand Up @@ -273,6 +282,56 @@ def json_query(
return input._apply_unary_op(ops.JSONQuery(json_path=json_path))


def json_query_array(
    input: series.Series,
    json_path: str = "$",
) -> series.Series:
    """Pull a JSON array out of each value and return it as a SQL array.

    The elements of the resulting array are JSON-formatted `STRING` or `JSON`
    values. JSON keys containing characters that are invalid in a JSONPath are
    escaped with double quotes. For example: `"a.b"`.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]'])
        >>> bbq.json_query_array(s)
        0    ['1' '2' '3']
        1        ['4' '5']
        dtype: list<item: string>[pyarrow]

        >>> s = bpd.Series([
        ...   '{"fruits": [{"name": "apple"}, {"name": "cherry"}]}',
        ...   '{"fruits": [{"name": "guava"}, {"name": "grapes"}]}'
        ... ])
        >>> bbq.json_query_array(s, "$.fruits")
        0    ['{"name":"apple"}' '{"name":"cherry"}']
        1    ['{"name":"guava"}' '{"name":"grapes"}']
        dtype: list<item: string>[pyarrow]

        >>> s = bpd.Series([
        ...   '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
        ...   '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
        ... ])
        >>> bbq.json_query_array(s, "$.fruits.names")
        0    ['"apple"' '"cherry"']
        1    ['"guava"' '"grapes"']
        dtype: list<item: string>[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series holding the JSON data, either as native JSON objects
            or as JSON-formatted strings.
        json_path (str):
            The JSON path that selects the array to extract from each input
            value. Defaults to ``"$"``.

    Returns:
        bigframes.series.Series: A new Series with the parsed arrays from the input.
    """
    query_array_op = ops.JSONQueryArray(json_path=json_path)
    return input._apply_unary_op(query_array_op)


def json_value(
input: series.Series,
json_path: str,
Expand Down
13 changes: 13 additions & 0 deletions 13 bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,19 @@ def json_query(json_or_json_string, json_path: ibis_dtypes.str): # type: ignore
return json_query_op(json_or_json_string=x, json_path=op.json_path)


@scalar_op_compiler.register_unary_op(ops.JSONQueryArray, pass_op=True)
def json_query_array_op_impl(x: ibis_types.Value, op: ops.JSONQueryArray):
    """Compile ``ops.JSONQueryArray`` by calling a builtin UDF named ``json_query_array``."""
    element_type = x.type()

    # Stub that only declares the builtin's signature. Its return annotation is
    # filled in afterwards because it depends on the type of the input expression.
    def json_query_array(json_or_json_string, json_path: ibis_dtypes.str):  # type: ignore
        """Extracts a JSON array and converts it to a SQL array of JSON-formatted STRING or JSON values."""
        ...

    json_query_array.__annotations__["return"] = ibis_dtypes.Array[element_type]  # type: ignore
    builtin_json_query_array = ibis_udf.scalar.builtin(json_query_array)
    return builtin_json_query_array(json_or_json_string=x, json_path=op.json_path)


@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
return parse_json(json_str=x)
Expand Down
2 changes: 2 additions & 0 deletions 2 bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
JSONExtractArray,
JSONExtractStringArray,
JSONQuery,
JSONQueryArray,
JSONSet,
JSONValue,
ParseJSON,
Expand Down Expand Up @@ -359,6 +360,7 @@
"JSONExtractArray",
"JSONExtractStringArray",
"JSONQuery",
"JSONQueryArray",
"JSONSet",
"JSONValue",
"ParseJSON",
Expand Down
17 changes: 17 additions & 0 deletions 17 bigframes/operations/json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,23 @@ def output_type(self, *input_types):
return input_type


@dataclasses.dataclass(frozen=True)
class JSONQueryArray(base_ops.UnaryOp):
    """Unary op that queries a JSON array at ``json_path`` from a JSON-like input."""

    name: typing.ClassVar[str] = "json_query_array"
    # JSONPath expression selecting the array to extract.
    json_path: str

    def output_type(self, *input_types):
        """Return a list dtype whose item type is the Arrow form of the input dtype.

        Raises:
            TypeError: If the first input type is not JSON-like.
        """
        source_type = input_types[0]
        if dtypes.is_json_like(source_type):
            item_type = dtypes.bigframes_dtype_to_arrow_dtype(source_type)
            return pd.ArrowDtype(pa.list_(item_type))

        raise TypeError(
            "Input type must be a valid JSON object or JSON-formatted string type."
            + f" Received type: {source_type}"
        )


@dataclasses.dataclass(frozen=True)
class JSONExtractArray(base_ops.UnaryOp):
name: typing.ClassVar[str] = "json_extract_array"
Expand Down
63 changes: 62 additions & 1 deletion 63 tests/system/small/bigquery/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def test_json_extract_array_from_json():
['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4", "5"]}', "{}"],
dtype=dtypes.JSON_DTYPE,
)
actual = bbq.json_extract_array(s, "$.a")
with pytest.warns(UserWarning, match="The `json_extract_array` is deprecated"):
actual = bbq.json_extract_array(s, "$.a")

# This code provides a workaround for issue https://github.com/apache/arrow/issues/45262,
# which currently prevents constructing a series using the pa.list_(db_types.JSONArrrowType())
Expand Down Expand Up @@ -241,6 +242,66 @@ def test_json_query_w_invalid_series_type():
bbq.json_query(s, "$.a")


def test_json_query_array_from_json():
    """Querying "$.a" on a JSON-dtype Series yields arrays of JSON values."""
    json_docs = ['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4", "5"]}', "{}"]
    s = bpd.Series(json_docs, dtype=dtypes.JSON_DTYPE)

    actual = bbq.json_query_array(s, "$.a")

    # Workaround for https://github.com/apache/arrow/issues/45262, which currently
    # prevents constructing a series with a list-of-JSON Arrow dtype directly:
    # the expected values are produced by a SQL query instead.
    sql = """
        SELECT 0 AS id, [JSON '"ab"', JSON '"2"', JSON '"3 xy"'] AS data,
        UNION ALL
        SELECT 1, [],
        UNION ALL
        SELECT 2, [JSON '"4"', JSON '"5"'],
        UNION ALL
        SELECT 3, null,
    """
    expected = bpd.read_gbq(sql).set_index("id").sort_index()["data"]
    # Strip the names so the expected series lines up with the unnamed result.
    expected.index.name = None
    expected.name = None

    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_query_array_from_json_strings():
    """Querying "$.a" on JSON-formatted strings yields arrays of strings."""
    json_strings = ['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4","5"]}', "{}"]
    s = bpd.Series(json_strings, dtype=pd.StringDtype(storage="pyarrow"))

    actual = bbq.json_query_array(s, "$.a")

    # String elements keep their JSON quoting; the row without key "a" maps to None.
    string_list_dtype = pd.ArrowDtype(pa.list_(pa.string()))
    expected = bpd.Series(
        [['"ab"', '"2"', '"3 xy"'], [], ['"4"', '"5"'], None],
        dtype=string_list_dtype,
    )

    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_query_array_from_json_array_strings():
    """With the default JSON path, top-level JSON array strings are split into elements."""
    array_strings = ["[1, 2, 3]", "[]", "[4,5]"]
    s = bpd.Series(array_strings, dtype=pd.StringDtype(storage="pyarrow"))

    # No json_path argument: exercises the default path.
    actual = bbq.json_query_array(s)

    string_list_dtype = pd.ArrowDtype(pa.list_(pa.string()))
    expected = bpd.Series(
        [["1", "2", "3"], [], ["4", "5"]],
        dtype=string_list_dtype,
    )

    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_query_array_w_invalid_series_type():
    """A Series of integers is rejected with a TypeError."""
    int_series = bpd.Series([1, 2])

    with pytest.raises(TypeError):
        bbq.json_query_array(int_series)


def test_json_value_from_json():
s = bpd.Series(
['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.