Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions 2 bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
json_extract,
json_extract_array,
json_extract_string_array,
json_keys,
json_query,
json_query_array,
json_set,
Expand Down Expand Up @@ -138,6 +139,7 @@
"json_extract",
"json_extract_array",
"json_extract_string_array",
"json_keys",
"json_query",
"json_query_array",
"json_set",
Expand Down
29 changes: 29 additions & 0 deletions 29 bigframes/bigquery/_operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,35 @@ def json_value_array(
return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))


def json_keys(
    input: series.Series,
    max_depth: Optional[int] = None,
) -> series.Series:
    """Returns the unique keys of each JSON value as an ARRAY of STRINGs.

    Keys are not limited to the root of the object: keys of nested objects
    are included as dotted paths (for example ``'b.c'`` below), up to
    ``max_depth`` levels when that argument is given.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq

        >>> s = bpd.Series(['{"b": {"c": 2}, "a": 1}'], dtype="json")
        >>> bbq.json_keys(s)
        0    ['a' 'b' 'b.c']
        dtype: list<item: string>[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series containing JSON data.
        max_depth (int, optional):
            Specifies the maximum depth of nested fields to search for keys.
            If not provided, keys at all levels are searched.

    Returns:
        bigframes.series.Series: A new Series containing arrays of keys from the input JSON.
    """
    keys_op = ops.JSONKeys(max_depth=max_depth)
    return input._apply_unary_op(keys_op)


def to_json(
input: series.Series,
) -> series.Series:
Expand Down
13 changes: 13 additions & 0 deletions 13 bigframes/core/compile/ibis_compiler/scalar_op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,11 @@ def json_value_array_op_impl(x: ibis_types.Value, op: ops.JSONValueArray):
return json_value_array(json_obj=x, json_path=op.json_path)


@scalar_op_compiler.register_unary_op(ops.JSONKeys, pass_op=True)
def json_keys_op_impl(x: ibis_types.Value, op: ops.JSONKeys):
    """Compile ``ops.JSONKeys`` by delegating to the ``json_keys`` UDF binding."""
    # The op's max_depth is forwarded as-is, including when it is None.
    return json_keys(json_obj=x, max_depth=op.max_depth)


# Blob Ops
@scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value):
Expand Down Expand Up @@ -2059,6 +2064,14 @@ def to_json_string(value) -> ibis_dtypes.String: # type: ignore[empty-body]
"""Convert value to JSON-formatted string."""


@ibis_udf.scalar.builtin(name="json_keys")
def json_keys(  # type: ignore[empty-body]
    json_obj: ibis_dtypes.JSON,
    max_depth: ibis_dtypes.Int64,
) -> ibis_dtypes.Array[ibis_dtypes.String]:
    """Extracts unique JSON keys from a JSON expression.

    Declaration only: there is no Python body. The decorator registers this
    signature as a builtin scalar function named ``json_keys`` that takes a
    JSON value and an INT64 depth and yields an array of strings.
    """


@ibis_udf.scalar.builtin(name="json_value")
def json_value( # type: ignore[empty-body]
json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.String
Expand Down
5 changes: 5 additions & 0 deletions 5 bigframes/core/compile/sqlglot/expressions/json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def _(expr: TypedExpr, op: ops.JSONExtractStringArray) -> sge.Expression:
return sge.func("JSON_EXTRACT_STRING_ARRAY", expr.expr, sge.convert(op.json_path))


@register_unary_op(ops.JSONKeys, pass_op=True)
def _(expr: TypedExpr, op: ops.JSONKeys) -> sge.Expression:
    """Compile ``ops.JSONKeys`` into a ``JSON_KEYS(<expr>, <max_depth>)`` call."""
    # The depth argument is always emitted, whether or not max_depth is set.
    depth_arg = sge.convert(op.max_depth)
    return sge.func("JSON_KEYS", expr.expr, depth_arg)


@register_unary_op(ops.JSONQuery, pass_op=True)
def _(expr: TypedExpr, op: ops.JSONQuery) -> sge.Expression:
    """Compile ``ops.JSONQuery`` into a ``JSON_QUERY(<expr>, <json_path>)`` call."""
    path_arg = sge.convert(op.json_path)
    return sge.func("JSON_QUERY", expr.expr, path_arg)
Expand Down
2 changes: 2 additions & 0 deletions 2 bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
JSONExtract,
JSONExtractArray,
JSONExtractStringArray,
JSONKeys,
JSONQuery,
JSONQueryArray,
JSONSet,
Expand Down Expand Up @@ -381,6 +382,7 @@
"JSONExtract",
"JSONExtractArray",
"JSONExtractStringArray",
"JSONKeys",
"JSONQuery",
"JSONQueryArray",
"JSONSet",
Expand Down
17 changes: 17 additions & 0 deletions 17 bigframes/operations/json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,23 @@ def output_type(self, *input_types):
return input_type


@dataclasses.dataclass(frozen=True)
class JSONKeys(base_ops.UnaryOp):
    """Unary op that lists the keys of a JSON value as an array of strings."""

    name: typing.ClassVar[str] = "json_keys"
    # Maximum nesting depth to search for keys; None means no depth was given.
    max_depth: typing.Optional[int] = None

    def output_type(self, *input_types):
        """Return the list-of-strings dtype produced by this op.

        Args:
            *input_types: Operand dtypes; only the first one is inspected.

        Returns:
            An Arrow-backed pandas dtype for a list of strings.

        Raises:
            TypeError: If the first input dtype is not the JSON dtype.
        """
        input_type = input_types[0]
        if input_type != dtypes.JSON_DTYPE:
            # Only the JSON dtype passes this check (string inputs are
            # rejected), so the message must not claim that JSON-formatted
            # strings are accepted.
            raise TypeError(
                "Input type must be a valid JSON object type."
                + f" Received type: {input_type}"
            )
        return pd.ArrowDtype(
            pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE))
        )


@dataclasses.dataclass(frozen=True)
class JSONDecode(base_ops.UnaryOp):
name: typing.ClassVar[str] = "json_decode"
Expand Down
50 changes: 50 additions & 0 deletions 50 tests/system/small/bigquery/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,53 @@ def test_to_json_string_from_struct():
)

pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_keys():
    """json_keys on flat JSON objects yields each object's keys in sorted order."""
    inputs = bpd.Series(
        [
            '{"name": "Alice", "age": 30}',
            '{"city": "New York", "country": "USA", "active": true}',
            "{}",
            '{"items": [1, 2, 3]}',
        ],
        dtype=dtypes.JSON_DTYPE,
    )
    actual = bbq.json_keys(inputs)

    string_list_dtype = pd.ArrowDtype(pa.list_(pa.string()))
    expected = bpd.Series(
        [
            ["age", "name"],
            ["active", "city", "country"],
            [],
            ["items"],
        ],
        dtype=string_list_dtype,
    )
    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_keys_with_max_depth():
    """With max_depth=2, keys nested deeper than two levels are not reported."""
    inputs = bpd.Series(
        [
            '{"user": {"name": "Bob", "details": {"id": 123, "status": "approved"}}}',
            '{"user": {"name": "Charlie"}}',
        ],
        dtype=dtypes.JSON_DTYPE,
    )
    actual = bbq.json_keys(inputs, max_depth=2)

    string_list_dtype = pd.ArrowDtype(pa.list_(pa.string()))
    expected = bpd.Series(
        [
            ["user", "user.details", "user.name"],
            ["user", "user.name"],
        ],
        dtype=string_list_dtype,
    )
    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_keys_from_string_error():
    """A Series built without the JSON dtype is rejected with TypeError."""
    string_series = bpd.Series(['{"a": 1, "b": 2}', '{"c": 3}'])
    with pytest.raises(TypeError):
        bbq.json_keys(string_series)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
WITH `bfcte_0` AS (
SELECT
`json_col`
FROM `bigframes-dev`.`sqlglot_test`.`json_types`
), `bfcte_1` AS (
SELECT
*,
JSON_KEYS(`json_col`, NULL) AS `bfcol_1`,
JSON_KEYS(`json_col`, 2) AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `json_keys`,
`bfcol_2` AS `json_keys_w_max_depth`
FROM `bfcte_1`
13 changes: 13 additions & 0 deletions 13 tests/unit/core/compile/sqlglot/expressions/test_json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
snapshot.assert_match(sql, "out.sql")


def test_json_keys(json_types_df: bpd.DataFrame, snapshot):
    """Snapshot the SQL generated for JSONKeys with and without max_depth."""
    column = "json_col"
    frame = json_types_df[[column]]

    # Order matters: it determines the generated column ids in the snapshot.
    labeled_exprs = [
        ("json_keys", ops.JSONKeys().as_expr(column)),
        ("json_keys_w_max_depth", ops.JSONKeys(max_depth=2).as_expr(column)),
    ]
    labels = [label for label, _ in labeled_exprs]
    exprs = [expr for _, expr in labeled_exprs]

    sql = utils._apply_ops_to_sql(frame, exprs, labels)
    snapshot.assert_match(sql, "out.sql")


def test_json_query(json_types_df: bpd.DataFrame, snapshot):
col_name = "json_col"
bf_df = json_types_df[[col_name]]
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.