Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit b487cf1

Browse filesBrowse files
authored
feat: add bigquery.json_keys (#2286)
1 parent 0cb5217 commit b487cf1
Copy full SHA for b487cf1

9 files changed

+146Lines changed: 146 additions & 0 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/bigquery/__init__.py‎

Copy file name to clipboardExpand all lines: bigframes/bigquery/__init__.py
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
json_extract,
4848
json_extract_array,
4949
json_extract_string_array,
50+
json_keys,
5051
json_query,
5152
json_query_array,
5253
json_set,
@@ -138,6 +139,7 @@
138139
"json_extract",
139140
"json_extract_array",
140141
"json_extract_string_array",
142+
"json_keys",
141143
"json_query",
142144
"json_query_array",
143145
"json_set",
Collapse file

‎bigframes/bigquery/_operations/json.py‎

Copy file name to clipboardExpand all lines: bigframes/bigquery/_operations/json.py
+29Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,35 @@ def json_value_array(
421421
return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))
422422

423423

424+
def json_keys(
    input: series.Series,
    max_depth: Optional[int] = None,
) -> series.Series:
    """Returns the unique keys of a JSON object as an ARRAY of STRINGs.

    Keys of nested objects are included as dot-separated paths (for example
    ``b.c``), limited to ``max_depth`` levels of nesting when it is provided.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq

        >>> s = bpd.Series(['{"b": {"c": 2}, "a": 1}'], dtype="json")
        >>> bbq.json_keys(s)
        0    ['a' 'b' 'b.c']
        dtype: list<item: string>[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series containing JSON data.
        max_depth (int, optional):
            Specifies the maximum depth of nested fields to search for keys.
            If not provided, searches keys at all levels.

    Returns:
        bigframes.series.Series: A new Series containing arrays of keys from the input JSON.

    Raises:
        TypeError: If ``input`` does not have the JSON dtype.
    """
    return input._apply_unary_op(ops.JSONKeys(max_depth=max_depth))
451+
452+
424453
def to_json(
425454
input: series.Series,
426455
) -> series.Series:
Collapse file

‎bigframes/core/compile/ibis_compiler/scalar_op_registry.py‎

Copy file name to clipboardExpand all lines: bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+13Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,11 @@ def json_value_array_op_impl(x: ibis_types.Value, op: ops.JSONValueArray):
12341234
return json_value_array(json_obj=x, json_path=op.json_path)
12351235

12361236

1237+
@scalar_op_compiler.register_unary_op(ops.JSONKeys, pass_op=True)
def json_keys_op_impl(x: ibis_types.Value, op: ops.JSONKeys):
    """Compile a ``JSONKeys`` op by forwarding to the ``json_keys`` builtin."""
    # The op carries the optional depth limit; it is handed over unchanged.
    depth_limit = op.max_depth
    return json_keys(x, depth_limit)
1240+
1241+
12371242
# Blob Ops
12381243
@scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
12391244
def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value):
@@ -2059,6 +2064,14 @@ def to_json_string(value) -> ibis_dtypes.String: # type: ignore[empty-body]
20592064
"""Convert value to JSON-formatted string."""
20602065

20612066

2067+
@ibis_udf.scalar.builtin(name="json_keys")
def json_keys(  # type: ignore[empty-body]
    json_obj: ibis_dtypes.JSON, max_depth: ibis_dtypes.Int64
) -> ibis_dtypes.Array[ibis_dtypes.String]:
    """Extract the unique keys of a JSON expression as an array of strings.

    This is a declaration only: the body is intentionally empty and the
    function is registered as the builtin named ``json_keys``.

    Args:
        json_obj: The JSON expression to inspect.
        max_depth: Depth limit for the key search.
    """
2073+
2074+
20622075
@ibis_udf.scalar.builtin(name="json_value")
20632076
def json_value( # type: ignore[empty-body]
20642077
json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.String
Collapse file

‎bigframes/core/compile/sqlglot/expressions/json_ops.py‎

Copy file name to clipboardExpand all lines: bigframes/core/compile/sqlglot/expressions/json_ops.py
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ def _(expr: TypedExpr, op: ops.JSONExtractStringArray) -> sge.Expression:
3939
return sge.func("JSON_EXTRACT_STRING_ARRAY", expr.expr, sge.convert(op.json_path))
4040

4141

42+
@register_unary_op(ops.JSONKeys, pass_op=True)
def _(expr: TypedExpr, op: ops.JSONKeys) -> sge.Expression:
    """Render a ``JSONKeys`` op as a ``JSON_KEYS(<json>, <max_depth>)`` call."""
    # The depth is always emitted as the second argument; an unset depth is
    # converted as-is and therefore appears as NULL in the generated SQL.
    depth_arg = sge.convert(op.max_depth)
    return sge.func("JSON_KEYS", expr.expr, depth_arg)
45+
46+
4247
@register_unary_op(ops.JSONQuery, pass_op=True)
4348
def _(expr: TypedExpr, op: ops.JSONQuery) -> sge.Expression:
4449
return sge.func("JSON_QUERY", expr.expr, sge.convert(op.json_path))
Collapse file

‎bigframes/operations/__init__.py‎

Copy file name to clipboardExpand all lines: bigframes/operations/__init__.py
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
JSONExtract,
129129
JSONExtractArray,
130130
JSONExtractStringArray,
131+
JSONKeys,
131132
JSONQuery,
132133
JSONQueryArray,
133134
JSONSet,
@@ -381,6 +382,7 @@
381382
"JSONExtract",
382383
"JSONExtractArray",
383384
"JSONExtractStringArray",
385+
"JSONKeys",
384386
"JSONQuery",
385387
"JSONQueryArray",
386388
"JSONSet",
Collapse file

‎bigframes/operations/json_ops.py‎

Copy file name to clipboardExpand all lines: bigframes/operations/json_ops.py
+17Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,23 @@ def output_type(self, *input_types):
199199
return input_type
200200

201201

202+
@dataclasses.dataclass(frozen=True)
class JSONKeys(base_ops.UnaryOp):
    """Unary op that lists the keys of a JSON value as an array of strings."""

    name: typing.ClassVar[str] = "json_keys"
    # Maximum nesting depth to search for keys; None leaves the depth unset.
    max_depth: typing.Optional[int] = None

    def output_type(self, *input_types):
        """Return the result dtype for the given input dtype.

        Args:
            *input_types: Dtypes of the operands; only the first is inspected.

        Returns:
            An Arrow-backed list-of-strings dtype.

        Raises:
            TypeError: If the first input dtype is not the JSON dtype.
        """
        input_type = input_types[0]
        # Only the JSON dtype is accepted; plain string columns are rejected,
        # so the message must not suggest that JSON-formatted strings work.
        if input_type != dtypes.JSON_DTYPE:
            raise TypeError(
                f"Input type must be the JSON type. Received type: {input_type}"
            )
        return pd.ArrowDtype(
            pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE))
        )
217+
218+
202219
@dataclasses.dataclass(frozen=True)
203220
class JSONDecode(base_ops.UnaryOp):
204221
name: typing.ClassVar[str] = "json_decode"
Collapse file

‎tests/system/small/bigquery/test_json.py‎

Copy file name to clipboardExpand all lines: tests/system/small/bigquery/test_json.py
+50Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,3 +434,53 @@ def test_to_json_string_from_struct():
434434
)
435435

436436
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
437+
438+
439+
def test_json_keys():
    """Flat JSON objects yield their keys; an empty object yields an empty array."""
    # Each pair is (JSON document, keys expected back for that document).
    cases = [
        ('{"name": "Alice", "age": 30}', ["age", "name"]),
        (
            '{"city": "New York", "country": "USA", "active": true}',
            ["active", "city", "country"],
        ),
        ("{}", []),
        ('{"items": [1, 2, 3]}', ["items"]),
    ]
    documents = [document for document, _ in cases]
    expected_keys = [keys for _, keys in cases]

    actual = bbq.json_keys(bpd.Series(documents, dtype=dtypes.JSON_DTYPE))

    list_of_strings = pd.ArrowDtype(pa.list_(pa.string()))
    expected = bpd.Series(expected_keys, dtype=list_of_strings)
    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
463+
464+
465+
def test_json_keys_with_max_depth():
    """With ``max_depth=2`` keys nested deeper than two levels are left out."""
    deep_doc = (
        '{"user": {"name": "Bob", "details": {"id": 123, "status": "approved"}}}'
    )
    shallow_doc = '{"user": {"name": "Charlie"}}'
    s = bpd.Series([deep_doc, shallow_doc], dtype=dtypes.JSON_DTYPE)

    actual = bbq.json_keys(s, max_depth=2)

    # "user.details.id" and "user.details.status" sit at depth 3 and are absent.
    expected = bpd.Series(
        [
            ["user", "user.details", "user.name"],
            ["user", "user.name"],
        ],
        dtype=pd.ArrowDtype(pa.list_(pa.string())),
    )
    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
481+
482+
483+
def test_json_keys_from_string_error():
    """A Series built without the JSON dtype is rejected with a TypeError."""
    # No dtype is given, so the Series is not created with the JSON dtype
    # even though its values are JSON-formatted text.
    not_json = bpd.Series(['{"a": 1, "b": 2}', '{"c": 3}'])
    with pytest.raises(TypeError):
        bbq.json_keys(not_json)
Collapse file
+15Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`json_col`
4+
FROM `bigframes-dev`.`sqlglot_test`.`json_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
JSON_KEYS(`json_col`, NULL) AS `bfcol_1`,
9+
JSON_KEYS(`json_col`, 2) AS `bfcol_2`
10+
FROM `bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_1` AS `json_keys`,
14+
`bfcol_2` AS `json_keys_w_max_depth`
15+
FROM `bfcte_1`
Collapse file

‎tests/unit/core/compile/sqlglot/expressions/test_json_ops.py‎

Copy file name to clipboardExpand all lines: tests/unit/core/compile/sqlglot/expressions/test_json_ops.py
+13Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
5252
snapshot.assert_match(sql, "out.sql")
5353

5454

55+
def test_json_keys(json_types_df: bpd.DataFrame, snapshot):
    """Snapshot the SQL for ``JSONKeys`` with and without an explicit depth."""
    col_name = "json_col"
    bf_df = json_types_df[[col_name]]

    # Output column names and their expressions, kept in matching order.
    output_names = ["json_keys", "json_keys_w_max_depth"]
    expressions = [
        ops.JSONKeys().as_expr(col_name),
        ops.JSONKeys(max_depth=2).as_expr(col_name),
    ]

    sql = utils._apply_ops_to_sql(bf_df, expressions, output_names)
    snapshot.assert_match(sql, "out.sql")
66+
67+
5568
def test_json_query(json_types_df: bpd.DataFrame, snapshot):
5669
col_name = "json_col"
5770
bf_df = json_types_df[[col_name]]

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.