Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 888f2ea

Browse filesBrowse files
committed
Add array_sample() and array_shuffle() functions.
These are useful in Monte Carlo applications. Martin Kalcher, reviewed/adjusted by Daniel Gustafsson and myself Discussion: https://postgr.es/m/9d160a44-7675-51e8-60cf-6d64b76db831@aboutsource.net
1 parent cd82e5c commit 888f2ea
Copy full SHA for 888f2ea

File tree

6 files changed

+284
-2
lines changed
Filter options

6 files changed

+284
-2
lines changed

‎doc/src/sgml/func.sgml

Copy file name to clipboardExpand all lines: doc/src/sgml/func.sgml
+43-1
Original file line numberDiff line numberDiff line change
@@ -16053,7 +16053,7 @@ SELECT js,
1605316053
js IS JSON ARRAY "array?"
1605416054
FROM (VALUES
1605516055
('123'), ('"abc"'), ('{"a": "b"}'), ('[1,2]'),('abc')) foo(js);
16056-
js | json? | scalar? | object? | array?
16056+
js | json? | scalar? | object? | array?
1605716057
------------+-------+---------+---------+--------
1605816058
123 | t | t | f | f
1605916059
"abc" | t | t | f | f
@@ -18777,6 +18777,48 @@ SELECT NULLIF(value, '(none)') ...
1877718777
</para></entry>
1877818778
</row>
1877918779

18780+
<row>
18781+
<entry role="func_table_entry"><para role="func_signature">
18782+
<indexterm>
18783+
<primary>array_sample</primary>
18784+
</indexterm>
18785+
<function>array_sample</function> ( <parameter>array</parameter> <type>anyarray</type>, <parameter>n</parameter> <type>integer</type> )
18786+
<returnvalue>anyarray</returnvalue>
18787+
</para>
18788+
<para>
18789+
Returns an array of <parameter>n</parameter> items randomly selected
18790+
from <parameter>array</parameter>. <parameter>n</parameter> may not
18791+
exceed the length of <parameter>array</parameter>'s first dimension.
18792+
If <parameter>array</parameter> is multi-dimensional,
18793+
an <quote>item</quote> is a slice having a given first subscript.
18794+
</para>
18795+
<para>
18796+
<literal>array_sample(ARRAY[1,2,3,4,5,6], 3)</literal>
18797+
<returnvalue>{2,6,1}</returnvalue>
18798+
</para>
18799+
<para>
18800+
<literal>array_sample(ARRAY[[1,2],[3,4],[5,6]], 2)</literal>
18801+
<returnvalue>{{5,6},{1,2}}</returnvalue>
18802+
</para></entry>
18803+
</row>
18804+
18805+
<row>
18806+
<entry role="func_table_entry"><para role="func_signature">
18807+
<indexterm>
18808+
<primary>array_shuffle</primary>
18809+
</indexterm>
18810+
<function>array_shuffle</function> ( <type>anyarray</type> )
18811+
<returnvalue>anyarray</returnvalue>
18812+
</para>
18813+
<para>
18814+
Randomly shuffles the first dimension of the array.
18815+
</para>
18816+
<para>
18817+
<literal>array_shuffle(ARRAY[[1,2],[3,4],[5,6]])</literal>
18818+
<returnvalue>{{5,6},{1,2},{3,4}}</returnvalue>
18819+
</para></entry>
18820+
</row>
18821+
1878018822
<row>
1878118823
<entry role="func_table_entry"><para role="func_signature">
1878218824
<indexterm id="function-array-to-string">

‎src/backend/utils/adt/array_userfuncs.c

Copy file name to clipboardExpand all lines: src/backend/utils/adt/array_userfuncs.c
+166
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "catalog/pg_type.h"
1616
#include "libpq/pqformat.h"
1717
#include "common/int.h"
18+
#include "common/pg_prng.h"
1819
#include "port/pg_bitutils.h"
1920
#include "utils/array.h"
2021
#include "utils/datum.h"
@@ -1525,3 +1526,168 @@ array_positions(PG_FUNCTION_ARGS)
15251526

15261527
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
15271528
}
1529+
1530+
/*
1531+
* array_shuffle_n
1532+
* Return a copy of array with n randomly chosen items.
1533+
*
1534+
* The number of items must not exceed the size of the first dimension of the
1535+
* array. We preserve the first dimension's lower bound if keep_lb,
1536+
* else it's set to 1. Lower-order dimensions are preserved in any case.
1537+
*
1538+
* NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info
1539+
* from the system catalogs, given only the elmtyp. However, the caller is
1540+
* in a better position to cache this info across multiple calls.
1541+
*/
1542+
static ArrayType *
1543+
array_shuffle_n(ArrayType *array, int n, bool keep_lb,
1544+
Oid elmtyp, TypeCacheEntry *typentry)
1545+
{
1546+
ArrayType *result;
1547+
int ndim,
1548+
*dims,
1549+
*lbs,
1550+
nelm,
1551+
nitem,
1552+
rdims[MAXDIM],
1553+
rlbs[MAXDIM];
1554+
int16 elmlen;
1555+
bool elmbyval;
1556+
char elmalign;
1557+
Datum *elms,
1558+
*ielms;
1559+
bool *nuls,
1560+
*inuls;
1561+
1562+
ndim = ARR_NDIM(array);
1563+
dims = ARR_DIMS(array);
1564+
lbs = ARR_LBOUND(array);
1565+
1566+
elmlen = typentry->typlen;
1567+
elmbyval = typentry->typbyval;
1568+
elmalign = typentry->typalign;
1569+
1570+
/* If the target array is empty, exit fast */
1571+
if (ndim < 1 || dims[0] < 1 || n < 1)
1572+
return construct_empty_array(elmtyp);
1573+
1574+
deconstruct_array(array, elmtyp, elmlen, elmbyval, elmalign,
1575+
&elms, &nuls, &nelm);
1576+
1577+
nitem = dims[0]; /* total number of items */
1578+
nelm /= nitem; /* number of elements per item */
1579+
1580+
Assert(n <= nitem); /* else it's caller error */
1581+
1582+
/*
1583+
* Shuffle array using Fisher-Yates algorithm. Scan the array and swap
1584+
* current item (nelm datums starting at ielms) with a randomly chosen
1585+
* later item (nelm datums starting at jelms) in each iteration. We can
1586+
* stop once we've done n iterations; then first n items are the result.
1587+
*/
1588+
ielms = elms;
1589+
inuls = nuls;
1590+
for (int i = 0; i < n; i++)
1591+
{
1592+
int j = (int) pg_prng_uint64_range(&pg_global_prng_state, i, nitem - 1) * nelm;
1593+
Datum *jelms = elms + j;
1594+
bool *jnuls = nuls + j;
1595+
1596+
/* Swap i'th and j'th items; advance ielms/inuls to next item */
1597+
for (int k = 0; k < nelm; k++)
1598+
{
1599+
Datum elm = *ielms;
1600+
bool nul = *inuls;
1601+
1602+
*ielms++ = *jelms;
1603+
*inuls++ = *jnuls;
1604+
*jelms++ = elm;
1605+
*jnuls++ = nul;
1606+
}
1607+
}
1608+
1609+
/* Set up dimensions of the result */
1610+
memcpy(rdims, dims, ndim * sizeof(int));
1611+
memcpy(rlbs, lbs, ndim * sizeof(int));
1612+
rdims[0] = n;
1613+
if (!keep_lb)
1614+
rlbs[0] = 1;
1615+
1616+
result = construct_md_array(elms, nuls, ndim, rdims, rlbs,
1617+
elmtyp, elmlen, elmbyval, elmalign);
1618+
1619+
pfree(elms);
1620+
pfree(nuls);
1621+
1622+
return result;
1623+
}
1624+
1625+
/*
1626+
* array_shuffle
1627+
*
1628+
* Returns an array with the same dimensions as the input array, with its
1629+
* first-dimension elements in random order.
1630+
*/
1631+
Datum
1632+
array_shuffle(PG_FUNCTION_ARGS)
1633+
{
1634+
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
1635+
ArrayType *result;
1636+
Oid elmtyp;
1637+
TypeCacheEntry *typentry;
1638+
1639+
/*
1640+
* There is no point in shuffling empty arrays or arrays with less than
1641+
* two items.
1642+
*/
1643+
if (ARR_NDIM(array) < 1 || ARR_DIMS(array)[0] < 2)
1644+
PG_RETURN_ARRAYTYPE_P(array);
1645+
1646+
elmtyp = ARR_ELEMTYPE(array);
1647+
typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
1648+
if (typentry == NULL || typentry->type_id != elmtyp)
1649+
{
1650+
typentry = lookup_type_cache(elmtyp, 0);
1651+
fcinfo->flinfo->fn_extra = (void *) typentry;
1652+
}
1653+
1654+
result = array_shuffle_n(array, ARR_DIMS(array)[0], true, elmtyp, typentry);
1655+
1656+
PG_RETURN_ARRAYTYPE_P(result);
1657+
}
1658+
1659+
/*
1660+
* array_sample
1661+
*
1662+
* Returns an array of n randomly chosen first-dimension elements
1663+
* from the input array.
1664+
*/
1665+
Datum
1666+
array_sample(PG_FUNCTION_ARGS)
1667+
{
1668+
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
1669+
int n = PG_GETARG_INT32(1);
1670+
ArrayType *result;
1671+
Oid elmtyp;
1672+
TypeCacheEntry *typentry;
1673+
int nitem;
1674+
1675+
nitem = (ARR_NDIM(array) < 1) ? 0 : ARR_DIMS(array)[0];
1676+
1677+
if (n < 0 || n > nitem)
1678+
ereport(ERROR,
1679+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1680+
errmsg("sample size must be between 0 and %d", nitem)));
1681+
1682+
elmtyp = ARR_ELEMTYPE(array);
1683+
typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
1684+
if (typentry == NULL || typentry->type_id != elmtyp)
1685+
{
1686+
typentry = lookup_type_cache(elmtyp, 0);
1687+
fcinfo->flinfo->fn_extra = (void *) typentry;
1688+
}
1689+
1690+
result = array_shuffle_n(array, n, false, elmtyp, typentry);
1691+
1692+
PG_RETURN_ARRAYTYPE_P(result);
1693+
}

‎src/include/catalog/catversion.h

Copy file name to clipboardExpand all lines: src/include/catalog/catversion.h
+1-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202304051
60+
#define CATALOG_VERSION_NO 202304071
6161

6262
#endif

‎src/include/catalog/pg_proc.dat

Copy file name to clipboardExpand all lines: src/include/catalog/pg_proc.dat
+6
Original file line numberDiff line numberDiff line change
@@ -1717,6 +1717,12 @@
17171717
{ oid => '6172', descr => 'remove last N elements of array',
17181718
proname => 'trim_array', prorettype => 'anyarray',
17191719
proargtypes => 'anyarray int4', prosrc => 'trim_array' },
1720+
{ oid => '8464', descr => 'shuffle array',
1721+
proname => 'array_shuffle', provolatile => 'v', prorettype => 'anyarray',
1722+
proargtypes => 'anyarray', prosrc => 'array_shuffle' },
1723+
{ oid => '8465', descr => 'take samples from array',
1724+
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
1725+
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
17201726
{ oid => '3816', descr => 'array typanalyze',
17211727
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
17221728
proargtypes => 'internal', prosrc => 'array_typanalyze' },

‎src/test/regress/expected/arrays.out

Copy file name to clipboardExpand all lines: src/test/regress/expected/arrays.out
+54
Original file line numberDiff line numberDiff line change
@@ -2472,3 +2472,57 @@ SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail
24722472
ERROR: number of elements to trim must be between 0 and 3
24732473
SELECT trim_array(ARRAY[]::int[], 1); -- fail
24742474
ERROR: number of elements to trim must be between 0 and 0
2475+
-- array_shuffle
2476+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) <@ '{1,2,3,4,5,6}';
2477+
?column?
2478+
----------
2479+
t
2480+
(1 row)
2481+
2482+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) @> '{1,2,3,4,5,6}';
2483+
?column?
2484+
----------
2485+
t
2486+
(1 row)
2487+
2488+
SELECT array_dims(array_shuffle('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]));
2489+
array_dims
2490+
-------------
2491+
[-1:2][2:3]
2492+
(1 row)
2493+
2494+
SELECT array_dims(array_shuffle('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[]));
2495+
array_dims
2496+
-----------------
2497+
[1:3][1:2][1:2]
2498+
(1 row)
2499+
2500+
-- array_sample
2501+
SELECT array_sample('{1,2,3,4,5,6}'::int[], 3) <@ '{1,2,3,4,5,6}';
2502+
?column?
2503+
----------
2504+
t
2505+
(1 row)
2506+
2507+
SELECT array_length(array_sample('{1,2,3,4,5,6}'::int[], 3), 1);
2508+
array_length
2509+
--------------
2510+
3
2511+
(1 row)
2512+
2513+
SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[], 3));
2514+
array_dims
2515+
------------
2516+
[1:3][2:3]
2517+
(1 row)
2518+
2519+
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
2520+
array_dims
2521+
-----------------
2522+
[1:2][1:2][1:2]
2523+
(1 row)
2524+
2525+
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
2526+
ERROR: sample size must be between 0 and 6
2527+
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
2528+
ERROR: sample size must be between 0 and 6

‎src/test/regress/sql/arrays.sql

Copy file name to clipboardExpand all lines: src/test/regress/sql/arrays.sql
+14
Original file line numberDiff line numberDiff line change
@@ -761,3 +761,17 @@ FROM
761761
SELECT trim_array(ARRAY[1, 2, 3], -1); -- fail
762762
SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail
763763
SELECT trim_array(ARRAY[]::int[], 1); -- fail
764+
765+
-- array_shuffle
766+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) <@ '{1,2,3,4,5,6}';
767+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) @> '{1,2,3,4,5,6}';
768+
SELECT array_dims(array_shuffle('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]));
769+
SELECT array_dims(array_shuffle('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[]));
770+
771+
-- array_sample
772+
SELECT array_sample('{1,2,3,4,5,6}'::int[], 3) <@ '{1,2,3,4,5,6}';
773+
SELECT array_length(array_sample('{1,2,3,4,5,6}'::int[], 3), 1);
774+
SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[], 3));
775+
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
776+
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
777+
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.