diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 10cf433a8b..4c0027ff1c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a -# created: 2025-01-09T12:01:16.422459506Z + digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a +# created: 2025-01-16T15:24:11.364245182Z diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in index 816817c672..586bd07037 100644 --- a/.kokoro/docker/docs/requirements.in +++ b/.kokoro/docker/docs/requirements.in @@ -1 +1,2 @@ nox +gcp-docuploader diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index f99a5c4aac..a9360a25b7 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,16 +2,124 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +# pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via google-auth +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + 
--hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via requests +charset-normalizer==3.4.1 \ + --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ + --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ + --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ + --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ + --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ + --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ + --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ + --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ + --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ + --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ + --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ + --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ + --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ + --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ + --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ + --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ + --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ + --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ + --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ + --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ + --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ + --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ + 
--hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ + --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ + --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ + --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ + --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ + --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ + --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ + --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ + --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ + --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ + --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ + --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ + --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ + --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ + --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ + --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ + --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ + --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ + --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ + --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ + --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ + --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ + --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ + --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ + 
--hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ + --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ + --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ + --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ + --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ + --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ + --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ + --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ + --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ + --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ + --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ + --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ + --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ + --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ + --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ + --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ + --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ + --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ + --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ + --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ + --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ + --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ + --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ + --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ + 
--hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ + --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ + --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ + --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ + --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ + --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ + --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ + --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ + --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ + --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ + --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ + --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ + --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ + --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ + --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ + --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ + --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ + --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ + --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ + --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ + --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ + --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 + # via requests +click==8.1.8 \ + --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ + 
--hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a + # via gcp-docuploader colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via nox + # via + # gcp-docuploader + # nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 @@ -20,10 +128,78 @@ filelock==3.16.1 \ --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea + # via -r requirements.in +google-api-core==2.24.0 \ + --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ + --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.37.0 \ + --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ + --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.19.0 \ + --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ + --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 + # via gcp-docuploader +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + 
--hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + 
--hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 + # via google-cloud-storage +googleapis-common-protos==1.66.0 \ + --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ + --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed + # via google-api-core +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via requests nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in + # via -r requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,6 +208,51 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv +proto-plus==1.25.0 \ + --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ + --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 + # via google-api-core +protobuf==5.29.3 \ + --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ + --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ + 
--hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ + --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ + --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ + --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ + --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ + --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ + --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ + --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ + --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 + # via + # gcp-docuploader + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c + # via google-auth +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # google-api-core + # google-cloud-storage +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via gcp-docuploader tomli==2.2.1 \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ 
--hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ @@ -66,7 +287,11 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +urllib3==2.3.0 \ + --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ + --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d + # via requests +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 297b14ac90..2d5ba47549 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -20,10 +20,6 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" -# Install nox -python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt -python3.10 -m nox --version - # build docs nox -s docs diff --git a/CHANGELOG.md b/CHANGELOG.md index b4bec86e9e..d8befd372d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,29 @@ [1]: https://pypi.org/project/bigframes/#history +## [1.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.32.0...v1.33.0) (2025-01-22) + + +### Features + +* Add `bigframes.bigquery.sql_scalar()` to apply SQL syntax on Series objects ([#1293](https://github.com/googleapis/python-bigquery-dataframes/issues/1293)) ([aa2f73a](https://github.com/googleapis/python-bigquery-dataframes/commit/aa2f73ad86e42c37d85ac867a3702eb6f2724b11)) +* Add unix_seconds, unix_millis and unix_micros for timestamp series. 
([#1297](https://github.com/googleapis/python-bigquery-dataframes/issues/1297)) ([e4b0c8d](https://github.com/googleapis/python-bigquery-dataframes/commit/e4b0c8dd9edda48e07c433b99f44db82e1ea2054)) +* DataFrame.join supports Series other ([#1303](https://github.com/googleapis/python-bigquery-dataframes/issues/1303)) ([ee37a0a](https://github.com/googleapis/python-bigquery-dataframes/commit/ee37a0ab84e9415046e0e15955c14a1965b3a904)) +* Support array output in `remote_function` ([#1057](https://github.com/googleapis/python-bigquery-dataframes/issues/1057)) ([bdee173](https://github.com/googleapis/python-bigquery-dataframes/commit/bdee1734809589e5a7a3c23ee9cd2f967adf346f)) + + +### Bug Fixes + +* Dataframe sort_values Series input keyerror. ([#1285](https://github.com/googleapis/python-bigquery-dataframes/issues/1285)) ([5a2731b](https://github.com/googleapis/python-bigquery-dataframes/commit/5a2731bda8b2e9ea54bf582f823acdb6153dbb8f)) +* Fix read_gbq_function issue in dataframe apply method ([#1174](https://github.com/googleapis/python-bigquery-dataframes/issues/1174)) ([0318764](https://github.com/googleapis/python-bigquery-dataframes/commit/0318764030f6753a4e925c62612aabbb8e192fdf)) +* Series sort_index and sort_values now raises when axis!=0 ([#1294](https://github.com/googleapis/python-bigquery-dataframes/issues/1294)) ([94bc2f2](https://github.com/googleapis/python-bigquery-dataframes/commit/94bc2f2dc3514fffeac625592ec4b28c32957723)) + + +### Documentation + +* Add snippet to forecast future time series in the Forecast a single time series with a univariate model tutorial ([#1271](https://github.com/googleapis/python-bigquery-dataframes/issues/1271)) ([a687050](https://github.com/googleapis/python-bigquery-dataframes/commit/a687050b2a92bed1af9cb86a812b62f9a69cf959)) +* Update `bigframes.pandas.Series` docs ([#1273](https://github.com/googleapis/python-bigquery-dataframes/issues/1273)) 
([0cac64f](https://github.com/googleapis/python-bigquery-dataframes/commit/0cac64f5ba3f3c9e8495fc5acb09d81c39d36de0)) + ## [1.32.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.31.0...v1.32.0) (2025-01-13) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index ff52ae8d36..21e61bc4b1 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -22,6 +22,11 @@ array_length, array_to_string, ) +from bigframes.bigquery._operations.datetime import ( + unix_micros, + unix_millis, + unix_seconds, +) from bigframes.bigquery._operations.json import ( json_extract, json_extract_array, @@ -30,6 +35,7 @@ parse_json, ) from bigframes.bigquery._operations.search import create_vector_index, vector_search +from bigframes.bigquery._operations.sql import sql_scalar from bigframes.bigquery._operations.struct import struct __all__ = [ @@ -48,6 +54,12 @@ # search ops "create_vector_index", "vector_search", + # sql ops + "sql_scalar", # struct ops "struct", + # datetime ops + "unix_micros", + "unix_millis", + "unix_seconds", ] diff --git a/bigframes/bigquery/_operations/datetime.py b/bigframes/bigquery/_operations/datetime.py new file mode 100644 index 0000000000..f8767336dd --- /dev/null +++ b/bigframes/bigquery/_operations/datetime.py @@ -0,0 +1,97 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from bigframes import operations as ops +from bigframes import series + + +def unix_seconds(input: series.Series) -> series.Series: + """Converts a timestamp series to unix epoch seconds + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) + >>> bbq.unix_seconds(s) + 0 86400 + 1 172800 + dtype: Int64 + + Args: + input (bigframes.pandas.Series): + A timestamp series. + + Returns: + bigframes.pandas.Series: A new series of unix epoch in seconds. + + """ + return input._apply_unary_op(ops.UnixSeconds()) + + +def unix_millis(input: series.Series) -> series.Series: + """Converts a timestamp series to unix epoch milliseconds + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) + >>> bbq.unix_millis(s) + 0 86400000 + 1 172800000 + dtype: Int64 + + Args: + input (bigframes.pandas.Series): + A timestamp series. + + Returns: + bigframes.pandas.Series: A new series of unix epoch in milliseconds. + + """ + return input._apply_unary_op(ops.UnixMillis()) + + +def unix_micros(input: series.Series) -> series.Series: + """Converts a timestamp series to unix epoch microseconds + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) + >>> bbq.unix_micros(s) + 0 86400000000 + 1 172800000000 + dtype: Int64 + + Args: + input (bigframes.pandas.Series): + A timestamp series. 
+ + Returns: + bigframes.pandas.Series: A new series of unix epoch in microseconds. + + """ + return input._apply_unary_op(ops.UnixMicros()) diff --git a/bigframes/bigquery/_operations/search.py b/bigframes/bigquery/_operations/search.py index 35633fd4ce..9a1e4b5ac9 100644 --- a/bigframes/bigquery/_operations/search.py +++ b/bigframes/bigquery/_operations/search.py @@ -122,12 +122,12 @@ def vector_search( ... base_table="bigframes-dev.bigframes_tests_sys.base_table", ... column_to_search="my_embedding", ... query=search_query, - ... top_k=2) + ... top_k=2).sort_values("id") query_id embedding id my_embedding distance - 1 cat [3. 5.2] 5 [5. 5.4] 2.009975 0 dog [1. 2.] 1 [1. 2.] 0.0 - 0 dog [1. 2.] 4 [1. 3.2] 1.2 1 cat [3. 5.2] 2 [2. 4.] 1.56205 + 0 dog [1. 2.] 4 [1. 3.2] 1.2 + 1 cat [3. 5.2] 5 [5. 5.4] 2.009975 [4 rows x 5 columns] @@ -141,12 +141,12 @@ def vector_search( ... column_to_search="my_embedding", ... query=search_query, ... top_k=2, - ... use_brute_force=True) + ... use_brute_force=True).sort_values("id") embedding id my_embedding distance dog [1. 2.] 1 [1. 2.] 0.0 - cat [3. 5.2] 5 [5. 5.4] 2.009975 - dog [1. 2.] 4 [1. 3.2] 1.2 cat [3. 5.2] 2 [2. 4.] 1.56205 + dog [1. 2.] 4 [1. 3.2] 1.2 + cat [3. 5.2] 5 [5. 5.4] 2.009975 [4 rows x 4 columns] diff --git a/bigframes/bigquery/_operations/sql.py b/bigframes/bigquery/_operations/sql.py new file mode 100644 index 0000000000..7ccf63fcda --- /dev/null +++ b/bigframes/bigquery/_operations/sql.py @@ -0,0 +1,94 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""SQL escape hatch features.""" + +from __future__ import annotations + +from typing import Sequence + +import google.cloud.bigquery + +import bigframes.core.sql +import bigframes.dataframe +import bigframes.dtypes +import bigframes.operations +import bigframes.series + + +def sql_scalar( + sql_template: str, + columns: Sequence[bigframes.series.Series], +) -> bigframes.series.Series: + """Create a Series from a SQL template. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> import pandas as pd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(["1.5", "2.5", "3.5"]) + >>> s = s.astype(pd.ArrowDtype(pa.decimal128(38, 9))) + >>> bbq.sql_scalar("ROUND({0}, 0, 'ROUND_HALF_EVEN')", [s]) + 0 2.000000000 + 1 2.000000000 + 2 4.000000000 + dtype: decimal128(38, 9)[pyarrow] + + Args: + sql_template (str): + A SQL format string with Python-style {0} placeholders for each of + the Series objects in ``columns``. + columns (Sequence[bigframes.pandas.Series]): + Series objects representing the column inputs to the + ``sql_template``. Must contain at least one Series. + + Returns: + bigframes.pandas.Series: + A Series with the SQL applied. + + Raises: + ValueError: If ``columns`` is empty. + """ + if len(columns) == 0: + raise ValueError("Must provide at least one column in columns") + + # To integrate this into our expression trees, we need to get the output + # type, so we do some manual compilation and a dry run query to get that. + # Another benefit of this is that if there is a syntax error in the SQL + # template, then this will fail with an error earlier in the process, + # aiding users in debugging. 
+ base_series = columns[0] + literals = [ + bigframes.dtypes.bigframes_dtype_to_literal(column.dtype) for column in columns + ] + literals_sql = [bigframes.core.sql.simple_literal(literal) for literal in literals] + + # Use the executor directly, because we want the original column IDs, not + # the user-friendly column names that block.to_sql_query() would produce. + select_sql = sql_template.format(*literals_sql) + dry_run_sql = f"SELECT {select_sql}" + bqclient = base_series._session.bqclient + job = bqclient.query( + dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True) + ) + _, output_type = bigframes.dtypes.convert_schema_field(job.schema[0]) + + op = bigframes.operations.SqlScalarOp( + _output_type=output_type, sql_template=sql_template + ) + return base_series._apply_nary_op(op, columns[1:]) diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py index 4b3841252c..397a37ee92 100644 --- a/bigframes/blob/_functions.py +++ b/bigframes/blob/_functions.py @@ -18,7 +18,7 @@ import google.cloud.bigquery as bigquery -import bigframes +import bigframes.session import bigframes.session._io.bigquery as bf_io_bigquery _PYTHON_TO_BQ_TYPES = {int: "INT64", float: "FLOAT64", str: "STRING", bytes: "BYTES"} @@ -37,7 +37,7 @@ class TransformFunction: """Simple transform function class to deal with Python UDF.""" def __init__( - self, func_def: FunctionDef, session: bigframes.Session, connection: str + self, func_def: FunctionDef, session: bigframes.session.Session, connection: str ): self._func = func_def.func self._requirements = func_def.requirements diff --git a/bigframes/clients.py b/bigframes/clients.py index 8a2dbfed6c..c6e1d47909 100644 --- a/bigframes/clients.py +++ b/bigframes/clients.py @@ -21,6 +21,7 @@ from typing import cast, Optional import google.api_core.exceptions +import google.api_core.retry from google.cloud import bigquery_connection_v1, resourcemanager_v3 from google.iam.v1 import iam_policy_pb2, policy_pb2 diff 
--git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index a88e365dcd..ee9917f619 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -220,8 +220,14 @@ def filter_by_id(self, predicate_id: str, keep_null: bool = False) -> ArrayValue def filter(self, predicate: ex.Expression): return ArrayValue(nodes.FilterNode(child=self.node, predicate=predicate)) - def order_by(self, by: Sequence[OrderingExpression]) -> ArrayValue: - return ArrayValue(nodes.OrderByNode(child=self.node, by=tuple(by))) + def order_by( + self, by: Sequence[OrderingExpression], is_total_order: bool = False + ) -> ArrayValue: + return ArrayValue( + nodes.OrderByNode( + child=self.node, by=tuple(by), is_total_order=is_total_order + ) + ) def reversed(self) -> ArrayValue: return ArrayValue(nodes.ReversedNode(child=self.node)) diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index a7f75e7264..8ef3aa123b 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -43,19 +43,16 @@ def equals(block1: blocks.Block, block2: blocks.Block) -> bool: joined_block, (lmap, rmap) = block1.join(block2, how="outer") - equality_ids = [] + exprs = [] for lcol, rcol in zip(block1.value_columns, block2.value_columns): - lcolmapped = lmap[lcol] - rcolmapped = rmap[rcol] - joined_block, result_id = joined_block.project_expr( + exprs.append( ops.fillna_op.as_expr( - ops.eq_null_match_op.as_expr(lcolmapped, rcolmapped), ex.const(False) + ops.eq_null_match_op.as_expr(lmap[lcol], rmap[rcol]), ex.const(False) ) ) - equality_ids.append(result_id) - joined_block = joined_block.select_columns(equality_ids).with_column_labels( - list(range(len(equality_ids))) + joined_block = joined_block.project_exprs( + exprs, labels=list(range(len(exprs))), drop=True ) stacked_block = joined_block.stack() result = stacked_block.get_stat(stacked_block.value_columns[0], agg_ops.all_op) @@ -132,12 +129,16 @@ def quantile( window_spec=window, ) 
quantile_cols.append(quantile_col) - block, results = block.aggregate( + block, _ = block.aggregate( grouping_column_ids, - tuple((col, agg_ops.AnyValueOp()) for col in quantile_cols), + tuple( + ex.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col)) + for col in quantile_cols + ), + column_labels=pd.Index(labels), dropna=dropna, ) - return block.select_columns(results).with_column_labels(labels) + return block def interpolate(block: blocks.Block, method: str = "linear") -> blocks.Block: @@ -358,7 +359,7 @@ def value_counts( block, dummy = block.create_constant(1) block, agg_ids = block.aggregate( by_column_ids=columns, - aggregations=[(dummy, agg_ops.count_op)], + aggregations=[ex.UnaryAggregation(agg_ops.count_op, ex.deref(dummy))], dropna=dropna, ) count_id = agg_ids[0] @@ -395,12 +396,12 @@ def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block: block, shift_columns = block.multi_apply_window_op( original_columns, agg_ops.ShiftOp(periods), window_spec=window_spec ) - result_ids = [] + exprs = [] for original_col, shifted_col in zip(original_columns, shift_columns): - block, change_id = block.apply_binary_op(original_col, shifted_col, ops.sub_op) - block, pct_change_id = block.apply_binary_op(change_id, shifted_col, ops.div_op) - result_ids.append(pct_change_id) - return block.select_columns(result_ids).with_column_labels(column_labels) + change_expr = ops.sub_op.as_expr(original_col, shifted_col) + pct_change_expr = ops.div_op.as_expr(change_expr, shifted_col) + exprs.append(pct_change_expr) + return block.project_exprs(exprs, labels=column_labels, drop=True) def rank( @@ -470,16 +471,23 @@ def rank( # Step 3: post processing: mask null values and cast to float if method in ["min", "max", "first", "dense"]: # Pandas rank always produces Float64, so must cast for aggregation types that produce ints - block = block.multi_apply_unary_op( - rownum_col_ids, ops.AsTypeOp(pd.Float64Dtype()) + return ( + block.select_columns(rownum_col_ids) + 
.multi_apply_unary_op(ops.AsTypeOp(pd.Float64Dtype())) + .with_column_labels(labels) ) if na_option == "keep": # For na_option "keep", null inputs must produce null outputs + exprs = [] for i in range(len(columns)): - block, null_const = block.create_constant(pd.NA, dtype=pd.Float64Dtype()) - block, rownum_col_ids[i] = block.apply_ternary_op( - null_const, nullity_col_ids[i], rownum_col_ids[i], ops.where_op + exprs.append( + ops.where_op.as_expr( + ex.const(pd.NA, dtype=pd.Float64Dtype()), + nullity_col_ids[i], + rownum_col_ids[i], + ) ) + return block.project_exprs(exprs, labels=labels, drop=True) return block.select_columns(rownum_col_ids).with_column_labels(labels) @@ -585,9 +593,18 @@ def skew( # counts, moment3 for each column aggregations = [] for i, col in enumerate(original_columns): - count_agg = (col, agg_ops.count_op) - moment3_agg = (delta3_ids[i], agg_ops.mean_op) - variance_agg = (col, agg_ops.PopVarOp()) + count_agg = ex.UnaryAggregation( + agg_ops.count_op, + ex.deref(col), + ) + moment3_agg = ex.UnaryAggregation( + agg_ops.mean_op, + ex.deref(delta3_ids[i]), + ) + variance_agg = ex.UnaryAggregation( + agg_ops.PopVarOp(), + ex.deref(col), + ) aggregations.extend([count_agg, moment3_agg, variance_agg]) block, agg_ids = block.aggregate( @@ -627,9 +644,9 @@ def kurt( # counts, moment4 for each column aggregations = [] for i, col in enumerate(original_columns): - count_agg = (col, agg_ops.count_op) - moment4_agg = (delta4_ids[i], agg_ops.mean_op) - variance_agg = (col, agg_ops.PopVarOp()) + count_agg = ex.UnaryAggregation(agg_ops.count_op, ex.deref(col)) + moment4_agg = ex.UnaryAggregation(agg_ops.mean_op, ex.deref(delta4_ids[i])) + variance_agg = ex.UnaryAggregation(agg_ops.PopVarOp(), ex.deref(col)) aggregations.extend([count_agg, moment4_agg, variance_agg]) block, agg_ids = block.aggregate( diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 522d1743ff..afc03dbdea 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py 
@@ -897,7 +897,6 @@ def multi_apply_window_op( def multi_apply_unary_op( self, - columns: typing.Sequence[str], op: Union[ops.UnaryOp, ex.Expression], ) -> Block: if isinstance(op, ops.UnaryOp): @@ -911,27 +910,37 @@ def multi_apply_unary_op( block = self - result_ids = [] - for col_id in columns: - label = self.col_id_to_label[col_id] - block, result_id = block.project_expr( - expr.bind_variables({input_varname: ex.deref(col_id)}), - label=label, - ) - block = block.copy_values(result_id, col_id) - result_ids.append(result_id) - block = block.drop_columns(result_ids) + exprs = [ + expr.bind_variables({input_varname: ex.deref(col_id)}) + for col_id in self.value_columns + ] + block = self.project_exprs(exprs, labels=self.column_labels, drop=True) + # Special case, we can preserve transpose cache for full-frame unary ops - if (self._transpose_cache is not None) and set(self.value_columns) == set( - columns - ): - transpose_columns = self._transpose_cache.value_columns - new_transpose_cache = self._transpose_cache.multi_apply_unary_op( - transpose_columns, op - ) + if self._transpose_cache is not None: + new_transpose_cache = self._transpose_cache.multi_apply_unary_op(op) block = block.with_transpose_cache(new_transpose_cache) return block + def project_exprs( + self, + exprs: Sequence[ex.Expression], + labels: Union[Sequence[Label], pd.Index], + drop=False, + ) -> Block: + new_array, _ = self.expr.compute_values(exprs) + if drop: + new_array = new_array.drop_columns(self.value_columns) + + return Block( + new_array, + index_columns=self.index_columns, + column_labels=labels + if drop + else self.column_labels.append(pd.Index(labels)), + index_labels=self._index_labels, + ) + def apply_window_op( self, column: str, @@ -1165,38 +1174,31 @@ def remap_f(x): def aggregate( self, by_column_ids: typing.Sequence[str] = (), - aggregations: typing.Sequence[ - typing.Tuple[ - str, typing.Union[agg_ops.UnaryAggregateOp, agg_ops.NullaryAggregateOp] - ] - ] = (), + aggregations: 
typing.Sequence[ex.Aggregation] = (), + column_labels: Optional[pd.Index] = None, *, dropna: bool = True, ) -> typing.Tuple[Block, typing.Sequence[str]]: """ - Apply aggregations to the block. Callers responsible for setting index column(s) after. + Apply aggregations to the block. Arguments: by_column_id: column id of the aggregation key, this is preserved through the transform and used as index. aggregations: input_column_id, operation tuples - as_index: if True, grouping keys will be index columns in result, otherwise they will be non-index columns. dropna: whether null keys should be dropped """ + if column_labels is None: + column_labels = pd.Index(range(len(aggregations))) + agg_specs = [ ( - ex.UnaryAggregation(operation, ex.deref(input_id)) - if isinstance(operation, agg_ops.UnaryAggregateOp) - else ex.NullaryAggregation(operation), + aggregation, guid.generate_guid(), ) - for input_id, operation in aggregations + for aggregation in aggregations ] output_col_ids = [agg_spec[1] for agg_spec in agg_specs] result_expr = self.expr.aggregate(agg_specs, by_column_ids, dropna=dropna) - aggregate_labels = self._get_labels_for_columns( - [agg[0] for agg in aggregations] - ) - names: typing.List[Label] = [] if len(by_column_ids) == 0: result_expr, label_id = result_expr.create_constant(0, pd.Int64Dtype()) @@ -1214,7 +1216,7 @@ def aggregate( Block( result_expr, index_columns=index_columns, - column_labels=aggregate_labels, + column_labels=column_labels, index_labels=names, ), output_col_ids, @@ -1552,7 +1554,10 @@ def pivot( column_ids.append(masked_id) block = block.select_columns(column_ids) - aggregations = [(col_id, agg_ops.AnyValueOp()) for col_id in column_ids] + aggregations = [ + ex.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col_id)) + for col_id in column_ids + ] result_block, _ = block.aggregate( by_column_ids=self.index_columns, aggregations=aggregations, @@ -2279,18 +2284,15 @@ def _apply_binop( labels: pd.Index, reverse: bool = False, ) -> Block: - 
block = self - binop_result_ids = [] + exprs = [] for left_input, right_input in inputs: - expr = ( + exprs.append( op.as_expr(right_input, left_input) if reverse else op.as_expr(left_input, right_input) ) - block, result_col_id = block.project_expr(expr) - binop_result_ids.append(result_col_id) - return block.select_columns(binop_result_ids).with_column_labels(labels) + return self.project_exprs(exprs, labels=labels, drop=True) def join( self, diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index f879eb3feb..526826495e 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -278,6 +278,24 @@ def to_sql( sql = ibis_bigquery.Backend().compile(self._to_ibis_expr()) return typing.cast(str, sql) + def with_total_order(self, by: Sequence[OrderingExpression]) -> OrderedIR: + return OrderedIR( + table=self._table, + columns=self._columns, + predicates=self._predicates, + ordering=TotalOrdering( + ordering_value_columns=tuple(by), + total_ordering_columns=frozenset( + map( + ex.DerefOp, + itertools.chain.from_iterable( + col.referenced_columns for col in by + ), + ) + ), + ), + ) + def row_count(self, name: str) -> OrderedIR: original_table = self._to_ibis_expr() ibis_table = original_table.agg( @@ -576,6 +594,13 @@ def __init__( def is_ordered_ir(self) -> bool: return True + @property + def order_non_deterministic(self) -> bool: + # ordering suffix non-determinism is ok, as rand() is used as suffix for auto-generated order keys. 
+ # but must be resolved before or explode, otherwise the engine might pull the rand() evaluation above the join, + # creating inconsistencies + return not all(col.deterministic for col in self._ordering.all_ordering_columns) + @property def has_total_order(self) -> bool: return isinstance(self._ordering, TotalOrdering) @@ -722,6 +747,9 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR: ) def explode(self, columns: typing.Sequence[ex.DerefOp]) -> OrderedIR: + if self.order_non_deterministic: + id = bigframes.core.guid.generate_guid() + return self.promote_offsets(id) table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) column_ids = tuple(ref.id.sql for ref in columns) @@ -1229,7 +1257,14 @@ def _bake_ordering(self) -> OrderedIR: tuple(new_exprs), self._ordering.integer_encoding, self._ordering.string_encoding, - self._ordering.total_ordering_columns, + total_ordering_columns=frozenset( + map( + ex.DerefOp, + itertools.chain.from_iterable( + col.referenced_columns for col in new_exprs + ), + ) + ), ) else: new_ordering = RowOrdering( diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 78dff26228..9e87b4b4e8 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -26,7 +26,6 @@ import bigframes.core.compile.compiled as compiled import bigframes.core.compile.concat as concat_impl -import bigframes.core.compile.default_ordering as default_ordering import bigframes.core.compile.ibis_types import bigframes.core.compile.scalar_op_compiler import bigframes.core.compile.scalar_op_compiler as compile_scalar @@ -104,10 +103,7 @@ def set_output_names( ) def compile_ordered_ir(self, node: nodes.BigFrameNode) -> compiled.OrderedIR: - ir = typing.cast(compiled.OrderedIR, self.compile_node(node, True)) - if self.strict: - assert ir.has_total_order - return ir + return typing.cast(compiled.OrderedIR, self.compile_node(node, True)) def compile_unordered_ir(self, node: 
nodes.BigFrameNode) -> compiled.UnorderedIR: return typing.cast(compiled.UnorderedIR, self.compile_node(node, False)) @@ -274,17 +270,6 @@ def compile_read_table_ordered( for source_id, out_id in full_mapping.items() if source_id not in visible_column_mapping ) - elif self.strict: # In strict mode, we fallback to ordering by row hash - order_values = [ - col.name(guids.generate_guid()) - for col in default_ordering.gen_default_ordering( - ibis_table, use_double_hash=True - ) - ] - ordering = bf_ordering.TotalOrdering.from_primary_key( - [value.get_name() for value in order_values] - ) - hidden_columns = tuple(order_values) else: # In unstrict mode, don't generate total ordering from hashing as this is # expensive (prevent removing any columns from table scan) @@ -316,7 +301,11 @@ def compile_filter(self, node: nodes.FilterNode, ordered: bool = True): @_compile_node.register def compile_orderby(self, node: nodes.OrderByNode, ordered: bool = True): if ordered: - return self.compile_ordered_ir(node.child).order_by(node.by) + if node.is_total_order: + # more efficient, can just discard any previous ordering and get same result + return self.compile_unordered_ir(node.child).with_total_order(node.by) + else: + return self.compile_ordered_ir(node.child).order_by(node.by) else: return self.compile_unordered_ir(node.child) diff --git a/bigframes/core/compile/default_ordering.py b/bigframes/core/compile/default_ordering.py index 178857b34c..1a1350cfd6 100644 --- a/bigframes/core/compile/default_ordering.py +++ b/bigframes/core/compile/default_ordering.py @@ -18,7 +18,7 @@ from __future__ import annotations -from typing import cast +from typing import cast, Sequence import bigframes_vendored.ibis import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes @@ -28,7 +28,7 @@ import bigframes.core.guid as guid -def _convert_to_nonnull_string(column: ibis_types.Column) -> ibis_types.StringValue: +def _convert_to_nonnull_string(column: ibis_types.Value) -> ibis_types.StringValue: 
col_type = column.type() if ( col_type.is_numeric() @@ -60,29 +60,35 @@ def _convert_to_nonnull_string(column: ibis_types.Column) -> ibis_types.StringVa ) -def gen_default_ordering( - table: ibis_types.Table, use_double_hash: bool = True -) -> list[bigframes_vendored.ibis.Value]: +def gen_row_key( + columns: Sequence[ibis_types.Value], +) -> bigframes_vendored.ibis.Value: ordering_hash_part = guid.generate_guid("bigframes_ordering_") ordering_hash_part2 = guid.generate_guid("bigframes_ordering_") ordering_rand_part = guid.generate_guid("bigframes_ordering_") # All inputs into hash must be non-null or resulting hash will be null - str_values = list( - map(lambda col: _convert_to_nonnull_string(table[col]), table.columns) - ) + str_values = list(map(_convert_to_nonnull_string, columns)) full_row_str = ( str_values[0].concat(*str_values[1:]) if len(str_values) > 1 else str_values[0] ) - full_row_hash = full_row_str.hash().name(ordering_hash_part) + full_row_hash = ( + full_row_str.hash() + .name(ordering_hash_part) + .cast(ibis_dtypes.String(nullable=True)) + ) # By modifying value slightly, we get another hash uncorrelated with the first - full_row_hash_p2 = (full_row_str + "_").hash().name(ordering_hash_part2) + full_row_hash_p2 = ( + (full_row_str + "_") + .hash() + .name(ordering_hash_part2) + .cast(ibis_dtypes.String(nullable=True)) + ) # Used to disambiguate between identical rows (which will have identical hash) - random_value = bigframes_vendored.ibis.random().name(ordering_rand_part) - - order_values = ( - [full_row_hash, full_row_hash_p2, random_value] - if use_double_hash - else [full_row_hash, random_value] + random_value = ( + bigframes_vendored.ibis.random() + .name(ordering_rand_part) + .cast(ibis_dtypes.String(nullable=True)) ) - return order_values + + return full_row_hash.concat(full_row_hash_p2, random_value) diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index a6d3949bc0..18f0834903 100644 --- 
a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -14,6 +14,7 @@ from __future__ import annotations import textwrap +import typing from typing import Any, cast, Dict, Iterable, Optional, Tuple, Union import warnings @@ -22,7 +23,7 @@ import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes from bigframes_vendored.ibis.expr.datatypes.core import ( - dtype as python_type_to_bigquery_type, + dtype as python_type_to_ibis_type, ) import bigframes_vendored.ibis.expr.types as ibis_types import geopandas as gpd # type: ignore @@ -472,12 +473,24 @@ class UnsupportedTypeError(ValueError): def __init__(self, type_, supported_types): self.type = type_ self.supported_types = supported_types + super().__init__( + f"'{type_}' is not one of the supported types {supported_types}" + ) def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType: if t not in bigframes.dtypes.RF_SUPPORTED_IO_PYTHON_TYPES: raise UnsupportedTypeError(t, bigframes.dtypes.RF_SUPPORTED_IO_PYTHON_TYPES) - return python_type_to_bigquery_type(t) + return python_type_to_ibis_type(t) + + +def ibis_array_output_type_from_python_type(t: type) -> ibis_dtypes.DataType: + array_of = typing.get_args(t)[0] + if array_of not in bigframes.dtypes.RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES: + raise UnsupportedTypeError( + array_of, bigframes.dtypes.RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES + ) + return python_type_to_ibis_type(t) def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType: diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index d594cb3d68..4f670b51ca 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -26,6 +26,7 @@ import numpy as np import pandas as pd +import bigframes.core.compile.default_ordering import bigframes.core.compile.ibis_types 
import bigframes.core.expression as ex import bigframes.dtypes @@ -721,6 +722,21 @@ def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp): ) +@scalar_op_compiler.register_unary_op(ops.UnixSeconds) +def unix_seconds_op_impl(x: ibis_types.TimestampValue): + return x.epoch_seconds() + + +@scalar_op_compiler.register_unary_op(ops.UnixMicros) +def unix_micros_op_impl(x: ibis_types.TimestampValue): + return unix_micros(x) + + +@scalar_op_compiler.register_unary_op(ops.UnixMillis) +def unix_millis_op_impl(x: ibis_types.TimestampValue): + return unix_millis(x) + + @scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True) def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp): supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"] @@ -1219,6 +1235,11 @@ def to_json_string_op_impl(json_obj: ibis_types.Value): return to_json_string(json_obj=json_obj) +@scalar_op_compiler.register_unary_op(ops.JSONValue, pass_op=True) +def json_value_op_impl(x: ibis_types.Value, op: ops.JSONValue): + return json_value(json_obj=x, json_path=op.json_path) + + # Blob Ops @scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op) def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value): @@ -1839,6 +1860,17 @@ def nary_remote_function_op_impl( return result +@scalar_op_compiler.register_nary_op(ops.SqlScalarOp, pass_op=True) +def sql_scalar_op_impl(*operands: ibis_types.Value, op: ops.SqlScalarOp): + return ibis_generic.SqlScalar( + op.sql_template, + values=tuple(typing.cast(ibis_generic.Value, expr.op()) for expr in operands), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.output_type() + ), + ).to_expr() + + @scalar_op_compiler.register_nary_op(ops.StructOp, pass_op=True) def struct_op_impl( *values: ibis_types.Value, op: ops.StructOp @@ -1850,6 +1882,11 @@ def struct_op_impl( return ibis_types.struct(data) +@scalar_op_compiler.register_nary_op(ops.RowKey, pass_op=True) +def rowkey_op_impl(*values: 
ibis_types.Value, op: ops.RowKey) -> ibis_types.Value: + return bigframes.core.compile.default_ordering.gen_row_key(values) + + # Helpers def is_null(value) -> bool: # float NaN/inf should be treated as distinct from 'true' null values @@ -1865,6 +1902,16 @@ def timestamp(a: str) -> ibis_dtypes.timestamp: # type: ignore """Convert string to timestamp.""" +@ibis_udf.scalar.builtin +def unix_millis(a: ibis_dtypes.timestamp) -> int: # type: ignore + """Convert a timestamp to milliseconds""" + + +@ibis_udf.scalar.builtin +def unix_micros(a: ibis_dtypes.timestamp) -> int: # type: ignore + """Convert a timestamp to microseconds""" + + # Need these because ibis otherwise tries to do casts to int that can fail @ibis_udf.scalar.builtin(name="floor") def float_floor(a: float) -> float: @@ -1925,6 +1972,13 @@ def to_json_string( # type: ignore[empty-body] """Convert JSON to STRING.""" +@ibis_udf.scalar.builtin(name="json_value") +def json_value( # type: ignore[empty-body] + json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.String +) -> ibis_dtypes.String: + """Retrieve value of a JSON field as plain STRING.""" + + @ibis_udf.scalar.builtin(name="ML.DISTANCE") def vector_distance(vector1, vector2, type: str) -> ibis_dtypes.Float64: # type: ignore[empty-body] """Computes the distance between two vectors using specified type ("EUCLIDEAN", "MANHATTAN", or "COSINE")""" diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py index ef554ad499..b903f9b552 100644 --- a/bigframes/core/compile/single_column.py +++ b/bigframes/core/compile/single_column.py @@ -50,6 +50,12 @@ def join_by_column_ordered( first the coalesced join keys, then, all the left columns, and finally, all the right columns. 
""" + if type == "right": + if left.order_non_deterministic: + right = right._bake_ordering() + else: + if left.order_non_deterministic: + left = left._bake_ordering() # Do not reset the generator l_value_mapping = dict(zip(left.column_ids, left.column_ids)) diff --git a/bigframes/core/expression.py b/bigframes/core/expression.py index 3b7828bbf0..2d561657cb 100644 --- a/bigframes/core/expression.py +++ b/bigframes/core/expression.py @@ -205,6 +205,10 @@ def bind_variables( def is_bijective(self) -> bool: return False + @property + def deterministic(self) -> bool: + return True + @property def is_identity(self) -> bool: """True for identity operation that does not transform input.""" @@ -409,4 +413,10 @@ def bind_refs( @property def is_bijective(self) -> bool: # TODO: Mark individual functions as bijective? - return False + return all(input.is_bijective for input in self.inputs) and self.op.is_bijective + + @property + def deterministic(self) -> bool: + return ( + all(input.deterministic for input in self.inputs) and self.op.deterministic + ) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index dfbe2ddea2..5fb5fb14d2 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -15,7 +15,7 @@ from __future__ import annotations import typing -from typing import Sequence, Union +from typing import Sequence, Tuple, Union import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby @@ -26,6 +26,7 @@ import bigframes.core as core import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks +import bigframes.core.expression import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations @@ -334,24 +335,19 @@ def agg(self, func=None, **kwargs) -> typing.Union[df.DataFrame, series.Series]: return self._agg_named(**kwargs) def _agg_string(self, func: str) 
-> df.DataFrame: - aggregations = [ - (col_id, agg_ops.lookup_agg_func(func)) - for col_id in self._aggregated_columns() - ] + ids, labels = self._aggregated_columns() + aggregations = [agg(col_id, agg_ops.lookup_agg_func(func)) for col_id in ids] agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, dropna=self._dropna, + column_labels=labels, ) dataframe = df.DataFrame(agg_block) return dataframe if self._as_index else self._convert_index(dataframe) def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: - aggregations: typing.List[ - typing.Tuple[ - str, typing.Union[agg_ops.UnaryAggregateOp, agg_ops.NullaryAggregateOp] - ] - ] = [] + aggregations: typing.List[bigframes.core.expression.Aggregation] = [] column_labels = [] want_aggfunc_level = any(utils.is_list_like(aggs) for aggs in func.values()) @@ -362,7 +358,7 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: funcs_for_id if utils.is_list_like(funcs_for_id) else [funcs_for_id] ) for f in func_list: - aggregations.append((col_id, agg_ops.lookup_agg_func(f))) + aggregations.append(agg(col_id, agg_ops.lookup_agg_func(f))) column_labels.append(label) agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, @@ -373,7 +369,10 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: agg_block = agg_block.with_column_labels( utils.combine_indices( pd.Index(column_labels), - pd.Index(agg[1].name for agg in aggregations), + pd.Index( + typing.cast(agg_ops.AggregateOp, agg.op).name + for agg in aggregations + ), ) ) else: @@ -382,34 +381,21 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: return dataframe if self._as_index else self._convert_index(dataframe) def _agg_list(self, func: typing.Sequence) -> df.DataFrame: + ids, labels = self._aggregated_columns() aggregations = [ - (col_id, agg_ops.lookup_agg_func(f)) - for col_id in self._aggregated_columns() - for f in func + agg(col_id, agg_ops.lookup_agg_func(f)) for col_id in ids 
for f in func ] if self._block.column_labels.nlevels > 1: # Restructure MultiIndex for proper format: (idx1, idx2, func) # rather than ((idx1, idx2), func). - aggregated_columns = pd.MultiIndex.from_tuples( - [ - self._block.col_id_to_label[col_id] - for col_id in self._aggregated_columns() - ], - names=[*self._block.column_labels.names], - ).to_frame(index=False) - column_labels = [ - tuple(col_id) + (f,) - for col_id in aggregated_columns.to_numpy() - for f in func - ] - else: - column_labels = [ - (self._block.col_id_to_label[col_id], f) - for col_id in self._aggregated_columns() + tuple(label) + (f,) + for label in labels.to_frame(index=False).to_numpy() for f in func ] + else: # Single-level index + column_labels = [(label, f) for label in labels for f in func] agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, @@ -435,7 +421,7 @@ def _agg_named(self, **kwargs) -> df.DataFrame: if not isinstance(v, tuple) or (len(v) != 2): raise TypeError("kwargs values must be 2-tuples of column, aggfunc") col_id = self._resolve_label(v[0]) - aggregations.append((col_id, agg_ops.lookup_agg_func(v[1]))) + aggregations.append(agg(col_id, agg_ops.lookup_agg_func(v[1]))) column_labels.append(k) agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, @@ -470,15 +456,19 @@ def _raise_on_non_numeric(self, op: str): ) return self - def _aggregated_columns(self, numeric_only: bool = False) -> typing.Sequence[str]: + def _aggregated_columns( + self, numeric_only: bool = False + ) -> Tuple[typing.Sequence[str], pd.Index]: valid_agg_cols: list[str] = [] - for col_id in self._selected_cols: + offsets: list[int] = [] + for i, col_id in enumerate(self._block.value_columns): is_numeric = ( self._column_type(col_id) in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE ) - if is_numeric or not numeric_only: + if (col_id in self._selected_cols) and (is_numeric or not numeric_only): + offsets.append(i) valid_agg_cols.append(col_id) - return valid_agg_cols + return 
valid_agg_cols, self._block.column_labels.take(offsets) def _column_type(self, col_id: str) -> dtypes.Dtype: col_offset = self._block.value_columns.index(col_id) @@ -488,11 +478,12 @@ def _column_type(self, col_id: str) -> dtypes.Dtype: def _aggregate_all( self, aggregate_op: agg_ops.UnaryAggregateOp, numeric_only: bool = False ) -> df.DataFrame: - aggregated_col_ids = self._aggregated_columns(numeric_only=numeric_only) - aggregations = [(col_id, aggregate_op) for col_id in aggregated_col_ids] + aggregated_col_ids, labels = self._aggregated_columns(numeric_only=numeric_only) + aggregations = [agg(col_id, aggregate_op) for col_id in aggregated_col_ids] result_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, + column_labels=labels, dropna=self._dropna, ) dataframe = df.DataFrame(result_block) @@ -508,7 +499,7 @@ def _apply_window_op( window_spec = window or window_specs.cumulative_rows( grouping_keys=tuple(self._by_col_ids) ) - columns = self._aggregated_columns(numeric_only=numeric_only) + columns, _ = self._aggregated_columns(numeric_only=numeric_only) block, result_ids = self._block.multi_apply_window_op( columns, op, window_spec=window_spec ) @@ -639,11 +630,11 @@ def prod(self, *args) -> series.Series: def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]: column_names: list[str] = [] if isinstance(func, str): - aggregations = [(self._value_column, agg_ops.lookup_agg_func(func))] + aggregations = [agg(self._value_column, agg_ops.lookup_agg_func(func))] column_names = [func] elif utils.is_list_like(func): aggregations = [ - (self._value_column, agg_ops.lookup_agg_func(f)) for f in func + agg(self._value_column, agg_ops.lookup_agg_func(f)) for f in func ] column_names = list(func) else: @@ -756,7 +747,7 @@ def expanding(self, min_periods: int = 1) -> windows.Window: def _aggregate(self, aggregate_op: agg_ops.UnaryAggregateOp) -> series.Series: result_block, _ = self._block.aggregate( self._by_col_ids, - 
((self._value_column, aggregate_op),), + (agg(self._value_column, aggregate_op),), dropna=self._dropna, ) @@ -781,3 +772,13 @@ def _apply_window_op( window_spec=window_spec, ) return series.Series(block.select_column(result_id)) + + +def agg(input: str, op: agg_ops.AggregateOp) -> bigframes.core.expression.Aggregation: + if isinstance(op, agg_ops.UnaryAggregateOp): + return bigframes.core.expression.UnaryAggregation( + op, bigframes.core.expression.deref(input) + ) + else: + assert isinstance(op, agg_ops.NullaryAggregateOp) + return bigframes.core.expression.NullaryAggregation(op) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index cc7d4ad58d..36aa6682bd 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -13,15 +13,21 @@ # limitations under the License. import functools +import inspect import threading from typing import List +from google.cloud import bigquery +import pandas + _lock = threading.Lock() # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. # See internal issue 386825477. MAX_LABELS_COUNT = 64 - 8 +PANDAS_API_TRACKING_TASK = "pandas_api_tracking" +PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" _api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] @@ -30,11 +36,86 @@ _call_stack: List = [] +def submit_pandas_labels( + bq_client: bigquery.Client, + class_name: str, + method_name: str, + args=(), + kwargs={}, + task: str = PANDAS_API_TRACKING_TASK, +): + """ + Submits usage of API to BigQuery using a simulated failed query. + + This function is designed to capture and log details about the usage of pandas methods, + including class and method names, the count of positional arguments, and any keyword + arguments that match the method's signature. To avoid incurring costs, it simulates a + query execution using a query with syntax errors. 
+ + Args: + bq_client (bigquery.Client): The client used to interact with BigQuery. + class_name (str): The name of the pandas class being used. + method_name (str): The name of the method being invoked. + args (tuple): The positional arguments passed to the method. + kwargs (dict): The keyword arguments passed to the method. + task (str): The specific task type for the logging event: + - 'PANDAS_API_TRACKING_TASK': Indicates that the unimplemented feature is a method. + - 'PANDAS_PARAM_TRACKING_TASK': Indicates that the unimplemented feature is a + parameter of a method. + """ + labels_dict = { + "task": task, + "class_name": class_name.lower(), + "method_name": method_name.lower(), + "args_count": len(args), + } + + if hasattr(pandas, class_name): + cls = getattr(pandas, class_name) + else: + return + + if hasattr(cls, method_name): + method = getattr(cls, method_name) + else: + return + + if kwargs: + # Iterate through the keyword arguments and add them to the labels dictionary if they + # are parameters that are implemented in pandas and the maximum label count has not been reached. + signature = inspect.signature(method) + param_names = [param.name for param in signature.parameters.values()] + + idx = 0 + for key in kwargs.keys(): + if len(labels_dict) >= MAX_LABELS_COUNT: + break + if key in param_names: + labels_dict[f"kwargs_{idx}"] = key.lower() + idx += 1 + + # If this log is for tracking unimplemented parameters and no keyword arguments were + # provided, skip logging. + if len(labels_dict) == 4 and task == PANDAS_PARAM_TRACKING_TASK: + return + + # Run a query with syntax error to avoid cost. 
+ query = "SELECT COUNT(x FROM data_table—" + job_config = bigquery.QueryJobConfig(labels=labels_dict) + bq_client.query(query, job_config=job_config) + + def class_logger(decorated_cls): """Decorator that adds logging functionality to each method of the class.""" for attr_name, attr_value in decorated_cls.__dict__.items(): if callable(attr_value) and (attr_name not in _excluded_methods): - setattr(decorated_cls, attr_name, method_logger(attr_value, decorated_cls)) + if isinstance(attr_value, staticmethod): + # TODO(b/390244171) support for staticmethod + pass + else: + setattr( + decorated_cls, attr_name, method_logger(attr_value, decorated_cls) + ) elif isinstance(attr_value, property): setattr( decorated_cls, attr_name, property_logger(attr_value, decorated_cls) @@ -46,7 +127,7 @@ def method_logger(method, decorated_cls): """Decorator that adds logging functionality to a method.""" @functools.wraps(method) - def wrapper(*args, **kwargs): + def wrapper(self, *args, **kwargs): class_name = decorated_cls.__name__ # Access decorated class name api_method_name = str(method.__name__) full_method_name = f"{class_name.lower()}-{api_method_name}" @@ -58,7 +139,23 @@ def wrapper(*args, **kwargs): _call_stack.append(full_method_name) try: - return method(*args, **kwargs) + return method(self, *args, **kwargs) + except (NotImplementedError, TypeError) as e: + # Log method parameters that are implemented in pandas but either missing (TypeError) + # or not fully supported (NotImplementedError) in BigFrames. + # Logging is currently supported only when we can access the bqclient through + # self._block.expr.session.bqclient. Also, to avoid generating multiple queries + # because of internal calls, we log only when the method is directly invoked. 
+ if hasattr(self, "_block") and len(_call_stack) == 1: + submit_pandas_labels( + self._block.expr.session.bqclient, + class_name, + api_method_name, + args, + kwargs, + task=PANDAS_PARAM_TRACKING_TASK, + ) + raise e finally: _call_stack.pop() diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 244f1e7751..fe79da2bf6 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -976,6 +976,9 @@ def remap_refs(self, mappings: Mapping[bfet_ids.ColumnId, bfet_ids.ColumnId]): @dataclasses.dataclass(frozen=True, eq=False) class OrderByNode(UnaryNode): by: Tuple[OrderingExpression, ...] + # This is an optimization, if true, can discard previous orderings. + # might be a total ordering even if false + is_total_order: bool = False @property def variables_introduced(self) -> int: diff --git a/bigframes/core/ordering.py b/bigframes/core/ordering.py index 059987d597..357cc8145c 100644 --- a/bigframes/core/ordering.py +++ b/bigframes/core/ordering.py @@ -59,6 +59,10 @@ class OrderingExpression: def referenced_columns(self) -> Set[ids.ColumnId]: return set(self.scalar_expression.column_references) + @property + def deterministic(self) -> bool: + return self.scalar_expression.deterministic + def remap_column_refs( self, mapping: Mapping[ids.ColumnId, ids.ColumnId], diff --git a/bigframes/core/sql.py b/bigframes/core/sql.py index ae0a0de2aa..f4de177f37 100644 --- a/bigframes/core/sql.py +++ b/bigframes/core/sql.py @@ -18,10 +18,13 @@ """ import datetime +import decimal import json import math from typing import cast, Collection, Iterable, Mapping, Optional, TYPE_CHECKING, Union +import shapely # type: ignore + import bigframes.core.compile.googlesql as googlesql if TYPE_CHECKING: @@ -31,12 +34,16 @@ ### Writing SQL Values (literals, column references, table references, etc.) 
-def simple_literal(value: str | int | bool | float | datetime.datetime): +def simple_literal(value: bytes | str | int | bool | float | datetime.datetime | None): """Return quoted input string.""" # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals - if isinstance(value, str): + if value is None: + return "NULL" + elif isinstance(value, str): # Single quoting seems to work nicer with ibis than double quoting return f"'{googlesql._escape_chars(value)}'" + elif isinstance(value, bytes): + return repr(value) elif isinstance(value, (bool, int)): return str(value) elif isinstance(value, float): @@ -48,8 +55,21 @@ def simple_literal(value: str | int | bool | float | datetime.datetime): if value == -math.inf: return 'CAST("-inf" as FLOAT)' return str(value) - if isinstance(value, datetime.datetime): - return f"TIMESTAMP('{value.isoformat()}')" + # Check datetime first as it is a subclass of date + elif isinstance(value, datetime.datetime): + if value.tzinfo is None: + return f"DATETIME('{value.isoformat()}')" + else: + return f"TIMESTAMP('{value.isoformat()}')" + elif isinstance(value, datetime.date): + return f"DATE('{value.isoformat()}')" + elif isinstance(value, datetime.time): + return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))" + elif isinstance(value, shapely.Geometry): + return f"ST_GEOGFROMTEXT({simple_literal(shapely.to_wkt(value))})" + elif isinstance(value, decimal.Decimal): + # TODO: disambiguate BIGNUMERIC based on scale and/or precision + return f"CAST('{str(value)}' AS NUMERIC)" else: raise ValueError(f"Cannot produce literal for {value}") diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 01e9bd6308..ce57661919 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -19,6 +19,7 @@ import datetime import inspect import itertools +import json import re import sys import textwrap @@ -48,7 +49,6 @@ import pyarrow import tabulate -import bigframes import bigframes._config.display_options as 
display_options import bigframes.constants import bigframes.core @@ -180,9 +180,7 @@ def __init__( if columns: block = block.select_columns(list(columns)) # type:ignore if dtype: - block = block.multi_apply_unary_op( - block.value_columns, ops.AsTypeOp(to_type=dtype) - ) + block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=dtype)) self._block = block else: @@ -645,6 +643,9 @@ def __getattr__(self, key: str): return self.__getitem__(key) if hasattr(pandas.DataFrame, key): + log_adapter.submit_pandas_labels( + self._block.expr.session.bqclient, self.__class__.__name__, key + ) raise AttributeError( textwrap.dedent( f""" @@ -741,16 +742,14 @@ def _repr_html_(self) -> str: df = self.copy() if bigframes.options.experiments.blob: - import bigframes.bigquery as bbq - blob_cols = [ col for col in df.columns if df[col].dtype == bigframes.dtypes.OBJ_REF_DTYPE ] for col in blob_cols: - df[col] = df[col]._apply_unary_op(ops.ObjGetAccessUrl(mode="R")) - df[col] = bbq.json_extract(df[col], "$.access_urls.read_url") + # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. + df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) # TODO(swast): pass max_columns and get the true column count back. Maybe # get 1 more column than we have requested so that pandas can add the @@ -767,11 +766,21 @@ def _repr_html_(self) -> str: # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas. 
if bigframes.options.experiments.blob: - def url_to_image_html(url: str) -> str: - # url is a json string, which already contains double-quotes "" - return f"" + def obj_ref_rt_to_html(obj_ref_rt) -> str: + obj_ref_rt_json = json.loads(obj_ref_rt) + content_type = typing.cast( + str, + obj_ref_rt_json["objectref"]["details"]["gcs_metadata"][ + "content_type" + ], + ) + if content_type.startswith("image"): + url = obj_ref_rt_json["access_urls"]["read_url"] + return f'' + + return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' - formatters = {blob_col: url_to_image_html for blob_col in blob_cols} + formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols} # set max_colwidth so not to truncate the image url with pandas.option_context("display.max_colwidth", None): @@ -844,9 +853,7 @@ def _apply_scalar_binop( left_input=ex.free_var("var1"), right_input=ex.const(other), ) - return DataFrame( - self._block.multi_apply_unary_op(self._block.value_columns, expr) - ) + return DataFrame(self._block.multi_apply_unary_op(expr)) def _apply_series_binop_axis_0( self, @@ -1976,6 +1983,11 @@ def sort_values( kind: str = "quicksort", na_position: typing.Literal["first", "last"] = "last", ) -> DataFrame: + if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)): + raise KeyError( + f"Invalid key type: {type(by).__name__}. Please provide valid column name(s)." 
+ ) + if na_position not in {"first", "last"}: raise ValueError("Param na_position must be one of 'first' or 'last'") @@ -2394,9 +2406,7 @@ def dropna( result = result.reset_index() return DataFrame(result) else: - isnull_block = self._block.multi_apply_unary_op( - self._block.value_columns, ops.isnull_op - ) + isnull_block = self._block.multi_apply_unary_op(ops.isnull_op) if how == "any": null_locations = DataFrame(isnull_block).any().to_pandas() else: # 'all' @@ -3033,8 +3043,15 @@ def merge( return DataFrame(block) def join( - self, other: DataFrame, *, on: Optional[str] = None, how: str = "left" + self, + other: Union[DataFrame, bigframes.series.Series], + *, + on: Optional[str] = None, + how: str = "left", ) -> DataFrame: + if isinstance(other, bigframes.series.Series): + other = other.to_frame() + left, right = self, other if not left.columns.intersection(right.columns).empty: @@ -3822,7 +3839,7 @@ def to_orc(self, path=None, **kwargs) -> bytes | None: return as_pandas_default_index.to_orc(path, **kwargs) def _apply_unary_op(self, operation: ops.UnaryOp) -> DataFrame: - block = self._block.multi_apply_unary_op(self._block.value_columns, operation) + block = self._block.multi_apply_unary_op(operation) return DataFrame(block) def _map_clustering_columns( @@ -3917,6 +3934,10 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: ) def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): + # In Bigframes remote function, DataFrame '.apply' method is specifically + # designed to work with row-wise or column-wise operations, where the input + # to the applied function should be a Series, not a scalar. + if utils.get_axis_number(axis) == 1: msg = "axis=1 scenario is in preview." 
warnings.warn(msg, category=bfe.PreviewWarning) @@ -4009,10 +4030,34 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): ops.NaryRemoteFunctionOp(func=func), series_list[1:] ) result_series.name = None + + # if the output is an array, reconstruct it from the json serialized + # string form + if bigframes.dtypes.is_array_like(func.output_dtype): + import bigframes.bigquery as bbq + + result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + func.output_dtype.pyarrow_dtype.value_type + ) + result_series = bbq.json_extract_string_array( + result_series, value_dtype=result_dtype + ) + return result_series + # At this point column-wise or element-wise remote function operation will + # be performed (not supported). + if hasattr(func, "bigframes_remote_function"): + raise NotImplementedError( + "BigFrames DataFrame '.apply()' does not support remote function " + "for column-wise (i.e. with axis=0) operations, please use a " + "regular python function instead. For element-wise operations of " + "the remote function, please use '.map()'." 
+ ) + # Per-column apply results = {name: func(col, *args, **kwargs) for name, col in self.items()} + if all( [ isinstance(val, bigframes.series.Series) or utils.is_list_like(val) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 6e179225ea..3da3fa24f3 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -18,7 +18,7 @@ import datetime import decimal import typing -from typing import Dict, List, Literal, Union +from typing import Any, Dict, List, Literal, Union import bigframes_vendored.constants as constants import geopandas as gpd # type: ignore @@ -26,6 +26,7 @@ import numpy as np import pandas as pd import pyarrow as pa +import shapely # type: ignore # Type hints for Pandas dtypes supported by BigQuery DataFrame Dtype = Union[ @@ -450,6 +451,74 @@ def bigframes_dtype_to_arrow_dtype( ) +def bigframes_dtype_to_literal( + bigframes_dtype: Dtype, +) -> Any: + """Create a representative literal value for a bigframes dtype. + + The inverse of infer_literal_type(). + """ + if isinstance(bigframes_dtype, pd.ArrowDtype): + arrow_type = bigframes_dtype.pyarrow_dtype + return arrow_type_to_literal(arrow_type) + + if isinstance(bigframes_dtype, pd.Float64Dtype): + return 1.0 + if isinstance(bigframes_dtype, pd.Int64Dtype): + return 1 + if isinstance(bigframes_dtype, pd.BooleanDtype): + return True + if isinstance(bigframes_dtype, pd.StringDtype): + return "string" + if isinstance(bigframes_dtype, gpd.array.GeometryDtype): + return shapely.Point((0, 0)) + + raise ValueError( + f"No literal conversion for {bigframes_dtype}. 
{constants.FEEDBACK_LINK}" + ) + + +def arrow_type_to_literal( + arrow_type: pa.DataType, +) -> Any: + """Create a representative literal value for an arrow type.""" + if pa.types.is_list(arrow_type): + return [arrow_type_to_literal(arrow_type.value_type)] + if pa.types.is_struct(arrow_type): + return { + field.name: arrow_type_to_literal(field.type) for field in arrow_type.fields + } + if pa.types.is_string(arrow_type): + return "string" + if pa.types.is_binary(arrow_type): + return b"bytes" + if pa.types.is_floating(arrow_type): + return 1.0 + if pa.types.is_integer(arrow_type): + return 1 + if pa.types.is_boolean(arrow_type): + return True + if pa.types.is_date(arrow_type): + return datetime.date(2025, 1, 1) + if pa.types.is_timestamp(arrow_type): + return datetime.datetime( + 2025, + 1, + 1, + 1, + 1, + tzinfo=datetime.timezone.utc if arrow_type.tz is not None else None, + ) + if pa.types.is_decimal(arrow_type): + return decimal.Decimal("1.0") + if pa.types.is_time(arrow_type): + return datetime.time(1, 1, 1) + + raise ValueError( + f"No literal conversion for {arrow_type}. {constants.FEEDBACK_LINK}" + ) + + def infer_literal_type(literal) -> typing.Optional[Dtype]: # Maybe also normalize literal to canonical python representation to remove this burden from compilers? if pd.api.types.is_list_like(literal): @@ -701,6 +770,13 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype: # https://cloud.google.com/bigquery/docs/remote-functions#limitations RF_SUPPORTED_IO_PYTHON_TYPES = {bool, bytes, float, int, str} +# Support array output types in BigQuery DataFrames remote functions even though +# it is not currently (2024-10-06) supported in BigQuery remote functions. +# https://cloud.google.com/bigquery/docs/remote-functions#limitations +# TODO(b/284515241): remove this special handling when BigQuery remote functions +# support array. 
+RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES = {bool, float, int, str} + RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS = { "BOOLEAN", "BOOL", diff --git a/bigframes/functions/_remote_function_client.py b/bigframes/functions/_remote_function_client.py index 5acd31b425..0d0cc08128 100644 --- a/bigframes/functions/_remote_function_client.py +++ b/bigframes/functions/_remote_function_client.py @@ -95,6 +95,7 @@ def create_bq_remote_function( endpoint, bq_function_name, max_batching_rows, + metadata, ): """Create a BigQuery remote function given the artifacts of a user defined function and the http endpoint of a corresponding cloud function.""" @@ -120,9 +121,14 @@ def create_bq_remote_function( "max_batching_rows": max_batching_rows, } + if metadata: + # We are using the description field to store this structured + # bigframes specific metadata for the lack of a better option + remote_function_options["description"] = metadata + remote_function_options_str = ", ".join( [ - f'{key}="{val}"' if isinstance(val, str) else f"{key}={val}" + f"{key}='{val}'" if isinstance(val, str) else f"{key}={val}" for key, val in remote_function_options.items() if val is not None ] @@ -200,14 +206,7 @@ def generate_cloud_function_code( package_requirements=None, is_row_processor=False, ): - """Generate the cloud function code for a given user defined function. - - Args: - input_types (tuple[str]): - Types of the input arguments in BigQuery SQL data type names. - output_type (str): - Types of the output scalar as a BigQuery SQL data type name. - """ + """Generate the cloud function code for a given user defined function.""" # requirements.txt if package_requirements: @@ -240,14 +239,7 @@ def create_cloud_function( memory_mib=1024, ingress_settings="all", ): - """Create a cloud function from the given user defined function. - - Args: - input_types (tuple[str]): - Types of the input arguments in BigQuery SQL data type names. - output_type (str): - Types of the output scalar as a BigQuery SQL data type name. 
- """ + """Create a cloud function from the given user defined function.""" # Build and deploy folder structure containing cloud function with tempfile.TemporaryDirectory() as directory: @@ -394,6 +386,7 @@ def provision_bq_remote_function( cloud_function_vpc_connector, cloud_function_memory_mib, cloud_function_ingress_settings, + bq_metadata, ): """Provision a BigQuery remote function.""" # Augment user package requirements with any internal package @@ -473,6 +466,7 @@ def provision_bq_remote_function( cf_endpoint, remote_function_name, max_batching_rows, + bq_metadata, ) created_new = True diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py index 662c32a6a6..d6b729bf6e 100644 --- a/bigframes/functions/_remote_function_session.py +++ b/bigframes/functions/_remote_function_session.py @@ -34,6 +34,7 @@ import bigframes_vendored.constants as constants import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes +import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes import bigframes_vendored.ibis.expr.operations.udf as ibis_udf import cloudpickle import google.api_core.exceptions @@ -167,12 +168,19 @@ def remote_function( `$ gcloud projects add-iam-policy-binding PROJECT_ID --member="serviceAccount:CONNECTION_SERVICE_ACCOUNT_ID" --role="roles/run.invoker"`. Args: - input_types (None, type, or sequence(type)): + input_types (type or sequence(type), Optional): For scalar user defined function it should be the input type or - sequence of input types. For row processing user defined function, - type `Series` should be specified. - output_type (Optional[type]): - Data type of the output in the user defined function. + sequence of input types. The supported scalar input types are + `bool`, `bytes`, `float`, `int`, `str`. For row processing user + defined function (i.e. 
functions that receive a single input + representing a row in form of a Series), type `Series` should be + specified. + output_type (type, Optional): + Data type of the output in the user defined function. If the + user defined function returns an array, then `list[type]` should + be specified. The supported output types are `bool`, `bytes`, + `float`, `int`, `str`, `list[bool]`, `list[float]`, `list[int]` + and `list[str]`. session (bigframes.Session, Optional): BigQuery DataFrames session to use for getting default project, dataset and BigQuery connection. @@ -497,6 +505,24 @@ def try_delattr(attr): try_delattr("is_row_processor") try_delattr("ibis_node") + # resolve the output type that can be supported in the bigframes, + # ibis, BQ remote functions and cloud functions integration + ibis_output_type_for_bqrf = ibis_signature.output_type + bqrf_metadata = None + if isinstance(ibis_signature.output_type, ibis_dtypes.Array): + # TODO(b/284515241): remove this special handling to support + # array output types once BQ remote functions support ARRAY. + # Until then, use json serialized strings at the cloud function + # and BQ level, and parse that to the intended output type at + # the bigframes level. 
+ ibis_output_type_for_bqrf = ibis_dtypes.String() + bqrf_metadata = _utils.get_bigframes_metadata( + python_output_type=output_type + ) + bqrf_output_type = third_party_ibis_bqtypes.BigQueryType.from_ibis( + ibis_output_type_for_bqrf + ) + ( rf_name, cf_name, @@ -508,9 +534,7 @@ def try_delattr(attr): for type_ in ibis_signature.input_types if type_ is not None ), - output_type=third_party_ibis_bqtypes.BigQueryType.from_ibis( - ibis_signature.output_type - ), + output_type=bqrf_output_type, reuse=reuse, name=name, package_requirements=packages, @@ -521,6 +545,7 @@ def try_delattr(attr): cloud_function_vpc_connector=cloud_function_vpc_connector, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, + bq_metadata=bqrf_metadata, ) # TODO(shobs): Find a better way to support udfs with param named "name". @@ -541,7 +566,7 @@ def try_delattr(attr): name=rf_name, catalog=dataset_ref.project, database=dataset_ref.dataset_id, - signature=(ibis_signature.input_types, ibis_signature.output_type), + signature=(ibis_signature.input_types, ibis_output_type_for_bqrf), ) # type: ignore func.bigframes_cloud_function = ( remote_function_client.get_cloud_function_fully_qualified_name(cf_name) diff --git a/bigframes/functions/_utils.py b/bigframes/functions/_utils.py index c50e55cc33..591da01dd0 100644 --- a/bigframes/functions/_utils.py +++ b/bigframes/functions/_utils.py @@ -15,6 +15,8 @@ import hashlib import inspect +import json +import typing from typing import cast, List, NamedTuple, Optional, Sequence, Set import bigframes_vendored.ibis.expr.datatypes.core as ibis_dtypes @@ -26,6 +28,7 @@ import pyarrow import bigframes.core.compile.ibis_types +import bigframes.dtypes # Naming convention for the remote function artifacts _BIGFRAMES_REMOTE_FUNCTION_PREFIX = "bigframes" @@ -194,6 +197,7 @@ class IbisSignature(NamedTuple): parameter_names: List[str] input_types: List[Optional[ibis_dtypes.DataType]] output_type: 
ibis_dtypes.DataType + output_type_override: Optional[ibis_dtypes.DataType] = None def ibis_signature_from_python_signature( @@ -202,13 +206,77 @@ def ibis_signature_from_python_signature( output_type: type, ) -> IbisSignature: + ibis_input_types: List[Optional[ibis_dtypes.DataType]] = [ + bigframes.core.compile.ibis_types.ibis_type_from_python_type(t) + for t in input_types + ] + + if typing.get_origin(output_type) is list: + ibis_output_type = ( + bigframes.core.compile.ibis_types.ibis_array_output_type_from_python_type( + output_type + ) + ) + else: + ibis_output_type = bigframes.core.compile.ibis_types.ibis_type_from_python_type( + output_type + ) + return IbisSignature( parameter_names=list(signature.parameters.keys()), - input_types=[ - bigframes.core.compile.ibis_types.ibis_type_from_python_type(t) - for t in input_types - ], - output_type=bigframes.core.compile.ibis_types.ibis_type_from_python_type( - output_type - ), + input_types=ibis_input_types, + output_type=ibis_output_type, ) + + +def get_python_output_type_from_bigframes_metadata( + metadata_text: str, +) -> Optional[type]: + try: + metadata_dict = json.loads(metadata_text) + except (TypeError, json.decoder.JSONDecodeError): + return None + + try: + output_type = metadata_dict["value"]["python_array_output_type"] + except KeyError: + return None + + for ( + python_output_array_type + ) in bigframes.dtypes.RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES: + if python_output_array_type.__name__ == output_type: + return list[python_output_array_type] # type: ignore + + return None + + +def get_bigframes_metadata(*, python_output_type: Optional[type] = None) -> str: + # Let's keep the actual metadata inside one level of nesting so that in + # future we can use a top level key "version" (parallel to "value"), based + # on which "value" can be interpreted according to the "version". The + # absence of "version" should be interpreted as default version. 
+ inner_metadata = {} + if typing.get_origin(python_output_type) is list: + python_output_array_type = typing.get_args(python_output_type)[0] + if ( + python_output_array_type + in bigframes.dtypes.RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES + ): + inner_metadata[ + "python_array_output_type" + ] = python_output_array_type.__name__ + + metadata = {"value": inner_metadata} + metadata_ser = json.dumps(metadata) + + # let's make sure the serialized value is deserializable + if ( + get_python_output_type_from_bigframes_metadata(metadata_ser) + != python_output_type + ): + raise ValueError( + f"python_output_type {python_output_type} is not serializable." + ) + + return metadata_ser diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 9b68843a7d..533c93e7cb 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -16,9 +16,11 @@ import inspect import logging +import typing from typing import cast, Optional, TYPE_CHECKING import warnings +import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes import bigframes_vendored.ibis.expr.operations.udf as ibis_udf if TYPE_CHECKING: @@ -53,9 +55,30 @@ class ReturnTypeMissingError(ValueError): # TODO: Move this to compile folder def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignature: - if not routine.return_type: + if routine.return_type: + ibis_output_type = bigframes.core.compile.ibis_types.ibis_type_from_type_kind( + routine.return_type.type_kind + ) + else: raise ReturnTypeMissingError + ibis_output_type_override: Optional[ibis_dtypes.DataType] = None + if python_output_type := _utils.get_python_output_type_from_bigframes_metadata( + routine.description + ): + if not isinstance(ibis_output_type, ibis_dtypes.String): + raise TypeError( + "An explicit output_type should be provided only for a BigQuery function with STRING output." 
+ ) + if typing.get_origin(python_output_type) is list: + ibis_output_type_override = bigframes.core.compile.ibis_types.ibis_array_output_type_from_python_type( + cast(type, python_output_type) + ) + else: + raise TypeError( + "Currently only list of a type is supported as python output type." + ) + return _utils.IbisSignature( parameter_names=[arg.name for arg in routine.arguments], input_types=[ @@ -66,9 +89,8 @@ def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignatu else None for arg in routine.arguments ], - output_type=bigframes.core.compile.ibis_types.ibis_type_from_type_kind( - routine.return_type.type_kind - ), + output_type=ibis_output_type, + output_type_override=ibis_output_type_override, ) @@ -206,8 +228,11 @@ def func(*bigframes_args, **bigframes_kwargs): func.input_dtypes = tuple(function_input_dtypes) # type: ignore func.output_dtype = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( # type: ignore - ibis_signature.output_type + ibis_signature.output_type_override + if ibis_signature.output_type_override + else ibis_signature.output_type ) + func.is_row_processor = is_row_processor # type: ignore func.ibis_node = node # type: ignore return func diff --git a/bigframes/functions/remote_function_template.py b/bigframes/functions/remote_function_template.py index c666f41daa..0809baf5cc 100644 --- a/bigframes/functions/remote_function_template.py +++ b/bigframes/functions/remote_function_template.py @@ -169,6 +169,10 @@ def udf_http(request): reply = convert_to_bq_json( output_type, udf(*convert_call(input_types, call)) ) + if type(reply) is list: + # Since the BQ remote function does not support array yet, + # return a json serialized version of the reply + reply = json.dumps(reply) replies.append(reply) return_json = json.dumps({"replies": replies}) return return_json @@ -191,8 +195,15 @@ def udf_http_row_processor(request): replies = [] for call in calls: reply = convert_to_bq_json(output_type, 
udf(get_pd_series(call[0]))) - if isinstance(reply, float) and (math.isnan(reply) or math.isinf(reply)): - # json serialization of the special float values (nan, inf, -inf) + if type(reply) is list: + # Since the BQ remote function does not support array yet, + # return a json serialized version of the reply. + # Numpy types are not json serializable, so use their Python + # values instead. + reply = [val.item() if hasattr(val, "item") else val for val in reply] + reply = json.dumps(reply) + elif isinstance(reply, float) and (math.isnan(reply) or math.isinf(reply)): + # Json serialization of the special float values (nan, inf, -inf) # is not in strict compliance of the JSON specification # https://docs.python.org/3/library/json.html#basic-usage. # Let's convert them to a quoted string representation ("NaN", @@ -242,14 +253,7 @@ def generate_cloud_function_main_code( output_type: str, is_row_processor=False, ): - """Get main.py code for the cloud function for the given user defined function. - - Args: - input_types (tuple[str]): - Types of the input arguments in BigQuery SQL data type names. - output_type (str): - Types of the output scalar as a BigQuery SQL data type name. 
- """ + """Get main.py code for the cloud function for the given user defined function.""" # Pickle the udf with all its dependencies udf_code_file, udf_pickle_file = generate_udf_code(def_, directory) diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py index 9617b5d7a5..d038b8f4c0 100644 --- a/bigframes/ml/core.py +++ b/bigframes/ml/core.py @@ -22,17 +22,17 @@ from google.cloud import bigquery -import bigframes import bigframes.constants as constants import bigframes.formatting_helpers as formatting_helpers from bigframes.ml import sql as ml_sql import bigframes.pandas as bpd +import bigframes.session class BaseBqml: """Base class for BQML functionalities.""" - def __init__(self, session: bigframes.Session): + def __init__(self, session: bigframes.session.Session): self._session = session self._base_sql_generator = ml_sql.BaseSqlGenerator() diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 413bc89588..c98e18322a 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -22,10 +22,10 @@ import bigframes_vendored.sklearn.decomposition._pca from google.cloud import bigquery -import bigframes from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +import bigframes.session _BQML_PARAMS_MAPPING = {"svd_solver": "pcaSolver"} @@ -49,7 +49,9 @@ def __init__( self._bqml_model_factory = globals.bqml_model_factory() @classmethod - def _from_bq(cls, session: bigframes.Session, bq_model: bigquery.Model) -> PCA: + def _from_bq( + cls, session: bigframes.session.Session, bq_model: bigquery.Model + ) -> PCA: assert bq_model.model_type == "PCA" kwargs = utils.retrieve_params_from_bq_model( diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 11dd639849..2633f13411 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -23,10 +23,10 @@ import bigframes_vendored.xgboost.sklearn from google.cloud import bigquery -import 
bigframes from bigframes.core import log_adapter +import bigframes.dataframe from bigframes.ml import base, core, globals, utils -import bigframes.pandas as bpd +import bigframes.session _BQML_PARAMS_MAPPING = { "booster": "boosterType", @@ -102,7 +102,7 @@ def __init__( @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> XGBRegressor: assert bq_model.model_type == "BOOSTED_TREE_REGRESSOR" @@ -169,7 +169,7 @@ def _fit( def predict( self, X: utils.ArrayType, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -261,7 +261,7 @@ def __init__( @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> XGBClassifier: assert bq_model.model_type == "BOOSTED_TREE_CLASSIFIER" @@ -325,7 +325,7 @@ def _fit( ) return self - def predict(self, X: utils.ArrayType) -> bpd.DataFrame: + def predict(self, X: utils.ArrayType) -> bigframes.dataframe.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -410,7 +410,7 @@ def __init__( @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> RandomForestRegressor: assert bq_model.model_type == "RANDOM_FOREST_REGRESSOR" @@ -474,7 +474,7 @@ def _fit( def predict( self, X: utils.ArrayType, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -576,7 +576,7 @@ def __init__( 
@classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> RandomForestClassifier: assert bq_model.model_type == "RANDOM_FOREST_CLASSIFIER" @@ -640,7 +640,7 @@ def _fit( def predict( self, X: utils.ArrayType, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index 9dc1652912..4e6c5036e7 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -20,10 +20,10 @@ from google.cloud import bigquery -import bigframes from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +import bigframes.session _BQML_PARAMS_MAPPING = { "horizon": "horizon", @@ -133,7 +133,7 @@ def __init__( @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> ARIMAPlus: assert bq_model.model_type == "ARIMA_PLUS" diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index c62fb21cd3..93152a6b99 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -20,10 +20,10 @@ from google.cloud import bigquery -import bigframes from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +import bigframes.session @log_adapter.class_logger @@ -41,7 +41,7 @@ def __init__( self, model_path: str, *, - session: Optional[bigframes.Session] = None, + session: Optional[bigframes.session.Session] = None, ): self.session = session or bpd.get_global_session() self.model_path = model_path @@ -56,7 +56,7 @@ def _create_bqml_model(self): @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: 
bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> TensorFlowModel: assert bq_model.model_type == "TENSORFLOW" @@ -120,7 +120,7 @@ def __init__( self, model_path: str, *, - session: Optional[bigframes.Session] = None, + session: Optional[bigframes.session.Session] = None, ): self.session = session or bpd.get_global_session() self.model_path = model_path @@ -135,7 +135,7 @@ def _create_bqml_model(self): @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> ONNXModel: assert bq_model.model_type == "ONNX" @@ -218,7 +218,7 @@ def __init__( *, input: Mapping[str, str] = {}, output: Mapping[str, str] = {}, - session: Optional[bigframes.Session] = None, + session: Optional[bigframes.session.Session] = None, ): self.session = session or bpd.get_global_session() self.model_path = model_path @@ -251,7 +251,7 @@ def _create_bqml_model(self): @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> XGBoostModel: assert bq_model.model_type == "XGBOOST" diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 722b72f806..46c5744a42 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -24,10 +24,10 @@ import bigframes_vendored.sklearn.linear_model._logistic from google.cloud import bigquery -import bigframes from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +import bigframes.session _BQML_PARAMS_MAPPING = { "optimize_strategy": "optimizationStrategy", @@ -87,7 +87,7 @@ def __init__( @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> LinearRegression: assert bq_model.model_type == "LINEAR_REGRESSION" @@ -282,7 
+282,7 @@ def __init__( @classmethod def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model + cls, session: bigframes.session.Session, bq_model: bigquery.Model ) -> LogisticRegression: assert bq_model.model_type == "LOGISTIC_REGRESSION" diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 8d1df6e0b9..bdefc793f9 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -23,12 +23,10 @@ from google.cloud import bigquery import typing_extensions -import bigframes from bigframes import clients, exceptions -from bigframes.core import blocks, log_adapter +from bigframes.core import blocks, global_session, log_adapter import bigframes.dataframe from bigframes.ml import base, core, globals, utils -import bigframes.pandas as bpd _BQML_PARAMS_MAPPING = { "max_iterations": "maxIterations", @@ -145,7 +143,7 @@ def __init__( max_iterations: int = 300, ): self.model_name = model_name - self.session = session or bpd.get_global_session() + self.session = session or global_session.get_global_session() self.max_iterations = max_iterations self._bq_connection_manager = self.session.bqconnectionmanager @@ -275,7 +273,7 @@ def predict( max_output_tokens: int = 128, top_k: int = 40, top_p: float = 0.95, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: """Predict the result from input DataFrame. Args: @@ -374,7 +372,7 @@ def score( task_type: Literal[ "text_generation", "classification", "summarization", "question_answering" ] = "text_generation", - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: """Calculate evaluation metrics of the model. .. 
note:: @@ -479,7 +477,7 @@ def __init__( ): self.model_name = model_name self.version = version - self.session = session or bpd.get_global_session() + self.session = session or global_session.get_global_session() self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -556,7 +554,7 @@ def _from_bq( model._bqml_model = core.BqmlModel(session, bq_model) return model - def predict(self, X: utils.ArrayType) -> bpd.DataFrame: + def predict(self, X: utils.ArrayType) -> bigframes.dataframe.DataFrame: """Predict the result from input DataFrame. Args: @@ -644,7 +642,7 @@ def __init__( connection_name: Optional[str] = None, ): self.model_name = model_name - self.session = session or bpd.get_global_session() + self.session = session or global_session.get_global_session() self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -715,14 +713,20 @@ def _from_bq( return model @property - def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]: + def _predict_func( + self, + ) -> Callable[ + [bigframes.dataframe.DataFrame, Mapping], bigframes.dataframe.DataFrame + ]: return self._bqml_model.generate_embedding @property def _status_col(self) -> str: return _ML_GENERATE_EMBEDDING_STATUS - def predict(self, X: utils.ArrayType, *, max_retries: int = 0) -> bpd.DataFrame: + def predict( + self, X: utils.ArrayType, *, max_retries: int = 0 + ) -> bigframes.dataframe.DataFrame: """Predict the result from input DataFrame. 
Args: @@ -822,7 +826,7 @@ def __init__( ) warnings.warn(msg, category=exceptions.PreviewWarning) self.model_name = model_name - self.session = session or bpd.get_global_session() + self.session = session or global_session.get_global_session() self.max_iterations = max_iterations self._bq_connection_manager = self.session.bqconnectionmanager @@ -899,7 +903,11 @@ def _bqml_options(self) -> dict: return options @property - def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]: + def _predict_func( + self, + ) -> Callable[ + [bigframes.dataframe.DataFrame, Mapping], bigframes.dataframe.DataFrame + ]: return self._bqml_model.generate_text @property @@ -962,7 +970,7 @@ def predict( top_p: float = 1.0, ground_with_google_search: bool = False, max_retries: int = 0, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: """Predict the result from input DataFrame. Args: @@ -1052,7 +1060,7 @@ def score( task_type: Literal[ "text_generation", "classification", "summarization", "question_answering" ] = "text_generation", - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: """Calculate evaluation metrics of the model. Only support "gemini-pro" and "gemini-1.5-pro-002", and "gemini-1.5-flash-002". .. 
note:: @@ -1170,7 +1178,7 @@ def __init__( connection_name: Optional[str] = None, ): self.model_name = model_name - self.session = session or bpd.get_global_session() + self.session = session or global_session.get_global_session() self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -1253,7 +1261,11 @@ def _bqml_options(self) -> dict: return options @property - def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]: + def _predict_func( + self, + ) -> Callable[ + [bigframes.dataframe.DataFrame, Mapping], bigframes.dataframe.DataFrame + ]: return self._bqml_model.generate_text @property @@ -1268,7 +1280,7 @@ def predict( top_k: int = 40, top_p: float = 0.95, max_retries: int = 0, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: """Predict the result from input DataFrame. Args: diff --git a/bigframes/ml/loader.py b/bigframes/ml/loader.py index 1cf8dc8a53..5d52927ded 100644 --- a/bigframes/ml/loader.py +++ b/bigframes/ml/loader.py @@ -20,7 +20,6 @@ import bigframes_vendored.constants as constants from google.cloud import bigquery -import bigframes from bigframes.ml import ( cluster, compose, @@ -36,6 +35,7 @@ preprocessing, utils, ) +import bigframes.session _BQML_MODEL_TYPE_MAPPING = MappingProxyType( { @@ -80,7 +80,7 @@ def from_bq( - session: bigframes.Session, bq_model: bigquery.Model + session: bigframes.session.Session, bq_model: bigquery.Model ) -> Union[ decomposition.PCA, cluster.KMeans, @@ -121,7 +121,7 @@ def from_bq( return _model_from_bq(session, bq_model) -def _transformer_from_bq(session: bigframes.Session, bq_model: bigquery.Model): +def _transformer_from_bq(session: bigframes.session.Session, bq_model: bigquery.Model): transformer = compose.ColumnTransformer._extract_from_bq_model(bq_model)._merge( bq_model ) @@ -130,7 +130,7 @@ def _transformer_from_bq(session: bigframes.Session, bq_model: bigquery.Model): return transformer -def 
_model_from_bq(session: bigframes.Session, bq_model: bigquery.Model): +def _model_from_bq(session: bigframes.session.Session, bq_model: bigquery.Model): if bq_model.model_type in _BQML_MODEL_TYPE_MAPPING: return _BQML_MODEL_TYPE_MAPPING[bq_model.model_type]._from_bq( # type: ignore session=session, bq_model=bq_model diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index 5318f0c531..dac51b1956 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -24,8 +24,8 @@ import bigframes_vendored.sklearn.pipeline from google.cloud import bigquery -import bigframes from bigframes.core import log_adapter +import bigframes.dataframe from bigframes.ml import ( base, compose, @@ -35,7 +35,7 @@ preprocessing, utils, ) -import bigframes.pandas as bpd +import bigframes.session @log_adapter.class_logger @@ -92,7 +92,9 @@ def __init__(self, steps: List[Tuple[str, base.BaseEstimator]]): self._estimator = estimator @classmethod - def _from_bq(cls, session: bigframes.Session, bq_model: bigquery.Model) -> Pipeline: + def _from_bq( + cls, session: bigframes.session.Session, bq_model: bigquery.Model + ) -> Pipeline: col_transformer = compose.ColumnTransformer._extract_from_bq_model(bq_model) transform = col_transformer._merge(bq_model) @@ -115,14 +117,14 @@ def fit( self._estimator._fit(X=X, y=y, transforms=transform_sqls) return self - def predict(self, X: utils.ArrayType) -> bpd.DataFrame: + def predict(self, X: utils.ArrayType) -> bigframes.dataframe.DataFrame: return self._estimator.predict(X) def score( self, X: utils.BigFramesArrayType, y: Optional[utils.BigFramesArrayType] = None, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: (X,) = utils.batch_convert_to_dataframe(X) if y is not None: (y,) = utils.batch_convert_to_dataframe(y) diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index f4f55ad34e..21a3a50421 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -19,11 +19,11 @@ from typing import Mapping, Optional 
import warnings -import bigframes from bigframes import clients -from bigframes.core import log_adapter +from bigframes.core import global_session, log_adapter +import bigframes.dataframe from bigframes.ml import base, core, globals, utils -import bigframes.pandas as bpd +import bigframes.session _REMOTE_MODEL_STATUS = "remote_model_status" @@ -54,13 +54,13 @@ def __init__( input: Mapping[str, str], output: Mapping[str, str], *, - session: Optional[bigframes.Session] = None, + session: Optional[bigframes.session.Session] = None, connection_name: Optional[str] = None, ): self.endpoint = endpoint self.input = input self.output = output - self.session = session or bpd.get_global_session() + self.session = session or global_session.get_global_session() self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -122,15 +122,15 @@ def standardize_type(v: str): def predict( self, X: utils.ArrayType, - ) -> bpd.DataFrame: + ) -> bigframes.dataframe.DataFrame: """Predict the result from the input DataFrame. Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + X (bigframes.pandas.DataFrame or bigframes.pandas.Series or pandas.DataFrame or pandas.Series): Input DataFrame or Series, which needs to comply with the input parameter of the model. Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. + bigframes.pandas.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. 
""" (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 37a40b7d01..e55cbc4925 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -14,1042 +14,322 @@ from __future__ import annotations -import dataclasses -import datetime -import functools -import typing -from typing import Union - -import numpy as np -import pandas as pd -from pandas.tseries.offsets import DateOffset -import pyarrow as pa - -import bigframes.dtypes as dtypes -import bigframes.operations.type as op_typing - -if typing.TYPE_CHECKING: - # Avoids circular dependency - import bigframes.core.expression - - -class RowOp(typing.Protocol): - @property - def name(self) -> str: - ... - - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - ... - - @property - def order_preserving(self) -> bool: - """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" - ... - - -@dataclasses.dataclass(frozen=True) -class ScalarOp: - @property - def name(self) -> str: - raise NotImplementedError("RowOp abstract base class has no implementation") - - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - raise NotImplementedError("Abstract operation has no output type") - - @property - def order_preserving(self) -> bool: - """Whether the row operation preserves total ordering. 
Can be pruned from ordering expressions.""" - return False - - -@dataclasses.dataclass(frozen=True) -class NaryOp(ScalarOp): - def as_expr( - self, - *exprs: Union[str | bigframes.core.expression.Expression], - ) -> bigframes.core.expression.Expression: - import bigframes.core.expression - - # Keep this in sync with output_type and compilers - inputs: list[bigframes.core.expression.Expression] = [] - - for expr in exprs: - inputs.append(_convert_expr_input(expr)) - - return bigframes.core.expression.OpExpression( - self, - tuple(inputs), - ) - - -# These classes can be used to create simple ops that don't take local parameters -# All is needed is a unique name, and to register an implementation in ibis_mappings.py -@dataclasses.dataclass(frozen=True) -class UnaryOp(ScalarOp): - @property - def arguments(self) -> int: - return 1 - - def as_expr( - self, input_id: typing.Union[str, bigframes.core.expression.Expression] = "arg" - ) -> bigframes.core.expression.Expression: - import bigframes.core.expression - - return bigframes.core.expression.OpExpression( - self, (_convert_expr_input(input_id),) - ) - - -@dataclasses.dataclass(frozen=True) -class BinaryOp(ScalarOp): - @property - def arguments(self) -> int: - return 2 - - def as_expr( - self, - left_input: typing.Union[str, bigframes.core.expression.Expression] = "arg1", - right_input: typing.Union[str, bigframes.core.expression.Expression] = "arg2", - ) -> bigframes.core.expression.Expression: - import bigframes.core.expression - - return bigframes.core.expression.OpExpression( - self, - ( - _convert_expr_input(left_input), - _convert_expr_input(right_input), - ), - ) - - -@dataclasses.dataclass(frozen=True) -class TernaryOp(ScalarOp): - @property - def arguments(self) -> int: - return 3 - - def as_expr( - self, - input1: typing.Union[str, bigframes.core.expression.Expression] = "arg1", - input2: typing.Union[str, bigframes.core.expression.Expression] = "arg2", - input3: typing.Union[str, 
bigframes.core.expression.Expression] = "arg3", - ) -> bigframes.core.expression.Expression: - import bigframes.core.expression - - return bigframes.core.expression.OpExpression( - self, - ( - _convert_expr_input(input1), - _convert_expr_input(input2), - _convert_expr_input(input3), - ), - ) - - -def _convert_expr_input( - input: typing.Union[str, bigframes.core.expression.Expression] -) -> bigframes.core.expression.Expression: - """Allows creating column references with just a string""" - import bigframes.core.expression - - if isinstance(input, str): - return bigframes.core.expression.deref(input) - else: - return input - - -# Operation Factories -def create_unary_op(name: str, type_signature: op_typing.UnaryTypeSignature) -> UnaryOp: - return dataclasses.make_dataclass( - name, - [ - ("name", typing.ClassVar[str], name), - ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method), - ], - bases=(UnaryOp,), - frozen=True, - )() - - -def create_binary_op( - name: str, type_signature: op_typing.BinaryTypeSignature -) -> BinaryOp: - return dataclasses.make_dataclass( - name, - [ - ("name", typing.ClassVar[str], name), - ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method), - ], - bases=(BinaryOp,), - frozen=True, - )() - - -# Unary Ops -## Generic Ops -invert_op = create_unary_op( - name="invert", - type_signature=op_typing.TypePreserving( - dtypes.is_binary_like, - description="binary-like", - ), -) -isnull_op = create_unary_op( - name="isnull", - type_signature=op_typing.FixedOutputType( - lambda x: True, dtypes.BOOL_DTYPE, description="nullable" - ), -) -notnull_op = create_unary_op( - name="notnull", - type_signature=op_typing.FixedOutputType( - lambda x: True, dtypes.BOOL_DTYPE, description="nullable" - ), -) -hash_op = create_unary_op( - name="hash", - type_signature=op_typing.FixedOutputType( - dtypes.is_string_like, dtypes.INT_DTYPE, description="string-like" - ), -) -## String Ops -len_op = create_unary_op( - 
name="len", - type_signature=op_typing.FixedOutputType( - dtypes.is_iterable, dtypes.INT_DTYPE, description="iterable" - ), -) -reverse_op = create_unary_op(name="reverse", type_signature=op_typing.STRING_TRANSFORM) -lower_op = create_unary_op(name="lower", type_signature=op_typing.STRING_TRANSFORM) -upper_op = create_unary_op(name="upper", type_signature=op_typing.STRING_TRANSFORM) -strip_op = create_unary_op(name="strip", type_signature=op_typing.STRING_TRANSFORM) -isalnum_op = create_unary_op(name="isalnum", type_signature=op_typing.STRING_PREDICATE) -isalpha_op = create_unary_op(name="isalpha", type_signature=op_typing.STRING_PREDICATE) -isdecimal_op = create_unary_op( - name="isdecimal", type_signature=op_typing.STRING_PREDICATE +from bigframes.operations.array_ops import ArrayIndexOp, ArraySliceOp, ArrayToStringOp +from bigframes.operations.base_ops import ( + BinaryOp, + NaryOp, + RowOp, + ScalarOp, + TernaryOp, + UnaryOp, ) -isdigit_op = create_unary_op(name="isdigit", type_signature=op_typing.STRING_PREDICATE) -isnumeric_op = create_unary_op( - name="isnumeric", type_signature=op_typing.STRING_PREDICATE +from bigframes.operations.blob_ops import ( + obj_fetch_metadata_op, + obj_make_ref_op, + ObjGetAccessUrl, ) -isspace_op = create_unary_op(name="isspace", type_signature=op_typing.STRING_PREDICATE) -islower_op = create_unary_op(name="islower", type_signature=op_typing.STRING_PREDICATE) -isupper_op = create_unary_op(name="isupper", type_signature=op_typing.STRING_PREDICATE) -rstrip_op = create_unary_op(name="rstrip", type_signature=op_typing.STRING_TRANSFORM) -lstrip_op = create_unary_op(name="lstrip", type_signature=op_typing.STRING_TRANSFORM) -capitalize_op = create_unary_op( - name="capitalize", type_signature=op_typing.STRING_TRANSFORM +from bigframes.operations.bool_ops import and_op, or_op, xor_op +from bigframes.operations.comparison_ops import ( + eq_null_match_op, + eq_op, + ge_op, + gt_op, + le_op, + lt_op, + ne_op, ) -## DateTime Ops -### 
datelike accessors -day_op = create_unary_op( - name="day", - type_signature=op_typing.DATELIKE_ACCESSOR, +from bigframes.operations.date_ops import ( + day_op, + dayofweek_op, + month_op, + quarter_op, + year_op, ) -month_op = create_unary_op( - name="month", - type_signature=op_typing.DATELIKE_ACCESSOR, +from bigframes.operations.datetime_ops import ( + date_op, + StrftimeOp, + time_op, + ToDatetimeOp, + ToTimestampOp, + UnixMicros, + UnixMillis, + UnixSeconds, ) -year_op = create_unary_op( - name="year", - type_signature=op_typing.DATELIKE_ACCESSOR, +from bigframes.operations.distance_ops import ( + cosine_distance_op, + euclidean_distance_op, + manhattan_distance_op, ) -dayofweek_op = create_unary_op( - name="dayofweek", - type_signature=op_typing.DATELIKE_ACCESSOR, +from bigframes.operations.frequency_ops import ( + DatetimeToIntegerLabelOp, + FloorDtOp, + IntegerLabelToDatetimeOp, ) -quarter_op = create_unary_op( - name="quarter", - type_signature=op_typing.DATELIKE_ACCESSOR, +from bigframes.operations.generic_ops import ( + AsTypeOp, + case_when_op, + CaseWhenOp, + clip_op, + coalesce_op, + fillna_op, + hash_op, + invert_op, + IsInOp, + isnull_op, + MapOp, + maximum_op, + minimum_op, + notnull_op, + RowKey, + SqlScalarOp, + where_op, ) -### timelike accessors -hour_op = create_unary_op( - name="hour", - type_signature=op_typing.TIMELIKE_ACCESSOR, +from bigframes.operations.geo_ops import geo_x_op, geo_y_op +from bigframes.operations.json_ops import ( + JSONExtract, + JSONExtractArray, + JSONExtractStringArray, + JSONSet, + JSONValue, + ParseJSON, + ToJSONString, ) -minute_op = create_unary_op( - name="minute", - type_signature=op_typing.TIMELIKE_ACCESSOR, +from bigframes.operations.numeric_ops import ( + abs_op, + add_op, + arccos_op, + arccosh_op, + arcsin_op, + arcsinh_op, + arctan2_op, + arctan_op, + arctanh_op, + ceil_op, + cos_op, + cosh_op, + div_op, + exp_op, + expm1_op, + floor_op, + floordiv_op, + ln_op, + log1p_op, + log10_op, + mod_op, + mul_op, + 
neg_op, + pos_op, + pow_op, + round_op, + sin_op, + sinh_op, + sqrt_op, + sub_op, + tan_op, + tanh_op, + unsafe_pow_op, ) -second_op = create_unary_op( - name="second", - type_signature=op_typing.TIMELIKE_ACCESSOR, +from bigframes.operations.numpy_op_maps import NUMPY_TO_BINOP, NUMPY_TO_OP +from bigframes.operations.remote_function_ops import ( + BinaryRemoteFunctionOp, + NaryRemoteFunctionOp, + RemoteFunctionOp, ) -normalize_op = create_unary_op( - name="normalize", - type_signature=op_typing.TypePreserving( - dtypes.is_time_like, - description="time-like", - ), +from bigframes.operations.string_ops import ( + capitalize_op, + EndsWithOp, + isalnum_op, + isalpha_op, + isdecimal_op, + isdigit_op, + islower_op, + isnumeric_op, + isspace_op, + isupper_op, + len_op, + lower_op, + lstrip_op, + RegexReplaceStrOp, + ReplaceStrOp, + reverse_op, + rstrip_op, + StartsWithOp, + strconcat_op, + StrContainsOp, + StrContainsRegexOp, + StrExtractOp, + StrFindOp, + StrGetOp, + StringSplitOp, + strip_op, + StrPadOp, + StrRepeatOp, + StrSliceOp, + upper_op, + ZfillOp, ) -### datetimelike accessors -date_op = create_unary_op( - name="date", - type_signature=op_typing.FixedOutputType( - dtypes.is_date_like, dtypes.DATE_DTYPE, description="date-like" - ), -) -time_op = create_unary_op( - name="time", - type_signature=op_typing.FixedOutputType( - dtypes.is_time_like, dtypes.TIME_DTYPE, description="time-like" - ), -) -## Trigonometry Ops -sin_op = create_unary_op(name="sin", type_signature=op_typing.UNARY_REAL_NUMERIC) -cos_op = create_unary_op(name="cos", type_signature=op_typing.UNARY_REAL_NUMERIC) -tan_op = create_unary_op(name="tan", type_signature=op_typing.UNARY_REAL_NUMERIC) -arcsin_op = create_unary_op(name="arcsin", type_signature=op_typing.UNARY_REAL_NUMERIC) -arccos_op = create_unary_op(name="arccos", type_signature=op_typing.UNARY_REAL_NUMERIC) -arctan_op = create_unary_op(name="arctan", type_signature=op_typing.UNARY_REAL_NUMERIC) -sinh_op = create_unary_op(name="sinh", 
type_signature=op_typing.UNARY_REAL_NUMERIC) -cosh_op = create_unary_op(name="cosh", type_signature=op_typing.UNARY_REAL_NUMERIC) -tanh_op = create_unary_op(name="tanh", type_signature=op_typing.UNARY_REAL_NUMERIC) -arcsinh_op = create_unary_op( - name="arcsinh", type_signature=op_typing.UNARY_REAL_NUMERIC -) -arccosh_op = create_unary_op( - name="arccosh", type_signature=op_typing.UNARY_REAL_NUMERIC -) -arctanh_op = create_unary_op( - name="arctanh", type_signature=op_typing.UNARY_REAL_NUMERIC -) -# Geo Ops -geo_x_op = create_unary_op( - name="geo_x", - type_signature=op_typing.FixedOutputType( - dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" - ), -) -geo_y_op = create_unary_op( - name="geo_y", - type_signature=op_typing.FixedOutputType( - dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" - ), -) -## Numeric Ops -floor_op = create_unary_op(name="floor", type_signature=op_typing.UNARY_REAL_NUMERIC) -ceil_op = create_unary_op(name="ceil", type_signature=op_typing.UNARY_REAL_NUMERIC) -abs_op = create_unary_op(name="abs", type_signature=op_typing.UNARY_NUMERIC) -pos_op = create_unary_op(name="pos", type_signature=op_typing.UNARY_NUMERIC) -neg_op = create_unary_op(name="neg", type_signature=op_typing.UNARY_NUMERIC) -exp_op = create_unary_op(name="exp", type_signature=op_typing.UNARY_REAL_NUMERIC) -expm1_op = create_unary_op(name="expm1", type_signature=op_typing.UNARY_REAL_NUMERIC) -ln_op = create_unary_op(name="log", type_signature=op_typing.UNARY_REAL_NUMERIC) -log10_op = create_unary_op(name="log10", type_signature=op_typing.UNARY_REAL_NUMERIC) -log1p_op = create_unary_op(name="log1p", type_signature=op_typing.UNARY_REAL_NUMERIC) -sqrt_op = create_unary_op(name="sqrt", type_signature=op_typing.UNARY_REAL_NUMERIC) -## Blob Ops -obj_fetch_metadata_op = create_unary_op( - name="obj_fetch_metadata", type_signature=op_typing.BLOB_TRANSFORM -) - - -# Parameterized unary ops -@dataclasses.dataclass(frozen=True) -class StrContainsOp(UnaryOp): - 
name: typing.ClassVar[str] = "str_contains" - pat: str - - def output_type(self, *input_types): - return op_typing.STRING_PREDICATE.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrContainsRegexOp(UnaryOp): - name: typing.ClassVar[str] = "str_contains_regex" - pat: str - - def output_type(self, *input_types): - return op_typing.STRING_PREDICATE.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrGetOp(UnaryOp): - name: typing.ClassVar[str] = "str_get" - i: int - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrPadOp(UnaryOp): - name: typing.ClassVar[str] = "str_pad" - length: int - fillchar: str - side: typing.Literal["both", "left", "right"] - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class ReplaceStrOp(UnaryOp): - name: typing.ClassVar[str] = "str_replace" - pat: str - repl: str - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class RegexReplaceStrOp(UnaryOp): - name: typing.ClassVar[str] = "str_rereplace" - pat: str - repl: str - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StartsWithOp(UnaryOp): - name: typing.ClassVar[str] = "str_startswith" - pat: typing.Sequence[str] - - def output_type(self, *input_types): - return op_typing.STRING_PREDICATE.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StringSplitOp(UnaryOp): - name: typing.ClassVar[str] = "str_split" - pat: typing.Sequence[str] - - def output_type(self, *input_types): - input_type = input_types[0] - if not isinstance(input_type, pd.StringDtype): - raise TypeError("field accessor input must be a string type") - 
arrow_type = dtypes.bigframes_dtype_to_arrow_dtype(input_type) - return pd.ArrowDtype(pa.list_(arrow_type)) - - -@dataclasses.dataclass(frozen=True) -class EndsWithOp(UnaryOp): - name: typing.ClassVar[str] = "str_endswith" - pat: typing.Sequence[str] - - def output_type(self, *input_types): - return op_typing.STRING_PREDICATE.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class ZfillOp(UnaryOp): - name: typing.ClassVar[str] = "str_zfill" - width: int - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrFindOp(UnaryOp): - name: typing.ClassVar[str] = "str_find" - substr: str - start: typing.Optional[int] - end: typing.Optional[int] - - def output_type(self, *input_types): - signature = op_typing.FixedOutputType( - dtypes.is_string_like, dtypes.INT_DTYPE, "string-like" - ) - return signature.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrExtractOp(UnaryOp): - name: typing.ClassVar[str] = "str_extract" - pat: str - n: int = 1 - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrSliceOp(UnaryOp): - name: typing.ClassVar[str] = "str_slice" - start: typing.Optional[int] - end: typing.Optional[int] - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -@dataclasses.dataclass(frozen=True) -class StrRepeatOp(UnaryOp): - name: typing.ClassVar[str] = "str_repeat" - repeats: int - - def output_type(self, *input_types): - return op_typing.STRING_TRANSFORM.output_type(input_types[0]) - - -# Other parameterized unary operations -@dataclasses.dataclass(frozen=True) -class StructFieldOp(UnaryOp): - name: typing.ClassVar[str] = "struct_field" - name_or_index: str | int - - def output_type(self, *input_types): - input_type = input_types[0] - if not isinstance(input_type, 
pd.ArrowDtype): - raise TypeError("field accessor input must be a struct type") - - pa_type = input_type.pyarrow_dtype - if not isinstance(pa_type, pa.StructType): - raise TypeError("field accessor input must be a struct type") - - pa_result_type = pa_type[self.name_or_index].type - return dtypes.arrow_dtype_to_bigframes_dtype(pa_result_type) - - -@dataclasses.dataclass(frozen=True) -class AsTypeOp(UnaryOp): - name: typing.ClassVar[str] = "astype" - # TODO: Convert strings to dtype earlier - to_type: dtypes.DtypeString | dtypes.Dtype - safe: bool = False - - def output_type(self, *input_types): - # TODO: We should do this conversion earlier - if self.to_type == pa.string(): - return dtypes.STRING_DTYPE - if isinstance(self.to_type, str): - return dtypes.BIGFRAMES_STRING_TO_BIGFRAMES[ - typing.cast(dtypes.DtypeString, self.to_type) - ] - return self.to_type - - -@dataclasses.dataclass(frozen=True) -class IsInOp(UnaryOp): - name: typing.ClassVar[str] = "is_in" - values: typing.Tuple - match_nulls: bool = True - - def output_type(self, *input_types): - return dtypes.BOOL_DTYPE - - -@dataclasses.dataclass(frozen=True) -class RemoteFunctionOp(UnaryOp): - name: typing.ClassVar[str] = "remote_function" - func: typing.Callable - apply_on_null: bool - - def output_type(self, *input_types): - # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method - if hasattr(self.func, "output_dtype"): - if dtypes.is_array_like(self.func.output_dtype): - # TODO(b/284515241): remove this special handling to support - # array output types once BQ remote functions support ARRAY. - # Until then, use json serialized strings at the remote function - # level, and parse that to the intended output type at the - # bigframes level. 
- return dtypes.STRING_DTYPE - return self.func.output_dtype - else: - raise AttributeError("output_dtype not defined") - - -@dataclasses.dataclass(frozen=True) -class MapOp(UnaryOp): - name = "map_values" - mappings: typing.Tuple[typing.Tuple[typing.Hashable, typing.Hashable], ...] - - def output_type(self, *input_types): - return input_types[0] - - -@dataclasses.dataclass(frozen=True) -class ToDatetimeOp(UnaryOp): - name: typing.ClassVar[str] = "to_datetime" - format: typing.Optional[str] = None - unit: typing.Optional[str] = None - - def output_type(self, *input_types): - if input_types[0] not in ( - dtypes.FLOAT_DTYPE, - dtypes.INT_DTYPE, - dtypes.STRING_DTYPE, - ): - raise TypeError("expected string or numeric input") - return pd.ArrowDtype(pa.timestamp("us", tz=None)) - - -@dataclasses.dataclass(frozen=True) -class ToTimestampOp(UnaryOp): - name: typing.ClassVar[str] = "to_timestamp" - format: typing.Optional[str] = None - unit: typing.Optional[str] = None - - def output_type(self, *input_types): - # Must be numeric or string - if input_types[0] not in ( - dtypes.FLOAT_DTYPE, - dtypes.INT_DTYPE, - dtypes.STRING_DTYPE, - ): - raise TypeError("expected string or numeric input") - return pd.ArrowDtype(pa.timestamp("us", tz="UTC")) - - -@dataclasses.dataclass(frozen=True) -class StrftimeOp(UnaryOp): - name: typing.ClassVar[str] = "strftime" - date_format: str - - def output_type(self, *input_types): - return dtypes.STRING_DTYPE - - -@dataclasses.dataclass(frozen=True) -class FloorDtOp(UnaryOp): - name: typing.ClassVar[str] = "floor_dt" - freq: str - - def output_type(self, *input_types): - return input_types[0] - - -@dataclasses.dataclass(frozen=True) -class DatetimeToIntegerLabelOp(BinaryOp): - name: typing.ClassVar[str] = "datetime_to_integer_label" - freq: DateOffset - closed: typing.Optional[typing.Literal["right", "left"]] - origin: Union[ - Union[pd.Timestamp, datetime.datetime, np.datetime64, int, float, str], - typing.Literal["epoch", "start", 
"start_day", "end", "end_day"], - ] - - def output_type(self, *input_types): - return dtypes.INT_DTYPE - - -@dataclasses.dataclass(frozen=True) -class IntegerLabelToDatetimeOp(BinaryOp): - name: typing.ClassVar[str] = "integer_label_to_datetime" - freq: DateOffset - label: typing.Optional[typing.Literal["right", "left"]] - origin: Union[ - Union[pd.Timestamp, datetime.datetime, np.datetime64, int, float, str], - typing.Literal["epoch", "start", "start_day", "end", "end_day"], - ] - - def output_type(self, *input_types): - return input_types[1] - - -## Array Ops -@dataclasses.dataclass(frozen=True) -class ArrayToStringOp(UnaryOp): - name: typing.ClassVar[str] = "array_to_string" - delimiter: str - - def output_type(self, *input_types): - input_type = input_types[0] - if not dtypes.is_array_string_like(input_type): - raise TypeError("Input type must be an array of string type.") - return dtypes.STRING_DTYPE - - -@dataclasses.dataclass(frozen=True) -class ArrayIndexOp(UnaryOp): - name: typing.ClassVar[str] = "array_index" - index: int - - def output_type(self, *input_types): - input_type = input_types[0] - if dtypes.is_string_like(input_type): - return dtypes.STRING_DTYPE - elif dtypes.is_array_like(input_type): - return dtypes.arrow_dtype_to_bigframes_dtype( - input_type.pyarrow_dtype.value_type - ) - else: - raise TypeError("Input type must be an array or string-like type.") - - -@dataclasses.dataclass(frozen=True) -class ArraySliceOp(UnaryOp): - name: typing.ClassVar[str] = "array_slice" - start: int - stop: typing.Optional[int] = None - step: typing.Optional[int] = None - - def output_type(self, *input_types): - input_type = input_types[0] - if dtypes.is_string_like(input_type): - return dtypes.STRING_DTYPE - elif dtypes.is_array_like(input_type): - return input_type - else: - raise TypeError("Input type must be an array or string-like type.") - - -## JSON Ops -@dataclasses.dataclass(frozen=True) -class JSONExtract(UnaryOp): - name: typing.ClassVar[str] = 
"json_extract" - json_path: str - - def output_type(self, *input_types): - input_type = input_types[0] - if not dtypes.is_json_like(input_type): - raise TypeError( - "Input type must be an valid JSON object or JSON-formatted string type." - + f" Received type: {input_type}" - ) - return input_type - - -@dataclasses.dataclass(frozen=True) -class JSONExtractArray(UnaryOp): - name: typing.ClassVar[str] = "json_extract_array" - json_path: str - - def output_type(self, *input_types): - input_type = input_types[0] - if not dtypes.is_json_like(input_type): - raise TypeError( - "Input type must be an valid JSON object or JSON-formatted string type." - + f" Received type: {input_type}" - ) - return pd.ArrowDtype( - pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE)) - ) - - -@dataclasses.dataclass(frozen=True) -class JSONExtractStringArray(UnaryOp): - name: typing.ClassVar[str] = "json_extract_string_array" - json_path: str - - def output_type(self, *input_types): - input_type = input_types[0] - if not dtypes.is_json_like(input_type): - raise TypeError( - "Input type must be an valid JSON object or JSON-formatted string type." - + f" Received type: {input_type}" - ) - return pd.ArrowDtype( - pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE)) - ) - - -@dataclasses.dataclass(frozen=True) -class ParseJSON(UnaryOp): - name: typing.ClassVar[str] = "parse_json" - - def output_type(self, *input_types): - input_type = input_types[0] - if input_type != dtypes.STRING_DTYPE: - raise TypeError( - "Input type must be an valid JSON-formatted string type." - + f" Received type: {input_type}" - ) - return dtypes.JSON_DTYPE - - -@dataclasses.dataclass(frozen=True) -class ToJSONString(UnaryOp): - name: typing.ClassVar[str] = "to_json_string" - - def output_type(self, *input_types): - input_type = input_types[0] - if not dtypes.is_json_like(input_type): - raise TypeError( - "Input type must be an valid JSON object or JSON-formatted string type." 
- + f" Received type: {input_type}" - ) - return dtypes.STRING_DTYPE - - -## Blob Ops -@dataclasses.dataclass(frozen=True) -class ObjGetAccessUrl(UnaryOp): - name: typing.ClassVar[str] = "obj_get_access_url" - mode: str # access mode, e.g. R read, W write, RW read & write - - def output_type(self, *input_types): - return dtypes.JSON_DTYPE - - -# Binary Ops -fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE) -maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE) -minimum_op = create_binary_op(name="minimum", type_signature=op_typing.COERCE) -coalesce_op = create_binary_op(name="coalesce", type_signature=op_typing.COERCE) - - -## Math Ops -@dataclasses.dataclass(frozen=True) -class AddOp(BinaryOp): - name: typing.ClassVar[str] = "add" - - def output_type(self, *input_types): - left_type = input_types[0] - right_type = input_types[1] - if all(map(dtypes.is_string_like, input_types)) and len(set(input_types)) == 1: - # String addition - return input_types[0] - if (left_type is None or dtypes.is_numeric(left_type)) and ( - right_type is None or dtypes.is_numeric(right_type) - ): - # Numeric addition - return dtypes.coerce_to_common(left_type, right_type) - # TODO: Add temporal addition once delta types supported - raise TypeError(f"Cannot add dtypes {left_type} and {right_type}") - - -@dataclasses.dataclass(frozen=True) -class SubOp(BinaryOp): - name: typing.ClassVar[str] = "sub" - - # Note: this is actualyl a vararg op, but we don't model that yet - def output_type(self, *input_types): - left_type = input_types[0] - right_type = input_types[1] - if (left_type is None or dtypes.is_numeric(left_type)) and ( - right_type is None or dtypes.is_numeric(right_type) - ): - # Numeric subtraction - return dtypes.coerce_to_common(left_type, right_type) - # TODO: Add temporal addition once delta types supported - raise TypeError(f"Cannot subtract dtypes {left_type} and {right_type}") - - -@dataclasses.dataclass(frozen=True) -class 
BinaryRemoteFunctionOp(BinaryOp): - name: typing.ClassVar[str] = "binary_remote_function" - func: typing.Callable - - def output_type(self, *input_types): - # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method - if hasattr(self.func, "output_dtype"): - return self.func.output_dtype - else: - raise AttributeError("output_dtype not defined") - - -@dataclasses.dataclass(frozen=True) -class NaryRemoteFunctionOp(NaryOp): - name: typing.ClassVar[str] = "nary_remote_function" - func: typing.Callable - - def output_type(self, *input_types): - # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method - if hasattr(self.func, "output_dtype"): - return self.func.output_dtype - else: - raise AttributeError("output_dtype not defined") - - -add_op = AddOp() -sub_op = SubOp() -mul_op = create_binary_op(name="mul", type_signature=op_typing.BINARY_NUMERIC) -div_op = create_binary_op(name="div", type_signature=op_typing.BINARY_REAL_NUMERIC) -floordiv_op = create_binary_op(name="floordiv", type_signature=op_typing.BINARY_NUMERIC) -pow_op = create_binary_op(name="pow", type_signature=op_typing.BINARY_NUMERIC) -mod_op = create_binary_op(name="mod", type_signature=op_typing.BINARY_NUMERIC) -arctan2_op = create_binary_op( - name="arctan2", type_signature=op_typing.BINARY_REAL_NUMERIC -) -round_op = create_binary_op(name="round", type_signature=op_typing.BINARY_REAL_NUMERIC) -unsafe_pow_op = create_binary_op( - name="unsafe_pow_op", type_signature=op_typing.BINARY_REAL_NUMERIC -) -# Logical Ops -and_op = create_binary_op(name="and", type_signature=op_typing.LOGICAL) -or_op = create_binary_op(name="or", type_signature=op_typing.LOGICAL) -xor_op = create_binary_op(name="xor", type_signature=op_typing.LOGICAL) - -## Comparison Ops -eq_op = create_binary_op(name="eq", type_signature=op_typing.COMPARISON) -eq_null_match_op = create_binary_op( - name="eq_nulls_match", 
type_signature=op_typing.COMPARISON -) -ne_op = create_binary_op(name="ne", type_signature=op_typing.COMPARISON) -lt_op = create_binary_op(name="lt", type_signature=op_typing.COMPARISON) -gt_op = create_binary_op(name="gt", type_signature=op_typing.COMPARISON) -le_op = create_binary_op(name="le", type_signature=op_typing.COMPARISON) -ge_op = create_binary_op(name="ge", type_signature=op_typing.COMPARISON) - - -cosine_distance_op = create_binary_op( - name="ml_cosine_distance", type_signature=op_typing.VECTOR_METRIC -) -manhattan_distance_op = create_binary_op( - name="ml_manhattan_distance", type_signature=op_typing.VECTOR_METRIC -) -euclidean_distance_op = create_binary_op( - name="ml_euclidean_distance", type_signature=op_typing.VECTOR_METRIC -) - - -## String Ops -@dataclasses.dataclass(frozen=True) -class StrConcatOp(BinaryOp): - name: typing.ClassVar[str] = "str_concat" - - # Note: this is actualyl a vararg op, but we don't model that yet - def output_type(self, *input_types): - if not all(map(dtypes.is_string_like, input_types)): - raise TypeError("string concat requires string-like arguments") - if len(set(input_types)) != 1: - raise TypeError("string concat requires like-typed arguments") - return input_types[0] - - -strconcat_op = StrConcatOp() - - -## JSON Ops -@dataclasses.dataclass(frozen=True) -class JSONSet(BinaryOp): - name: typing.ClassVar[str] = "json_set" - json_path: str - - def output_type(self, *input_types): - left_type = input_types[0] - right_type = input_types[1] - if not dtypes.is_json_like(left_type): - raise TypeError( - "Input type must be an valid JSON object or JSON-formatted string type." - + f" Received type: {left_type}" - ) - if not dtypes.is_json_encoding_type(right_type): - raise TypeError( - "The value to be assigned must be a type that can be encoded as JSON." - + f"Received type: {right_type}" - ) - - # After JSON type implementation, ONLY return JSON data. 
- return left_type - - -## Blob Ops -@dataclasses.dataclass(frozen=True) -class ObjMakeRef(BinaryOp): - name: typing.ClassVar[str] = "obj.make_ref" - - def output_type(self, *input_types): - if not all(map(dtypes.is_string_like, input_types)): - raise TypeError("obj.make_ref requires string-like arguments") - - return dtypes.OBJ_REF_DTYPE - - -obj_make_ref_op = ObjMakeRef() - - -# Ternary Ops -@dataclasses.dataclass(frozen=True) -class WhereOp(TernaryOp): - name: typing.ClassVar[str] = "where" - - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if input_types[1] != dtypes.BOOL_DTYPE: - raise TypeError("where condition must be a boolean") - return dtypes.coerce_to_common(input_types[0], input_types[2]) - - -where_op = WhereOp() - - -@dataclasses.dataclass(frozen=True) -class ClipOp(TernaryOp): - name: typing.ClassVar[str] = "clip" - - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - return dtypes.coerce_to_common( - input_types[0], dtypes.coerce_to_common(input_types[1], input_types[2]) - ) - - -clip_op = ClipOp() - - -class CaseWhenOp(NaryOp): - name: typing.ClassVar[str] = "switch" - - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - assert len(input_types) % 2 == 0 - # predicate1, output1, predicate2, output2... - if not all(map(lambda x: x == dtypes.BOOL_DTYPE, input_types[::2])): - raise TypeError(f"Case inputs {input_types[::2]} must be boolean-valued") - output_expr_types = input_types[1::2] - return functools.reduce( - lambda t1, t2: dtypes.coerce_to_common(t1, t2), - output_expr_types, - ) - - -case_when_op = CaseWhenOp() - - -@dataclasses.dataclass(frozen=True) -class StructOp(NaryOp): - name: typing.ClassVar[str] = "struct" - column_names: tuple[str] - - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - num_input_types = len(input_types) - # value1, value2, ... 
- assert num_input_types == len(self.column_names) - fields = [] - - for i in range(num_input_types): - arrow_type = dtypes.bigframes_dtype_to_arrow_dtype(input_types[i]) - fields.append( - pa.field( - self.column_names[i], - arrow_type, - nullable=(not pa.types.is_list(arrow_type)), - ) - ) - return pd.ArrowDtype( - pa.struct(fields) - ) # [(name1, value1), (name2, value2), ...] - - -# Just parameterless unary ops for now -# TODO: Parameter mappings -NUMPY_TO_OP: dict[np.ufunc, UnaryOp] = { - np.sin: sin_op, - np.cos: cos_op, - np.tan: tan_op, - np.arcsin: arcsin_op, - np.arccos: arccos_op, - np.arctan: arctan_op, - np.sinh: sinh_op, - np.cosh: cosh_op, - np.tanh: tanh_op, - np.arcsinh: arcsinh_op, - np.arccosh: arccosh_op, - np.arctanh: arctanh_op, - np.exp: exp_op, - np.log: ln_op, - np.log10: log10_op, - np.sqrt: sqrt_op, - np.abs: abs_op, - np.floor: floor_op, - np.ceil: ceil_op, - np.log1p: log1p_op, - np.expm1: expm1_op, -} - - -NUMPY_TO_BINOP: dict[np.ufunc, BinaryOp] = { - np.add: add_op, - np.subtract: sub_op, - np.multiply: mul_op, - np.divide: div_op, - np.power: pow_op, - np.arctan2: arctan2_op, - np.maximum: maximum_op, - np.minimum: minimum_op, -} +from bigframes.operations.struct_ops import StructFieldOp, StructOp +from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op + +__all__ = [ + # Base ops + "RowOp", + "NaryOp", + "UnaryOp", + "BinaryOp", + "TernaryOp", + "ScalarOp", + # Generic ops + "AsTypeOp", + "case_when_op", + "CaseWhenOp", + "clip_op", + "coalesce_op", + "fillna_op", + "hash_op", + "invert_op", + "IsInOp", + "isnull_op", + "MapOp", + "maximum_op", + "minimum_op", + "notnull_op", + "RowKey", + "SqlScalarOp", + "where_op", + # String ops + "capitalize_op", + "EndsWithOp", + "isalnum_op", + "isalpha_op", + "isdecimal_op", + "isdigit_op", + "islower_op", + "isnumeric_op", + "isspace_op", + "isupper_op", + "len_op", + "lower_op", + "lstrip_op", + "RegexReplaceStrOp", + "ReplaceStrOp", + "reverse_op", + 
"rstrip_op", + "StartsWithOp", + "strconcat_op", + "StrContainsOp", + "StrContainsRegexOp", + "StrExtractOp", + "StrFindOp", + "StrGetOp", + "StringSplitOp", + "strip_op", + "StrPadOp", + "StrRepeatOp", + "StrSliceOp", + "upper_op", + "ZfillOp", + # Date ops + "day_op", + "month_op", + "year_op", + "dayofweek_op", + "quarter_op", + # Time ops + "hour_op", + "minute_op", + "second_op", + "normalize_op", + # Datetime ops + "date_op", + "time_op", + "ToDatetimeOp", + "ToTimestampOp", + "StrftimeOp", + "UnixMicros", + "UnixMillis", + "UnixSeconds", + # Numeric ops + "abs_op", + "add_op", + "arccos_op", + "arccosh_op", + "arcsin_op", + "arcsinh_op", + "arctan2_op", + "arctan_op", + "arctanh_op", + "ceil_op", + "cos_op", + "cosh_op", + "div_op", + "exp_op", + "expm1_op", + "floor_op", + "floordiv_op", + "ln_op", + "log1p_op", + "log10_op", + "mod_op", + "mul_op", + "neg_op", + "pos_op", + "pow_op", + "round_op", + "sin_op", + "sinh_op", + "sqrt_op", + "sub_op", + "tan_op", + "tanh_op", + "unsafe_pow_op", + # Array ops + "ArrayIndexOp", + "ArraySliceOp", + "ArrayToStringOp", + # Blob ops + "ObjGetAccessUrl", + "obj_make_ref_op", + "obj_fetch_metadata_op", + # Struct ops + "StructFieldOp", + "StructOp", + # Remote Functions ops + "BinaryRemoteFunctionOp", + "NaryRemoteFunctionOp", + "RemoteFunctionOp", + # Frequency ops + "DatetimeToIntegerLabelOp", + "FloorDtOp", + "IntegerLabelToDatetimeOp", + # JSON ops + "JSONExtract", + "JSONExtractArray", + "JSONExtractStringArray", + "JSONSet", + "JSONValue", + "ParseJSON", + "ToJSONString", + # Bool ops + "and_op", + "or_op", + "xor_op", + # Comparison ops + "eq_null_match_op", + "eq_op", + "ge_op", + "gt_op", + "le_op", + "lt_op", + "ne_op", + # Distance ops + "cosine_distance_op", + "euclidean_distance_op", + "manhattan_distance_op", + # Geo ops + "geo_x_op", + "geo_y_op", + # Numpy ops mapping + "NUMPY_TO_BINOP", + "NUMPY_TO_OP", +] diff --git a/bigframes/operations/array_ops.py b/bigframes/operations/array_ops.py new file mode 
100644 index 0000000000..c1e644fc11 --- /dev/null +++ b/bigframes/operations/array_ops.py @@ -0,0 +1,65 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class ArrayToStringOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "array_to_string" + delimiter: str + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_array_string_like(input_type): + raise TypeError("Input type must be an array of string type.") + return dtypes.STRING_DTYPE + + +@dataclasses.dataclass(frozen=True) +class ArrayIndexOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "array_index" + index: int + + def output_type(self, *input_types): + input_type = input_types[0] + if dtypes.is_string_like(input_type): + return dtypes.STRING_DTYPE + elif dtypes.is_array_like(input_type): + return dtypes.arrow_dtype_to_bigframes_dtype( + input_type.pyarrow_dtype.value_type + ) + else: + raise TypeError("Input type must be an array or string-like type.") + + +@dataclasses.dataclass(frozen=True) +class ArraySliceOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "array_slice" + start: int + stop: typing.Optional[int] = None + step: typing.Optional[int] = None + + def output_type(self, *input_types): + input_type = input_types[0] + if dtypes.is_string_like(input_type): + return 
dtypes.STRING_DTYPE + elif dtypes.is_array_like(input_type): + return input_type + else: + raise TypeError("Input type must be an array or string-like type.") diff --git a/bigframes/operations/base_ops.py b/bigframes/operations/base_ops.py new file mode 100644 index 0000000000..0308283ad4 --- /dev/null +++ b/bigframes/operations/base_ops.py @@ -0,0 +1,197 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import dataclasses +import typing + +from bigframes import dtypes +import bigframes.operations.type as op_typing + +if typing.TYPE_CHECKING: + # Avoids circular dependency + import bigframes.core.expression + + +class RowOp(typing.Protocol): + @property + def name(self) -> str: + ... + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + ... + + @property + def is_monotonic(self) -> bool: + """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" + ... + + @property + def is_bijective(self) -> bool: + """Whether the operation has a 1:1 mapping between inputs and outputs""" + ... + + @property + def deterministic(self) -> bool: + """Whether the operation is deterministic (given deterministic inputs)""" + ...
+ + +@dataclasses.dataclass(frozen=True) +class ScalarOp: + @property + def name(self) -> str: + raise NotImplementedError("RowOp abstract base class has no implementation") + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + raise NotImplementedError("Abstract operation has no output type") + + @property + def is_monotonic(self) -> bool: + """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" + return False + + @property + def is_bijective(self) -> bool: + """Whether the operation has a 1:1 mapping between inputs and outputs""" + return False + + @property + def deterministic(self) -> bool: + """Whether the operation is deterministic (given deterministic inputs)""" + return True + + +@dataclasses.dataclass(frozen=True) +class NaryOp(ScalarOp): + def as_expr( + self, + *exprs: typing.Union[str, bigframes.core.expression.Expression], + ) -> bigframes.core.expression.Expression: + import bigframes.core.expression + + # Keep this in sync with output_type and compilers + inputs: list[bigframes.core.expression.Expression] = [] + + for expr in exprs: + inputs.append(_convert_expr_input(expr)) + + return bigframes.core.expression.OpExpression( + self, + tuple(inputs), + ) + + +# These classes can be used to create simple ops that don't take local parameters +# All that is needed is a unique name, and to register an implementation in ibis_mappings.py +@dataclasses.dataclass(frozen=True) +class UnaryOp(ScalarOp): + @property + def arguments(self) -> int: + return 1 + + def as_expr( + self, input_id: typing.Union[str, bigframes.core.expression.Expression] = "arg" + ) -> bigframes.core.expression.Expression: + import bigframes.core.expression + + return bigframes.core.expression.OpExpression( + self, (_convert_expr_input(input_id),) + ) + + +@dataclasses.dataclass(frozen=True) +class BinaryOp(ScalarOp): + @property + def arguments(self) -> int: + return 2 + + def as_expr( + self, + left_input:
typing.Union[str, bigframes.core.expression.Expression] = "arg1", + right_input: typing.Union[str, bigframes.core.expression.Expression] = "arg2", + ) -> bigframes.core.expression.Expression: + import bigframes.core.expression + + return bigframes.core.expression.OpExpression( + self, + ( + _convert_expr_input(left_input), + _convert_expr_input(right_input), + ), + ) + + +@dataclasses.dataclass(frozen=True) +class TernaryOp(ScalarOp): + @property + def arguments(self) -> int: + return 3 + + def as_expr( + self, + input1: typing.Union[str, bigframes.core.expression.Expression] = "arg1", + input2: typing.Union[str, bigframes.core.expression.Expression] = "arg2", + input3: typing.Union[str, bigframes.core.expression.Expression] = "arg3", + ) -> bigframes.core.expression.Expression: + import bigframes.core.expression + + return bigframes.core.expression.OpExpression( + self, + ( + _convert_expr_input(input1), + _convert_expr_input(input2), + _convert_expr_input(input3), + ), + ) + + +def _convert_expr_input( + input: typing.Union[str, bigframes.core.expression.Expression] +) -> bigframes.core.expression.Expression: + """Allows creating column references with just a string""" + import bigframes.core.expression + + if isinstance(input, str): + return bigframes.core.expression.deref(input) + else: + return input + + +# Operation Factories +def create_unary_op(name: str, type_signature: op_typing.UnaryTypeSignature) -> UnaryOp: + return dataclasses.make_dataclass( + name, + [ + ("name", typing.ClassVar[str], name), + ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method), + ], + bases=(UnaryOp,), + frozen=True, + )() + + +def create_binary_op( + name: str, type_signature: op_typing.BinaryTypeSignature +) -> BinaryOp: + return dataclasses.make_dataclass( + name, + [ + ("name", typing.ClassVar[str], name), + ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method), + ], + bases=(BinaryOp,), + frozen=True, + )() diff --git 
a/bigframes/operations/blob.py b/bigframes/operations/blob.py index f78db2b6fc..a4de2f80c7 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -34,22 +34,159 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + def uri(self) -> bigframes.series.Series: + """URIs of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: URIs as string.""" + s = bigframes.series.Series(self._block) + + return s.struct.field("uri") + + def authorizer(self) -> bigframes.series.Series: + """Authorizers of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: Authorizers (connection) as string.""" + s = bigframes.series.Series(self._block) + + return s.struct.field("authorizer") + + def version(self) -> bigframes.series.Series: + """Versions of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: Version as string.""" + # version must be retrieved after fetching metadata + return self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field("version") + def metadata(self) -> bigframes.series.Series: - """Retrive the metadata of the Blob. + """Retrieve the metadata of the Blob. .. note:: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - JSON: metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time).""" + BigFrames Series: JSON metadata of the Blob.
Contains fields: content_type, md5_hash, size and updated(time).""" details_json = self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field( "details" ) import bigframes.bigquery as bbq - return bbq.json_extract(details_json, "$.gcs_metadata") + return bbq.json_extract(details_json, "$.gcs_metadata").rename("metadata") - def display(self, n: int = 3): + def content_type(self) -> bigframes.series.Series: + """Retrieve the content type of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: string of the content type.""" + return ( + self.metadata() + ._apply_unary_op(ops.JSONValue(json_path="$.content_type")) + .rename("content_type") + ) + + def md5_hash(self) -> bigframes.series.Series: + """Retrieve the md5 hash of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: string of the md5 hash.""" + return ( + self.metadata() + ._apply_unary_op(ops.JSONValue(json_path="$.md5_hash")) + .rename("md5_hash") + ) + + def size(self) -> bigframes.series.Series: + """Retrieve the file size of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: file size in bytes.""" + return ( + self.metadata() + ._apply_unary_op(ops.JSONValue(json_path="$.size")) + .rename("size") + .astype("Int64") + ) + + def updated(self) -> bigframes.series.Series: + """Retrieve the updated time of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. 
+ + Returns: + BigFrames Series: updated time as UTC datetime.""" + import bigframes.pandas as bpd + + updated = ( + self.metadata() + ._apply_unary_op(ops.JSONValue(json_path="$.updated")) + .rename("updated") + .astype("Int64") + ) + + return bpd.to_datetime(updated, unit="us", utc=True) + + def _get_runtime( + self, mode: str, with_metadata: bool = False + ) -> bigframes.series.Series: + """Retrieve the ObjectRefRuntime as JSON. + + Args: + mode (str): mode for the URLs, "R" for read, "RW" for read & write. + with_metadata (bool, default False): whether to fetch the metadata in the ObjectRefRuntime. + + Returns: + bigframes Series: ObjectRefRuntime JSON. + """ + s = self._apply_unary_op(ops.obj_fetch_metadata_op) if with_metadata else self + + return s._apply_unary_op(ops.ObjGetAccessUrl(mode=mode)) + + def read_url(self) -> bigframes.series.Series: + """Retrieve the read URL of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: Read only URLs.""" + return self._get_runtime(mode="R")._apply_unary_op( + ops.JSONValue(json_path="$.access_urls.read_url") + ) + + def write_url(self) -> bigframes.series.Series: + """Retrieve the write URL of the Blob. + + .. note:: + BigFrames Blob is still under experiments. It may not work and subject to change in the future. + + Returns: + BigFrames Series: Writable URLs.""" + return self._get_runtime(mode="RW")._apply_unary_op( + ops.JSONValue(json_path="$.access_urls.write_url") + ) + + def display(self, n: int = 3, *, content_type: str = ""): """Display the blob content in the IPython Notebook environment. Only works for image type now. .. note:: @@ -57,20 +194,35 @@ def display(self, n: int = 3): Args: n (int, default 3): number of sample blob objects to display. + content_type (str, default ""): content type of the blob. If unset, use the blob metadata of the storage. Possible values are "image", "audio" and "video".
""" - import bigframes.bigquery as bbq + # col name doesn't matter here. Rename to avoid column name conflicts + df = bigframes.series.Series(self._block).rename("blob_col").head(n).to_frame() - s = bigframes.series.Series(self._block).head(n) + df["read_url"] = df["blob_col"].blob.read_url() - obj_ref_runtime = s._apply_unary_op(ops.ObjGetAccessUrl(mode="R")) - read_urls = bbq.json_extract( - obj_ref_runtime, json_path="$.access_urls.read_url" - ) + if content_type: + df["content_type"] = content_type + else: + df["content_type"] = df["blob_col"].blob.content_type() + + def display_single_url(read_url: str, content_type: str): + content_type = content_type.casefold() - for read_url in read_urls: - read_url = str(read_url).strip('"') - response = requests.get(read_url) - ipy_display.display(ipy_display.Image(response.content)) + if content_type.startswith("image"): + ipy_display.display(ipy_display.Image(url=read_url)) + elif content_type.startswith("audio"): + # using url somehow doesn't work with audios + response = requests.get(read_url) + ipy_display.display(ipy_display.Audio(response.content)) + elif content_type.startswith("video"): + ipy_display.display(ipy_display.Video(url=read_url)) + else: # display as raw data + response = requests.get(read_url) + ipy_display.display(response.content) + + for _, row in df.iterrows(): + display_single_url(row["read_url"], row["content_type"]) def image_blur( self, @@ -116,10 +268,8 @@ def image_blur( connection=connection, ).udf() - src_rt = bigframes.series.Series(self._block)._apply_unary_op( - ops.ObjGetAccessUrl(mode="R") - ) - dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW")) + src_rt = self._get_runtime(mode="R") + dst_rt = dst.blob._get_runtime(mode="RW") src_rt = src_rt._apply_unary_op(ops.ToJSONString()) dst_rt = dst_rt._apply_unary_op(ops.ToJSONString()) diff --git a/bigframes/operations/blob_ops.py b/bigframes/operations/blob_ops.py new file mode 100644 index 0000000000..b17d1b1215 --- /dev/null +++ 
b/bigframes/operations/blob_ops.py @@ -0,0 +1,47 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +obj_fetch_metadata_op = base_ops.create_unary_op( + name="obj_fetch_metadata", type_signature=op_typing.BLOB_TRANSFORM +) + + +@dataclasses.dataclass(frozen=True) +class ObjGetAccessUrl(base_ops.UnaryOp): + name: typing.ClassVar[str] = "obj_get_access_url" + mode: str # access mode, e.g. R read, W write, RW read & write + + def output_type(self, *input_types): + return dtypes.JSON_DTYPE + + +@dataclasses.dataclass(frozen=True) +class ObjMakeRef(base_ops.BinaryOp): + name: typing.ClassVar[str] = "obj.make_ref" + + def output_type(self, *input_types): + if not all(map(dtypes.is_string_like, input_types)): + raise TypeError("obj.make_ref requires string-like arguments") + + return dtypes.OBJ_REF_DTYPE + + +obj_make_ref_op = ObjMakeRef() diff --git a/bigframes/operations/bool_ops.py b/bigframes/operations/bool_ops.py new file mode 100644 index 0000000000..c8cd08efe5 --- /dev/null +++ b/bigframes/operations/bool_ops.py @@ -0,0 +1,23 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +and_op = base_ops.create_binary_op(name="and", type_signature=op_typing.LOGICAL) + +or_op = base_ops.create_binary_op(name="or", type_signature=op_typing.LOGICAL) + +xor_op = base_ops.create_binary_op(name="xor", type_signature=op_typing.LOGICAL) diff --git a/bigframes/operations/comparison_ops.py b/bigframes/operations/comparison_ops.py new file mode 100644 index 0000000000..b109a85d18 --- /dev/null +++ b/bigframes/operations/comparison_ops.py @@ -0,0 +1,33 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +eq_op = base_ops.create_binary_op(name="eq", type_signature=op_typing.COMPARISON) + +eq_null_match_op = base_ops.create_binary_op( + name="eq_nulls_match", type_signature=op_typing.COMPARISON +) + +ne_op = base_ops.create_binary_op(name="ne", type_signature=op_typing.COMPARISON) + +lt_op = base_ops.create_binary_op(name="lt", type_signature=op_typing.COMPARISON) + +gt_op = base_ops.create_binary_op(name="gt", type_signature=op_typing.COMPARISON) + +le_op = base_ops.create_binary_op(name="le", type_signature=op_typing.COMPARISON) + +ge_op = base_ops.create_binary_op(name="ge", type_signature=op_typing.COMPARISON) diff --git a/bigframes/operations/date_ops.py b/bigframes/operations/date_ops.py new file mode 100644 index 0000000000..2b68a24caf --- /dev/null +++ b/bigframes/operations/date_ops.py @@ -0,0 +1,41 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +day_op = base_ops.create_unary_op( + name="day", + type_signature=op_typing.DATELIKE_ACCESSOR, +) + +month_op = base_ops.create_unary_op( + name="month", + type_signature=op_typing.DATELIKE_ACCESSOR, +) + +year_op = base_ops.create_unary_op( + name="year", + type_signature=op_typing.DATELIKE_ACCESSOR, +) + +dayofweek_op = base_ops.create_unary_op( + name="dayofweek", + type_signature=op_typing.DATELIKE_ACCESSOR, +) + +quarter_op = base_ops.create_unary_op( + name="quarter", + type_signature=op_typing.DATELIKE_ACCESSOR, +) diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py new file mode 100644 index 0000000000..3ee8a00141 --- /dev/null +++ b/bigframes/operations/datetime_ops.py @@ -0,0 +1,103 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import dataclasses +import typing + +import pandas as pd +import pyarrow as pa + +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +date_op = base_ops.create_unary_op( + name="date", + type_signature=op_typing.FixedOutputType( + dtypes.is_date_like, dtypes.DATE_DTYPE, description="date-like" + ), +) + +time_op = base_ops.create_unary_op( + name="time", + type_signature=op_typing.FixedOutputType( + dtypes.is_time_like, dtypes.TIME_DTYPE, description="time-like" + ), +) + + +@dataclasses.dataclass(frozen=True) +class ToDatetimeOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "to_datetime" + format: typing.Optional[str] = None + unit: typing.Optional[str] = None + + def output_type(self, *input_types): + if input_types[0] not in ( + dtypes.FLOAT_DTYPE, + dtypes.INT_DTYPE, + dtypes.STRING_DTYPE, + ): + raise TypeError("expected string or numeric input") + return pd.ArrowDtype(pa.timestamp("us", tz=None)) + + +@dataclasses.dataclass(frozen=True) +class ToTimestampOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "to_timestamp" + format: typing.Optional[str] = None + unit: typing.Optional[str] = None + + def output_type(self, *input_types): + # Must be numeric or string + if input_types[0] not in ( + dtypes.FLOAT_DTYPE, + dtypes.INT_DTYPE, + dtypes.STRING_DTYPE, + ): + raise TypeError("expected string or numeric input") + return pd.ArrowDtype(pa.timestamp("us", tz="UTC")) + + +@dataclasses.dataclass(frozen=True) +class StrftimeOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "strftime" + date_format: str + + def output_type(self, *input_types): + return dtypes.STRING_DTYPE + + +@dataclasses.dataclass(frozen=True) +class UnixSeconds(base_ops.UnaryOp): + name: typing.ClassVar[str] = "unix_seconds" + + def output_type(self, *input_types): + return dtypes.INT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class UnixMillis(base_ops.UnaryOp): + name: typing.ClassVar[str] = "unix_millis" + + 
def output_type(self, *input_types): + return dtypes.INT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class UnixMicros(base_ops.UnaryOp): + name: typing.ClassVar[str] = "unix_micros" + + def output_type(self, *input_types): + return dtypes.INT_DTYPE diff --git a/bigframes/operations/distance_ops.py b/bigframes/operations/distance_ops.py new file mode 100644 index 0000000000..74595b561a --- /dev/null +++ b/bigframes/operations/distance_ops.py @@ -0,0 +1,29 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +cosine_distance_op = base_ops.create_binary_op( + name="ml_cosine_distance", type_signature=op_typing.VECTOR_METRIC +) + +manhattan_distance_op = base_ops.create_binary_op( + name="ml_manhattan_distance", type_signature=op_typing.VECTOR_METRIC +) + +euclidean_distance_op = base_ops.create_binary_op( + name="ml_euclidean_distance", type_signature=op_typing.VECTOR_METRIC +) diff --git a/bigframes/operations/frequency_ops.py b/bigframes/operations/frequency_ops.py new file mode 100644 index 0000000000..2d5a854c32 --- /dev/null +++ b/bigframes/operations/frequency_ops.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import datetime +import typing + +import numpy as np +import pandas as pd +from pandas.tseries import offsets + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class FloorDtOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "floor_dt" + freq: str + + def output_type(self, *input_types): + return input_types[0] + + +@dataclasses.dataclass(frozen=True) +class DatetimeToIntegerLabelOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "datetime_to_integer_label" + freq: offsets.DateOffset + closed: typing.Optional[typing.Literal["right", "left"]] + origin: typing.Union[ + typing.Union[pd.Timestamp, datetime.datetime, np.datetime64, int, float, str], + typing.Literal["epoch", "start", "start_day", "end", "end_day"], + ] + + def output_type(self, *input_types): + return dtypes.INT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class IntegerLabelToDatetimeOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "integer_label_to_datetime" + freq: offsets.DateOffset + label: typing.Optional[typing.Literal["right", "left"]] + origin: typing.Union[ + typing.Union[pd.Timestamp, datetime.datetime, np.datetime64, int, float, str], + typing.Literal["epoch", "start", "start_day", "end", "end_day"], + ] + + def output_type(self, *input_types): + return input_types[1] diff --git a/bigframes/operations/generic_ops.py b/bigframes/operations/generic_ops.py new file mode 100644 index 0000000000..ef7e1f5cea --- /dev/null +++ b/bigframes/operations/generic_ops.py @@ -0,0 +1,174 @@ +# 
Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import functools +import typing + +import pyarrow as pa + +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +invert_op = base_ops.create_unary_op( + name="invert", + type_signature=op_typing.TypePreserving( + dtypes.is_binary_like, + description="binary-like", + ), +) + +isnull_op = base_ops.create_unary_op( + name="isnull", + type_signature=op_typing.FixedOutputType( + lambda x: True, dtypes.BOOL_DTYPE, description="nullable" + ), +) + +notnull_op = base_ops.create_unary_op( + name="notnull", + type_signature=op_typing.FixedOutputType( + lambda x: True, dtypes.BOOL_DTYPE, description="nullable" + ), +) + +hash_op = base_ops.create_unary_op( + name="hash", + type_signature=op_typing.FixedOutputType( + dtypes.is_string_like, dtypes.INT_DTYPE, description="string-like" + ), +) + + +@dataclasses.dataclass(frozen=True) +class AsTypeOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "astype" + # TODO: Convert strings to dtype earlier + to_type: typing.Union[dtypes.DtypeString, dtypes.Dtype] + safe: bool = False + + def output_type(self, *input_types): + # TODO: We should do this conversion earlier + if self.to_type == pa.string(): + return dtypes.STRING_DTYPE + if isinstance(self.to_type, str): + return dtypes.BIGFRAMES_STRING_TO_BIGFRAMES[ + typing.cast(dtypes.DtypeString, self.to_type) + ] + return 
self.to_type + + +@dataclasses.dataclass(frozen=True) +class IsInOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "is_in" + values: typing.Tuple + match_nulls: bool = True + + def output_type(self, *input_types): + return dtypes.BOOL_DTYPE + + +@dataclasses.dataclass(frozen=True) +class MapOp(base_ops.UnaryOp): + name = "map_values" + mappings: typing.Tuple[typing.Tuple[typing.Hashable, typing.Hashable], ...] + + def output_type(self, *input_types): + return input_types[0] + + +fillna_op = base_ops.create_binary_op(name="fillna", type_signature=op_typing.COERCE) + +maximum_op = base_ops.create_binary_op(name="maximum", type_signature=op_typing.COERCE) + +minimum_op = base_ops.create_binary_op(name="minimum", type_signature=op_typing.COERCE) + +coalesce_op = base_ops.create_binary_op( + name="coalesce", type_signature=op_typing.COERCE +) + + +@dataclasses.dataclass(frozen=True) +class WhereOp(base_ops.TernaryOp): + name: typing.ClassVar[str] = "where" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + if input_types[1] != dtypes.BOOL_DTYPE: + raise TypeError("where condition must be a boolean") + return dtypes.coerce_to_common(input_types[0], input_types[2]) + + +where_op = WhereOp() + + +@dataclasses.dataclass(frozen=True) +class ClipOp(base_ops.TernaryOp): + name: typing.ClassVar[str] = "clip" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return dtypes.coerce_to_common( + input_types[0], dtypes.coerce_to_common(input_types[1], input_types[2]) + ) + + +clip_op = ClipOp() + + +class CaseWhenOp(base_ops.NaryOp): + name: typing.ClassVar[str] = "switch" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + assert len(input_types) % 2 == 0 + # predicate1, output1, predicate2, output2... 
+ if not all(map(lambda x: x == dtypes.BOOL_DTYPE, input_types[::2])): + raise TypeError(f"Case inputs {input_types[::2]} must be boolean-valued") + output_expr_types = input_types[1::2] + return functools.reduce( + lambda t1, t2: dtypes.coerce_to_common(t1, t2), + output_expr_types, + ) + + +case_when_op = CaseWhenOp() + + +# Really doesn't need to be its own op, but allows us to try to get the most compact representation +@dataclasses.dataclass(frozen=True) +class RowKey(base_ops.NaryOp): + name: typing.ClassVar[str] = "rowkey" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return dtypes.STRING_DTYPE + + @property + def is_bijective(self) -> bool: + """Whether the operation has a 1:1 mapping between inputs and outputs""" + return True + + @property + def deterministic(self) -> bool: + return False + + +@dataclasses.dataclass(frozen=True) +class SqlScalarOp(base_ops.NaryOp): + """An escape to SQL, representing a single column.""" + + name: typing.ClassVar[str] = "sql_scalar" + _output_type: dtypes.ExpressionType + sql_template: str + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return self._output_type diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py new file mode 100644 index 0000000000..73e7e89197 --- /dev/null +++ b/bigframes/operations/geo_ops.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +geo_x_op = base_ops.create_unary_op( + name="geo_x", + type_signature=op_typing.FixedOutputType( + dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" + ), +) + +geo_y_op = base_ops.create_unary_op( + name="geo_y", + type_signature=op_typing.FixedOutputType( + dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" + ), +) diff --git a/bigframes/operations/json_ops.py b/bigframes/operations/json_ops.py new file mode 100644 index 0000000000..86c5a19ba7 --- /dev/null +++ b/bigframes/operations/json_ops.py @@ -0,0 +1,137 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +import pandas as pd +import pyarrow as pa + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class JSONExtract(base_ops.UnaryOp): + name: typing.ClassVar[str] = "json_extract" + json_path: str + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_json_like(input_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." 
+ + f" Received type: {input_type}" + ) + return input_type + + +@dataclasses.dataclass(frozen=True) +class JSONExtractArray(base_ops.UnaryOp): + name: typing.ClassVar[str] = "json_extract_array" + json_path: str + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_json_like(input_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." + + f" Received type: {input_type}" + ) + return pd.ArrowDtype( + pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE)) + ) + + +@dataclasses.dataclass(frozen=True) +class JSONExtractStringArray(base_ops.UnaryOp): + name: typing.ClassVar[str] = "json_extract_string_array" + json_path: str + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_json_like(input_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." + + f" Received type: {input_type}" + ) + return pd.ArrowDtype( + pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE)) + ) + + +@dataclasses.dataclass(frozen=True) +class ParseJSON(base_ops.UnaryOp): + name: typing.ClassVar[str] = "parse_json" + + def output_type(self, *input_types): + input_type = input_types[0] + if input_type != dtypes.STRING_DTYPE: + raise TypeError( + "Input type must be an valid JSON-formatted string type." + + f" Received type: {input_type}" + ) + return dtypes.JSON_DTYPE + + +@dataclasses.dataclass(frozen=True) +class ToJSONString(base_ops.UnaryOp): + name: typing.ClassVar[str] = "to_json_string" + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_json_like(input_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." 
+ + f" Received type: {input_type}" + ) + return dtypes.STRING_DTYPE + + +@dataclasses.dataclass(frozen=True) +class JSONSet(base_ops.BinaryOp): + name: typing.ClassVar[str] = "json_set" + json_path: str + + def output_type(self, *input_types): + left_type = input_types[0] + right_type = input_types[1] + if not dtypes.is_json_like(left_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." + + f" Received type: {left_type}" + ) + if not dtypes.is_json_encoding_type(right_type): + raise TypeError( + "The value to be assigned must be a type that can be encoded as JSON." + + f"Received type: {right_type}" + ) + + # After JSON type implementation, ONLY return JSON data. + return left_type + + +@dataclasses.dataclass(frozen=True) +class JSONValue(base_ops.UnaryOp): + name: typing.ClassVar[str] = "json_value" + json_path: str + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_json_like(input_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." + + f" Received type: {input_type}" + ) + return dtypes.STRING_DTYPE diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py new file mode 100644 index 0000000000..939330954d --- /dev/null +++ b/bigframes/operations/numeric_ops.py @@ -0,0 +1,174 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +sin_op = base_ops.create_unary_op( + name="sin", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +cos_op = base_ops.create_unary_op( + name="cos", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +tan_op = base_ops.create_unary_op( + name="tan", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +arcsin_op = base_ops.create_unary_op( + name="arcsin", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +arccos_op = base_ops.create_unary_op( + name="arccos", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +arctan_op = base_ops.create_unary_op( + name="arctan", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +sinh_op = base_ops.create_unary_op( + name="sinh", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +cosh_op = base_ops.create_unary_op( + name="cosh", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +tanh_op = base_ops.create_unary_op( + name="tanh", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +arcsinh_op = base_ops.create_unary_op( + name="arcsinh", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +arccosh_op = base_ops.create_unary_op( + name="arccosh", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +arctanh_op = base_ops.create_unary_op( + name="arctanh", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +floor_op = base_ops.create_unary_op( + name="floor", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +ceil_op = base_ops.create_unary_op( + name="ceil", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +abs_op = base_ops.create_unary_op(name="abs", type_signature=op_typing.UNARY_NUMERIC) + +pos_op = base_ops.create_unary_op(name="pos", type_signature=op_typing.UNARY_NUMERIC) + +neg_op = base_ops.create_unary_op(name="neg", type_signature=op_typing.UNARY_NUMERIC) + +exp_op = base_ops.create_unary_op( + name="exp", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +expm1_op = 
base_ops.create_unary_op( + name="expm1", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +ln_op = base_ops.create_unary_op( + name="log", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +log10_op = base_ops.create_unary_op( + name="log10", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +log1p_op = base_ops.create_unary_op( + name="log1p", type_signature=op_typing.UNARY_REAL_NUMERIC +) + +sqrt_op = base_ops.create_unary_op( + name="sqrt", type_signature=op_typing.UNARY_REAL_NUMERIC +) + + +@dataclasses.dataclass(frozen=True) +class AddOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "add" + + def output_type(self, *input_types): + left_type = input_types[0] + right_type = input_types[1] + if all(map(dtypes.is_string_like, input_types)) and len(set(input_types)) == 1: + # String addition + return input_types[0] + if (left_type is None or dtypes.is_numeric(left_type)) and ( + right_type is None or dtypes.is_numeric(right_type) + ): + # Numeric addition + return dtypes.coerce_to_common(left_type, right_type) + # TODO: Add temporal addition once delta types supported + raise TypeError(f"Cannot add dtypes {left_type} and {right_type}") + + +add_op = AddOp() + + +@dataclasses.dataclass(frozen=True) +class SubOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "sub" + + # Note: this is actualyl a vararg op, but we don't model that yet + def output_type(self, *input_types): + left_type = input_types[0] + right_type = input_types[1] + if (left_type is None or dtypes.is_numeric(left_type)) and ( + right_type is None or dtypes.is_numeric(right_type) + ): + # Numeric subtraction + return dtypes.coerce_to_common(left_type, right_type) + # TODO: Add temporal addition once delta types supported + raise TypeError(f"Cannot subtract dtypes {left_type} and {right_type}") + + +sub_op = SubOp() + +mul_op = base_ops.create_binary_op(name="mul", type_signature=op_typing.BINARY_NUMERIC) + +div_op = base_ops.create_binary_op( + name="div", 
type_signature=op_typing.BINARY_REAL_NUMERIC +) + +floordiv_op = base_ops.create_binary_op( + name="floordiv", type_signature=op_typing.BINARY_NUMERIC +) + +pow_op = base_ops.create_binary_op(name="pow", type_signature=op_typing.BINARY_NUMERIC) + +mod_op = base_ops.create_binary_op(name="mod", type_signature=op_typing.BINARY_NUMERIC) + +arctan2_op = base_ops.create_binary_op( + name="arctan2", type_signature=op_typing.BINARY_REAL_NUMERIC +) + +round_op = base_ops.create_binary_op( + name="round", type_signature=op_typing.BINARY_REAL_NUMERIC +) + +unsafe_pow_op = base_ops.create_binary_op( + name="unsafe_pow_op", type_signature=op_typing.BINARY_REAL_NUMERIC +) diff --git a/bigframes/operations/numpy_op_maps.py b/bigframes/operations/numpy_op_maps.py new file mode 100644 index 0000000000..7f3decdfa0 --- /dev/null +++ b/bigframes/operations/numpy_op_maps.py @@ -0,0 +1,55 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +from bigframes.operations import base_ops, generic_ops, numeric_ops + +# Just parameterless unary ops for now +# TODO: Parameter mappings +NUMPY_TO_OP: dict[np.ufunc, base_ops.UnaryOp] = { + np.sin: numeric_ops.sin_op, + np.cos: numeric_ops.cos_op, + np.tan: numeric_ops.tan_op, + np.arcsin: numeric_ops.arcsin_op, + np.arccos: numeric_ops.arccos_op, + np.arctan: numeric_ops.arctan_op, + np.sinh: numeric_ops.sinh_op, + np.cosh: numeric_ops.cosh_op, + np.tanh: numeric_ops.tanh_op, + np.arcsinh: numeric_ops.arcsinh_op, + np.arccosh: numeric_ops.arccosh_op, + np.arctanh: numeric_ops.arctanh_op, + np.exp: numeric_ops.exp_op, + np.log: numeric_ops.ln_op, + np.log10: numeric_ops.log10_op, + np.sqrt: numeric_ops.sqrt_op, + np.abs: numeric_ops.abs_op, + np.floor: numeric_ops.floor_op, + np.ceil: numeric_ops.ceil_op, + np.log1p: numeric_ops.log1p_op, + np.expm1: numeric_ops.expm1_op, +} + + +NUMPY_TO_BINOP: dict[np.ufunc, base_ops.BinaryOp] = { + np.add: numeric_ops.add_op, + np.subtract: numeric_ops.sub_op, + np.multiply: numeric_ops.mul_op, + np.divide: numeric_ops.div_op, + np.power: numeric_ops.pow_op, + np.arctan2: numeric_ops.arctan2_op, + np.maximum: generic_ops.maximum_op, + np.minimum: generic_ops.minimum_op, +} diff --git a/bigframes/operations/remote_function_ops.py b/bigframes/operations/remote_function_ops.py new file mode 100644 index 0000000000..0bced56f8d --- /dev/null +++ b/bigframes/operations/remote_function_ops.py @@ -0,0 +1,80 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class RemoteFunctionOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "remote_function" + func: typing.Callable + apply_on_null: bool + + def output_type(self, *input_types): + # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + if hasattr(self.func, "output_dtype"): + if dtypes.is_array_like(self.func.output_dtype): + # TODO(b/284515241): remove this special handling to support + # array output types once BQ remote functions support ARRAY. + # Until then, use json serialized strings at the remote function + # level, and parse that to the intended output type at the + # bigframes level. + return dtypes.STRING_DTYPE + return self.func.output_dtype + else: + raise AttributeError("output_dtype not defined") + + +@dataclasses.dataclass(frozen=True) +class BinaryRemoteFunctionOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "binary_remote_function" + func: typing.Callable + + def output_type(self, *input_types): + # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + if hasattr(self.func, "output_dtype"): + if dtypes.is_array_like(self.func.output_dtype): + # TODO(b/284515241): remove this special handling to support + # array output types once BQ remote functions support ARRAY. + # Until then, use json serialized strings at the remote function + # level, and parse that to the intended output type at the + # bigframes level. 
+ return dtypes.STRING_DTYPE + return self.func.output_dtype + else: + raise AttributeError("output_dtype not defined") + + +@dataclasses.dataclass(frozen=True) +class NaryRemoteFunctionOp(base_ops.NaryOp): + name: typing.ClassVar[str] = "nary_remote_function" + func: typing.Callable + + def output_type(self, *input_types): + # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + if hasattr(self.func, "output_dtype"): + if dtypes.is_array_like(self.func.output_dtype): + # TODO(b/284515241): remove this special handling to support + # array output types once BQ remote functions support ARRAY. + # Until then, use json serialized strings at the remote function + # level, and parse that to the intended output type at the + # bigframes level. + return dtypes.STRING_DTYPE + return self.func.output_dtype + else: + raise AttributeError("output_dtype not defined") diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index a2bf18a41d..3b7a77e5b7 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -21,12 +21,13 @@ import numpy as np from bigframes import dtypes, exceptions -from bigframes.core import guid +from bigframes.core import guid, log_adapter +@log_adapter.class_logger class Semantics: def __init__(self, df) -> None: - import bigframes + import bigframes # Import in the function body to avoid circular imports. import bigframes.dataframe if not bigframes.options.experiments.semantic_operators: @@ -1099,7 +1100,7 @@ def _validate_model(model): @staticmethod def _confirm_operation(row_count: int): """Raises OperationAbortedError when the confirmation fails""" - import bigframes + import bigframes # Import in the function body to avoid circular imports. 
threshold = bigframes.options.compute.semantic_ops_confirmation_threshold diff --git a/bigframes/operations/string_ops.py b/bigframes/operations/string_ops.py new file mode 100644 index 0000000000..b2ce0706ce --- /dev/null +++ b/bigframes/operations/string_ops.py @@ -0,0 +1,247 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +import pandas as pd +import pyarrow as pa + +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +len_op = base_ops.create_unary_op( + name="len", + type_signature=op_typing.FixedOutputType( + dtypes.is_iterable, dtypes.INT_DTYPE, description="iterable" + ), +) + +reverse_op = base_ops.create_unary_op( + name="reverse", type_signature=op_typing.STRING_TRANSFORM +) + +lower_op = base_ops.create_unary_op( + name="lower", type_signature=op_typing.STRING_TRANSFORM +) + +upper_op = base_ops.create_unary_op( + name="upper", type_signature=op_typing.STRING_TRANSFORM +) + +strip_op = base_ops.create_unary_op( + name="strip", type_signature=op_typing.STRING_TRANSFORM +) + +isalnum_op = base_ops.create_unary_op( + name="isalnum", type_signature=op_typing.STRING_PREDICATE +) + +isalpha_op = base_ops.create_unary_op( + name="isalpha", type_signature=op_typing.STRING_PREDICATE +) + +isdecimal_op = base_ops.create_unary_op( + name="isdecimal", type_signature=op_typing.STRING_PREDICATE +) + +isdigit_op = 
base_ops.create_unary_op( + name="isdigit", type_signature=op_typing.STRING_PREDICATE +) + +isnumeric_op = base_ops.create_unary_op( + name="isnumeric", type_signature=op_typing.STRING_PREDICATE +) + +isspace_op = base_ops.create_unary_op( + name="isspace", type_signature=op_typing.STRING_PREDICATE +) + +islower_op = base_ops.create_unary_op( + name="islower", type_signature=op_typing.STRING_PREDICATE +) + +isupper_op = base_ops.create_unary_op( + name="isupper", type_signature=op_typing.STRING_PREDICATE +) + +rstrip_op = base_ops.create_unary_op( + name="rstrip", type_signature=op_typing.STRING_TRANSFORM +) + +lstrip_op = base_ops.create_unary_op( + name="lstrip", type_signature=op_typing.STRING_TRANSFORM +) + +capitalize_op = base_ops.create_unary_op( + name="capitalize", type_signature=op_typing.STRING_TRANSFORM +) + + +@dataclasses.dataclass(frozen=True) +class StrContainsOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_contains" + pat: str + + def output_type(self, *input_types): + return op_typing.STRING_PREDICATE.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrContainsRegexOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_contains_regex" + pat: str + + def output_type(self, *input_types): + return op_typing.STRING_PREDICATE.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrGetOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_get" + i: int + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrPadOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_pad" + length: int + fillchar: str + side: typing.Literal["both", "left", "right"] + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class ReplaceStrOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_replace" + pat: str + 
repl: str + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class RegexReplaceStrOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_rereplace" + pat: str + repl: str + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StartsWithOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_startswith" + pat: typing.Sequence[str] + + def output_type(self, *input_types): + return op_typing.STRING_PREDICATE.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StringSplitOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_split" + pat: typing.Sequence[str] + + def output_type(self, *input_types): + input_type = input_types[0] + if not isinstance(input_type, pd.StringDtype): + raise TypeError("field accessor input must be a string type") + arrow_type = dtypes.bigframes_dtype_to_arrow_dtype(input_type) + return pd.ArrowDtype(pa.list_(arrow_type)) + + +@dataclasses.dataclass(frozen=True) +class EndsWithOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_endswith" + pat: typing.Sequence[str] + + def output_type(self, *input_types): + return op_typing.STRING_PREDICATE.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class ZfillOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_zfill" + width: int + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrFindOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_find" + substr: str + start: typing.Optional[int] + end: typing.Optional[int] + + def output_type(self, *input_types): + signature = op_typing.FixedOutputType( + dtypes.is_string_like, dtypes.INT_DTYPE, "string-like" + ) + return signature.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) 
+class StrExtractOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_extract" + pat: str + n: int = 1 + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrSliceOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_slice" + start: typing.Optional[int] + end: typing.Optional[int] + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrRepeatOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "str_repeat" + repeats: int + + def output_type(self, *input_types): + return op_typing.STRING_TRANSFORM.output_type(input_types[0]) + + +@dataclasses.dataclass(frozen=True) +class StrConcatOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "str_concat" + + # Note: this is actually a vararg op, but we don't model that yet + def output_type(self, *input_types): + if not all(map(dtypes.is_string_like, input_types)): + raise TypeError("string concat requires string-like arguments") + if len(set(input_types)) != 1: + raise TypeError("string concat requires like-typed arguments") + return input_types[0] + + +strconcat_op = StrConcatOp() diff --git a/bigframes/operations/struct_ops.py b/bigframes/operations/struct_ops.py new file mode 100644 index 0000000000..0926142b17 --- /dev/null +++ b/bigframes/operations/struct_ops.py @@ -0,0 +1,65 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +import pandas as pd +import pyarrow as pa + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class StructFieldOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "struct_field" + name_or_index: typing.Union[str, int] + + def output_type(self, *input_types): + input_type = input_types[0] + if not isinstance(input_type, pd.ArrowDtype): + raise TypeError("field accessor input must be a struct type") + + pa_type = input_type.pyarrow_dtype + if not isinstance(pa_type, pa.StructType): + raise TypeError("field accessor input must be a struct type") + + pa_result_type = pa_type[self.name_or_index].type + return dtypes.arrow_dtype_to_bigframes_dtype(pa_result_type) + + +@dataclasses.dataclass(frozen=True) +class StructOp(base_ops.NaryOp): + name: typing.ClassVar[str] = "struct" + column_names: tuple[str] + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + num_input_types = len(input_types) + # value1, value2, ... + assert num_input_types == len(self.column_names) + fields = [] + + for i in range(num_input_types): + arrow_type = dtypes.bigframes_dtype_to_arrow_dtype(input_types[i]) + fields.append( + pa.field( + self.column_names[i], + arrow_type, + nullable=(not pa.types.is_list(arrow_type)), + ) + ) + return pd.ArrowDtype( + pa.struct(fields) + ) # [(name1, value1), (name2, value2), ...] diff --git a/bigframes/operations/time_ops.py b/bigframes/operations/time_ops.py new file mode 100644 index 0000000000..a6a65ad80e --- /dev/null +++ b/bigframes/operations/time_ops.py @@ -0,0 +1,40 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from bigframes import dtypes +from bigframes.operations import base_ops +import bigframes.operations.type as op_typing + +hour_op = base_ops.create_unary_op( + name="hour", + type_signature=op_typing.TIMELIKE_ACCESSOR, +) + +minute_op = base_ops.create_unary_op( + name="minute", + type_signature=op_typing.TIMELIKE_ACCESSOR, +) + +second_op = base_ops.create_unary_op( + name="second", + type_signature=op_typing.TIMELIKE_ACCESSOR, +) + +normalize_op = base_ops.create_unary_op( + name="normalize", + type_signature=op_typing.TypePreserving( + dtypes.is_time_like, + description="time-like", + ), +) diff --git a/bigframes/pandas/io/api.py b/bigframes/pandas/io/api.py index 2cb33beb04..454b2e729e 100644 --- a/bigframes/pandas/io/api.py +++ b/bigframes/pandas/io/api.py @@ -325,7 +325,10 @@ def read_parquet( read_parquet.__doc__ = inspect.getdoc(bigframes.session.Session.read_parquet) -def read_gbq_function(function_name: str, is_row_processor: bool = False): +def read_gbq_function( + function_name: str, + is_row_processor: bool = False, +): return global_session.with_default_session( bigframes.session.Session.read_gbq_function, function_name=function_name, diff --git a/bigframes/series.py b/bigframes/series.py index 842962f78a..46847996f1 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1062,7 +1062,9 @@ def mode(self) -> Series: # Approach: Count each value, return each value for which count(x) == max(counts)) block, agg_ids = block.aggregate( by_column_ids=[self._value_column], - aggregations=((self._value_column, agg_ops.count_op),), + 
aggregations=( + ex.UnaryAggregation(agg_ops.count_op, ex.deref(self._value_column)), + ), ) value_count_col_id = agg_ids[0] block, max_value_count_col_id = block.apply_window_op( @@ -1299,6 +1301,9 @@ def __getattr__(self, key: str): if key == "_block": raise AttributeError(key) elif hasattr(pandas.Series, key): + log_adapter.submit_pandas_labels( + self._block.expr.session.bqclient, self.__class__.__name__, key + ) raise AttributeError( textwrap.dedent( f""" @@ -1344,6 +1349,8 @@ def value_counts( def sort_values( self, *, axis=0, ascending=True, kind: str = "quicksort", na_position="last" ) -> Series: + if axis != 0 and axis != "index": + raise ValueError(f"No axis named {axis} for object type Series") if na_position not in ["first", "last"]: raise ValueError("Param na_position must be one of 'first' or 'last'") block = self._block.order_by( @@ -1358,6 +1365,8 @@ def sort_values( @validations.requires_index def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series: # TODO(tbergeron): Support level parameter once multi-index introduced. 
+ if axis != 0 and axis != "index": + raise ValueError(f"No axis named {axis} for object type Series") if na_position not in ["first", "last"]: raise ValueError("Param na_position must be one of 'first' or 'last'") block = self._block @@ -1513,6 +1522,18 @@ def apply( ops.RemoteFunctionOp(func=func, apply_on_null=True) ) + # if the output is an array, reconstruct it from the json serialized + # string form + if bigframes.dtypes.is_array_like(func.output_dtype): + import bigframes.bigquery as bbq + + result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + func.output_dtype.pyarrow_dtype.value_type + ) + result_series = bbq.json_extract_string_array( + result_series, value_dtype=result_dtype + ) + return result_series def combine( @@ -1541,6 +1562,18 @@ def combine( other, ops.BinaryRemoteFunctionOp(func=func) ) + # if the output is an array, reconstruct it from the json serialized + # string form + if bigframes.dtypes.is_array_like(func.output_dtype): + import bigframes.bigquery as bbq + + result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + func.output_dtype.pyarrow_dtype.value_type + ) + result_series = bbq.json_extract_string_array( + result_series, value_dtype=result_dtype + ) + return result_series @validations.requires_index @@ -1644,7 +1677,8 @@ def unique(self, keep_order=True) -> Series: return self.drop_duplicates() block, result = self._block.aggregate( [self._value_column], - [(self._value_column, agg_ops.AnyValueOp())], + [ex.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(self._value_column))], + column_labels=self._block.column_labels, dropna=False, ) return Series(block.select_columns(result).reset_index()) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d787f8e7f3..1d85967729 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -269,6 +269,7 @@ def __init__( storage_manager=self._temp_storage_manager, default_index_type=self._default_index_type, 
scan_index_uniqueness=self._strictly_ordered, + force_total_order=self._strictly_ordered, metrics=self._metrics, ) @@ -1250,12 +1251,19 @@ def remote_function( `$ gcloud projects add-iam-policy-binding PROJECT_ID --member="serviceAccount:CONNECTION_SERVICE_ACCOUNT_ID" --role="roles/run.invoker"`. Args: - input_types (type or sequence(type)): + input_types (type or sequence(type), Optional): For scalar user defined function it should be the input type or - sequence of input types. For row processing user defined function, - type `Series` should be specified. - output_type (type): - Data type of the output in the user defined function. + sequence of input types. The supported scalar input types are + `bool`, `bytes`, `float`, `int`, `str`. For row processing user + defined function (i.e. functions that receive a single input + representing a row in form of a Series), type `Series` should be + specified. + output_type (type, Optional): + Data type of the output in the user defined function. If the + user defined function returns an array, then `list[type]` should + be specified. The supported output types are `bool`, `bytes`, + `float`, `int`, `str`, `list[bool]`, `list[float]`, `list[int]` + and `list[str]`. dataset (str, Optional): Dataset in which to create a BigQuery remote function. It should be in `<project_id>.<dataset_name>` or `<dataset_name>` format.
If this diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py index 6a5ba3f4c7..8fcc36b4d3 100644 --- a/bigframes/session/_io/bigquery/__init__.py +++ b/bigframes/session/_io/bigquery/__init__.py @@ -29,7 +29,6 @@ import google.cloud.bigquery as bigquery import google.cloud.bigquery.table -import bigframes from bigframes.core import log_adapter import bigframes.core.compile.googlesql as googlesql import bigframes.core.sql diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py index 6114427570..ac9523243e 100644 --- a/bigframes/session/_io/bigquery/read_gbq_table.py +++ b/bigframes/session/_io/bigquery/read_gbq_table.py @@ -27,7 +27,6 @@ import google.api_core.exceptions import google.cloud.bigquery as bigquery -import bigframes import bigframes.clients import bigframes.core.compile import bigframes.core.compile.default_ordering diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index ec922e286d..43faae37c3 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -43,12 +43,14 @@ import bigframes.core.compile import bigframes.core.expression as expression import bigframes.core.guid +import bigframes.core.ordering import bigframes.core.pruning import bigframes.core.schema as schemata import bigframes.dataframe import bigframes.dtypes import bigframes.exceptions import bigframes.formatting_helpers as formatting_helpers +import bigframes.operations import bigframes.operations.aggregations as agg_ops import bigframes.session._io.bigquery as bf_io_bigquery import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table @@ -116,12 +118,14 @@ def __init__( storage_manager: bigframes.session.temp_storage.TemporaryGbqStorageManager, default_index_type: bigframes.enums.DefaultIndexKind, scan_index_uniqueness: bool, + force_total_order: bool, metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None, ): 
self._bqclient = bqclient self._storage_manager = storage_manager self._default_index_type = default_index_type self._scan_index_uniqueness = scan_index_uniqueness + self._force_total_order = force_total_order self._df_snapshot: Dict[ bigquery.TableReference, Tuple[datetime.datetime, bigquery.Table] ] = {} @@ -439,6 +443,21 @@ def read_gbq_table( primary_key=index_cols if is_index_unique else (), session=self._session, ) + # if we don't have a unique index, we order by row hash if we are in strict mode + if self._force_total_order: + if not is_index_unique: + array_value = array_value.order_by( + [ + bigframes.core.ordering.OrderingExpression( + bigframes.operations.RowKey().as_expr( + *(id for id in array_value.column_ids) + ), + # More concise SQL this way. + na_last=False, + ) + ], + is_total_order=True, + ) # ---------------------------------------------------- # Create Default Sequential Index if still have no index diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 960da9f57c..90c638b82e 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -23,10 +23,10 @@ from google.cloud import bigquery -import bigframes from bigframes import dataframe from bigframes.core import log_adapter import bigframes.exceptions as bfe +import bigframes.session def _return_type_wrapper(method, cls): @@ -55,7 +55,7 @@ def _curate_df_doc(doc: Optional[str]): class StreamingBase: sql: str - _session: bigframes.Session + _session: bigframes.session.Session def to_bigtable( self, @@ -209,7 +209,7 @@ class StreamingDataFrame(StreamingBase): def __init__(self, df: dataframe.DataFrame, *, create_key=0): if create_key is not StreamingDataFrame._create_key: raise ValueError( - "StreamingDataFrame class shouldn't be created through constructor. Call bigframes.Session.read_gbq_table_streaming method to create." + "StreamingDataFrame class shouldn't be created through constructor. 
Call bigframes.pandas.read_gbq_table_streaming method to create." ) self._df = df self._df._disable_cache_override = True @@ -279,7 +279,7 @@ def _to_bigtable( instance: str, table: str, service_account_email: Optional[str] = None, - session: Optional[bigframes.Session] = None, + session: Optional[bigframes.session.Session] = None, app_profile: Optional[str] = None, truncate: bool = False, overwrite: bool = False, @@ -311,7 +311,7 @@ def _to_bigtable( Example: accountname@projectname.gserviceaccounts.com If not provided, the user account will be used, but this limits the lifetime of the continuous query. - session (bigframes.Session, default None): + session (bigframes.session.Session, default None): The session object to use for the query. This determines the project id and location of the query. If None, will default to the bigframes global session. @@ -414,7 +414,7 @@ def _to_pubsub( *, topic: str, service_account_email: str, - session: Optional[bigframes.Session] = None, + session: Optional[bigframes.session.Session] = None, job_id: Optional[str] = None, job_id_prefix: Optional[str] = None, ) -> bigquery.QueryJob: @@ -440,7 +440,7 @@ def _to_pubsub( service_account_email (str): Full name of the service account to run the continuous query. Example: accountname@projectname.gserviceaccounts.com - session (bigframes.Session, default None): + session (bigframes.session.Session, default None): The session object to use for the query. This determines the project id and location of the query. If None, will default to the bigframes global session. diff --git a/bigframes/version.py b/bigframes/version.py index 0858c02c1e..50dde36b01 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "1.32.0" +__version__ = "1.33.0" diff --git a/notebooks/getting_started/bq_dataframes_template.ipynb b/notebooks/getting_started/bq_dataframes_template.ipynb index dab4a7572f..2b47c40397 100644 --- a/notebooks/getting_started/bq_dataframes_template.ipynb +++ b/notebooks/getting_started/bq_dataframes_template.ipynb @@ -158,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": { "id": "PyQmSRbKA8r-" }, @@ -242,11 +242,24 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "id": "Vyex9BQI-BNa" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job 9c49a31b-7db6-49e1-b711-42eeebfdf7d3 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# This is how you read a BigQuery table\n", "df = bpd.read_gbq(\"bigquery-public-data.ml_datasets.penguins\")\n", @@ -264,13 +277,13 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job bbb5d053-39a1-4542-83e1-f8c86ba93e0e is DONE. 28.9 kB processed. Open Job" + "Query job a4004810-9249-4fe3-ab87-7cc33b69808d is DONE. 28.9 kB processed. Open Job" ], "text/plain": [ "" @@ -282,7 +295,7 @@ { "data": { "text/html": [ - "Query job 099f1e48-07b6-481d-95af-e6d6811a54f4 is DONE. 31.7 kB processed. Open Job" + "Query job 09214d4a-8911-41b3-9f14-1c781cb7dc1b is DONE. 31.7 kB processed. 
Open Job" ], "text/plain": [ "" @@ -323,76 +336,76 @@ " \n", " \n", " \n", - " 104\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", + " 171\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 52.7\n", - " 19.8\n", - " 197.0\n", - " 3725.0\n", + " 41.1\n", + " 19.0\n", + " 182.0\n", + " 3425.0\n", " MALE\n", " \n", " \n", - " 271\n", + " 219\n", " Gentoo penguin (Pygoscelis papua)\n", " Biscoe\n", - " 59.6\n", - " 17.0\n", - " 230.0\n", - " 6050.0\n", - " MALE\n", + " 45.7\n", + " 13.9\n", + " 214.0\n", + " 4400.0\n", + " FEMALE\n", " \n", " \n", - " 146\n", + " 59\n", " Gentoo penguin (Pygoscelis papua)\n", " Biscoe\n", - " 46.6\n", - " 14.2\n", - " 210.0\n", - " 4850.0\n", - " FEMALE\n", - " \n", - " \n", - " 278\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Torgersen\n", - " 44.1\n", - " 18.0\n", - " 210.0\n", - " 4000.0\n", + " 49.9\n", + " 16.1\n", + " 213.0\n", + " 5400.0\n", " MALE\n", " \n", " \n", - " 337\n", + " 132\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 38.2\n", - " 20.0\n", - " 190.0\n", + " 39.6\n", + " 20.7\n", + " 191.0\n", " 3900.0\n", - " MALE\n", + " FEMALE\n", + " \n", + " \n", + " 223\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 47.3\n", + " 13.8\n", + " 216.0\n", + " 4725.0\n", + " <NA>\n", " \n", " \n", "\n", "" ], "text/plain": [ - " species island culmen_length_mm \\\n", - "104 Chinstrap penguin (Pygoscelis antarctica) Dream 52.7 \n", - "271 Gentoo penguin (Pygoscelis papua) Biscoe 59.6 \n", - "146 Gentoo penguin (Pygoscelis papua) Biscoe 46.6 \n", - "278 Adelie Penguin (Pygoscelis adeliae) Torgersen 44.1 \n", - "337 Adelie Penguin (Pygoscelis adeliae) Biscoe 38.2 \n", + " species island culmen_length_mm \\\n", + "171 Adelie Penguin (Pygoscelis adeliae) Dream 41.1 \n", + "219 Gentoo penguin (Pygoscelis papua) Biscoe 45.7 \n", + "59 Gentoo penguin (Pygoscelis papua) Biscoe 49.9 \n", + "132 Adelie Penguin (Pygoscelis adeliae) Biscoe 39.6 \n", + "223 Gentoo penguin (Pygoscelis 
papua) Biscoe 47.3 \n", "\n", " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "104 19.8 197.0 3725.0 MALE \n", - "271 17.0 230.0 6050.0 MALE \n", - "146 14.2 210.0 4850.0 FEMALE \n", - "278 18.0 210.0 4000.0 MALE \n", - "337 20.0 190.0 3900.0 MALE " + "171 19.0 182.0 3425.0 MALE \n", + "219 13.9 214.0 4400.0 FEMALE \n", + "59 16.1 213.0 5400.0 MALE \n", + "132 20.7 191.0 3900.0 FEMALE \n", + "223 13.8 216.0 4725.0 " ] }, - "execution_count": 23, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -441,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 4, "metadata": { "id": "YKwCW7Nsavap" }, @@ -449,7 +462,7 @@ { "data": { "text/html": [ - "Query job 747e82a9-d9f9-4448-ab8c-a8ea75e6417d is DONE. 2.7 kB processed. Open Job" + "Query job 788957fa-55af-40af-a17c-913c1d0ec170 is DONE. 2.7 kB processed. Open Job" ], "text/plain": [ "" @@ -462,7 +475,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "average_body_mass: 4201.75438596491\n" + "average_body_mass: 4201.754385964913\n" ] } ], @@ -482,7 +495,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "metadata": { "id": "4PyKMR61-Mjy" }, @@ -490,7 +503,7 @@ { "data": { "text/html": [ - "Query job 505bd504-f1fb-4d23-bad4-5f9f69164fec is DONE. 15.6 kB processed. Open Job" + "Query job 0026583d-b326-4393-82f9-a1d2629fa745 is DONE. 15.6 kB processed. Open Job" ], "text/plain": [ "" @@ -502,7 +515,7 @@ { "data": { "text/html": [ - "Query job 4fb709da-21b5-462e-8df5-394e14470f89 is DONE. 163 Bytes processed. Open Job" + "Query job 30e17097-f818-4e87-a4a1-3eef82bc38be is DONE. 163 Bytes processed. Open Job" ], "text/plain": [ "" @@ -567,7 +580,7 @@ "[3 rows x 1 columns]" ] }, - "execution_count": 25, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -585,6 +598,143 @@ "You can confirm that the calculations were run in BigQuery by clicking \"Open job\" from the previous cells' output. 
This takes you to the BigQuery console to view the SQL statement and job details." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using SQL functions\n", + "\n", + "The [bigframes.bigquery module](https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.bigquery) provides many [BigQuery SQL functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-all) which may not have a pandas-equivalent." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import bigframes.bigquery" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `bigframes.bigquery.struct()` function creates a new STRUCT Series with subfields for each column in a DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 223af6b4-d58d-42c3-b7c9-13a303536e21 is DONE. 31.7 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 96fb4ef9-6c12-4cfa-aa2c-16377efed8f3 is DONE. 11.0 kB processed.
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "133 {'culmen_length_mm': None, 'culmen_depth_mm': ...\n", + "279 {'culmen_length_mm': 37.9, 'culmen_depth_mm': ...\n", + "34 {'culmen_length_mm': 37.8, 'culmen_depth_mm': ...\n", + "96 {'culmen_length_mm': 37.7, 'culmen_depth_mm': ...\n", + "18 {'culmen_length_mm': 38.8, 'culmen_depth_mm': ...\n", + "dtype: struct[pyarrow]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lengths = bigframes.bigquery.struct(\n", + " df[[\"culmen_length_mm\", \"culmen_depth_mm\", \"flipper_length_mm\"]]\n", + ")\n", + "lengths.peek()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the `bigframes.bigquery.sql_scalar()` function to access arbitrary SQL syntax representing a single column expression." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 81db1901-0704-4af2-8395-4c310e043f30 is DONE. 34.5 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job dfb40bba-3170-47b8-9ab4-c3ed5ab7550e is DONE. 8.2 kB processed. 
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "116 \n", + "308 15.5\n", + "285 15.9\n", + "2 16.0\n", + "245 16.1\n", + "dtype: Float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shortest = bigframes.bigquery.sql_scalar(\n", + " \"LEAST({0}, {1}, {2})\",\n", + " columns=[df['culmen_depth_mm'], df['culmen_length_mm'], df['flipper_length_mm']],\n", + ")\n", + "shortest.peek()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -603,13 +753,13 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 33027eda-7ca4-4b39-8618-b211d8fe3ee8 is DONE. 28.9 kB processed. Open Job" + "Query job 4420834b-8f6f-46ce-9488-a7ae3960e72b is DONE. 34.5 kB processed. Open Job" ], "text/plain": [ "" @@ -624,13 +774,13 @@ "" ] }, - "execution_count": 50, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -645,13 +795,13 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 2b3aba71-3b36-425a-835f-6c72fc79950a is DONE. 12.9 kB processed. Open Job" + "Query job 2e9d7f11-c442-4a6b-905f-0f5ac498b399 is DONE. 12.9 kB processed. Open Job" ], "text/plain": [ "" @@ -663,7 +813,7 @@ { "data": { "text/html": [ - "Query job 013405ab-25b3-4ead-8fe7-41974af67752 is DONE. 23.8 kB processed. Open Job" + "Query job 27db4f04-849b-4193-acae-6ecff5f4350f is DONE. 23.8 kB processed. Open Job" ], "text/plain": [ "" @@ -678,13 +828,13 @@ "" ] }, - "execution_count": 75, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAALECAYAAAAW8gpgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAACEdklEQVR4nOzdeVxN+eM/8NdtL3UraaVVSRHKmm0skXVsw1hGqBhG1kFjxr5PnzFirGMLw9gGYyfZhiJF2UOiUPZKSvvvD7/u150bw0x1Op3X8/Ho8XDPOffc123u1Ktz3ud9ZIWFhYUgIiIiEhE1oQMQERERfSoWGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0NoQOUloKCAjx69AgGBgaQyWRCxyEiIqKPUFhYiFevXsHKygpqau8/zlJhC8yjR49gbW0tdAwiIiL6F5KSklCtWrX3rq+wBcbAwADA22+AXC4XOA0RERF9jPT0dFhbWyt+j79PhS0wRaeN5HI5CwwREZHI/NPwDw7iJSIiItFhgSEiIiLRYYEhIiIi0amwY2A+Vn5+PnJzc4WOQVSiNDU1oa6uLnQMIqJSI9kCU1hYiJSUFKSmpgodhahUGBkZwcLCgvMgEVGFJNkCU1RezMzMoKenxx/yVGEUFhYiMzMTT548AQBYWloKnIiIqORJssDk5+cryouJiYnQcYhKnK6uLgDgyZMnMDMz4+kkIqpwJDmIt2jMi56ensBJiEpP0eebY7yIqCKSZIEpwtNGVJHx801EFZmkCwwRERGJEwsMERERiY4kB/G+j913B8r09e4t6FymrxcSEoKxY8eW+0vHW7VqhXr16iE4OFjoKDh58iRat26Nly9fwsjISOg4RET0//EIDNH/16pVK4wdO1boGERE9BFYYIiIiEh0WGBEpqCgAEFBQXB0dIS2tjZsbGwwd+5cnDx5EjKZTOn0UExMDGQyGe7du1fsvmbMmIF69eph3bp1sLGxgb6+Pr755hvk5+cjKCgIFhYWMDMzw9y5c5Wel5qaCn9/f5iamkIul6NNmzaIjY1V2e+mTZtgZ2cHQ0ND9O3bF69evfpX7zk7OxsTJkxA1apVUalSJTRu3BgnT55UrA8JCYGRkRGOHDkCFxcX6Ovro0OHDkhOTlZsk5eXh9GjR8PIyAgmJiYIDAzEoEGD0L17dwDA4MGDcerUKSxevBgymUzl+xYdHY0GDRpA
T08PTZs2RVxc3Edl/7ffY5lMhlWrVqFLly7Q09ODi4sLIiIicOfOHbRq1QqVKlVC06ZNER8f/6++p0REYscxMCIzefJkrF69GosWLULz5s2RnJyMmzdv/uv9xcfH49ChQzh8+DDi4+PxxRdf4O7du6hRowZOnTqF8PBw+Pr6wsvLC40bNwYA9O7dG7q6ujh06BAMDQ2xatUqtG3bFrdu3ULlypUV+92zZw/279+Ply9fok+fPliwYIHKL+qPERAQgOvXr2Pr1q2wsrLC7t270aFDB1y5cgVOTk4AgMzMTPz000/YtGkT1NTU8NVXX2HChAnYvHkzAODHH3/E5s2bsX79eri4uGDx4sXYs2cPWrduDQBYvHgxbt26hdq1a2PWrFkAAFNTU0WJ+eGHH7Bw4UKYmppi+PDh8PX1xdmzZ0vtewwAs2fPxs8//4yff/4ZgYGB6N+/PxwcHDB58mTY2NjA19cXAQEBOHTo0Cd/T4lIHG7UdCnxfbrcvFHi+xTCJx2BmTFjhuKv06KvmjVrKta/efMGI0eOhImJCfT19dGrVy88fvxYaR+JiYno3Lkz9PT0YGZmhokTJyIvL09pm5MnT8LDwwPa2tpwdHRESEjIv3+HFcirV6+wePFiBAUFYdCgQahevTqaN28Of3//f73PgoICrFu3Dq6urujatStat26NuLg4BAcHw9nZGUOGDIGzszNOnDgBADhz5gwiIyOxY8cONGjQAE5OTvjpp59gZGSEnTt3Ku03JCQEtWvXRosWLTBw4ECEhYV9cr7ExESsX78eO3bsQIsWLVC9enVMmDABzZs3x/r16xXb5ebmYuXKlWjQoAE8PDwQEBCg9Hq//PILJk+ejB49eqBmzZpYunSp0qBcQ0NDaGlpQU9PDxYWFrCwsFCavXbu3Ln47LPP4Orqiu+++w7h4eF48+ZNqXyPiwwZMgR9+vRBjRo1EBgYiHv37mHAgAHw9vaGi4sLxowZo3QkiohISj75CEytWrVw7Nix/9uBxv/tYty4cThw4AB27NgBQ0NDBAQEoGfPnoq/VPPz89G5c2dYWFggPDwcycnJ8PHxgaamJubNmwcASEhIQOfOnTF8+HBs3rwZYWFh8Pf3h6WlJby9vf/r+xW1GzduIDs7G23bti2xfdrZ2cHAwEDx2NzcHOrq6lBTU1NaVnRfndjYWGRkZKjcgiErK0vpdMbf92tpaanYx6e4cuUK8vPzUaNGDaXl2dnZShn09PRQvXr1Yl8vLS0Njx8/RqNGjRTr1dXVUb9+fRQUFHxUjjp16ijtG3g7Tb+Njc0/PvdTv8fFvaa5uTkAwM3NTWnZmzdvkJ6eDrlc/lHvg4ioovjkAqOhoQELCwuV5WlpaVi7di22bNmCNm3aAIDicP25c+fQpEkTHD16FNevX8exY8dgbm6OevXqYfbs2QgMDMSMGTOgpaWFlStXwt7eHgsXLgQAuLi44MyZM1i0aJHkC0zR/W2KU/TLsLCwULHsY6aQ19TUVHosk8mKXVb0iz4jIwOWlpbF/uX/7hGND+3jU2RkZEBdXR3R0dEq9/PR19f/4Ou9+734r97df9EMtx/7fj71e/yh1/wvOYiIKpJPHsR7+/ZtWFlZwcHBAQMGDEBiYiKAt4Mcc3Nz4eXlpdi2Zs2asLGxQUREBAAgIiICbm5uir8mAcDb2xvp6em4du2aYpt391G0TdE+3ic7Oxvp6elKXxWNk5MTdHV1iz0VY2pqCgBKA1djYmJKPIOHhwdSUlKgoaEBR0dHpa8qVaqU+Ou5u7sjPz8fT548UXm94op0cQwNDWFubo4LFy4oluXn5+PixYtK22lpaSE/P79E8xMRUen4pALTuHFjhISE4PDhw1ixYgUSEhLQokULvHr1CikpKdDS0lKZ7Mvc3BwpKSkAgJSUFKXyUrS+aN2HtklPT0dWVtZ7s82fPx+GhoaKL2tr6095a6Kgo6ODwMBATJo0CRs3bkR8fDzOnTuHtWvXwtHR
EdbW1pgxYwZu376NAwcOKI5ilSQvLy94enqie/fuOHr0KO7du4fw8HD88MMPiIqKKvHXq1GjBgYMGAAfHx/s2rULCQkJiIyMxPz583HgwMdPPDhq1CjMnz8ff/75J+Li4jBmzBi8fPlS6X5BdnZ2OH/+PO7du4dnz57xyAYRUTn2SaeQOnbsqPh3nTp10LhxY9ja2mL79u0fPL1RFiZPnozx48crHqenp39yiSnrmXH/jalTp0JDQwPTpk3Do0ePYGlpieHDh0NTUxO///47RowYgTp16qBhw4aYM2cOevfuXaKvL5PJcPDgQfzwww8YMmQInj59CgsLC7Rs2VKleJaU9evXY86cOfj222/x8OFDVKlSBU2aNEGXLl0+eh+BgYFISUmBj48P1NXVMWzYMHh7eyudlpowYQIGDRoEV1dXZGVlISEhoTTeDhERlQBZ4X8cKNCwYUN4eXmhXbt2aNu2rcqU67a2thg7dizGjRuHadOmYe/evUqnNhISEuDg4ICLFy/C3d0dLVu2hIeHh9I08uvXr8fYsWORlpb20bnS09NhaGiItLQ0lQGOb968QUJCAuzt7aGjo/Nv3zqJWEFBAVxcXNCnTx/Mnj1b6Dilgp9zIvGT4mXUH/r9/a7/NJFdRkYG4uPjYWlpifr160NTU1NpfEZcXBwSExPh6ekJAPD09MSVK1eUrrYIDQ2FXC6Hq6urYpu/j/EIDQ1V7IPo37h//z5Wr16NW7du4cqVKxgxYgQSEhLQv39/oaMREdG/8EkFZsKECTh16pRi3EOPHj2grq6Ofv36wdDQEH5+fhg/fjxOnDiB6OhoDBkyBJ6enmjSpAkAoH379nB1dcXAgQMRGxuLI0eOYMqUKRg5ciS0tbUBAMOHD8fdu3cxadIk3Lx5E8uXL8f27dsxbty4kn/3VOYSExOhr6//3q+iQeElTU1NDSEhIWjYsCGaNWuGK1eu4NixY3Bx+W9/3dSqVeu976VoEj0iIip5nzQG5sGDB+jXrx+eP38OU1NTNG/eHOfOnVNcAbNo0SKoqamhV69eyM7Ohre3N5YvX654vrq6Ovbv348RI0bA09MTlSpVwqBBgxQznwKAvb09Dhw4gHHjxmHx4sWoVq0a1qxZI/lLqCsKKyurD14dZWVlVSqva21t/dEz536KgwcPvvdy9dIaE0RERCUwBqa84hgYkjp+zonEj2NgSmkMDBEREZEQWGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdD75btQV2gzDMn69j59ZuCSEhIRg7NixSE1NLdPXLQmtWrVCvXr1lGZoLi0ymQy7d+9G9+7dS/21iIjo3+ERGJKsGTNmoF69ekLHICKif4EFhoiIiESHBUZkCgoKEBQUBEdHR2hra8PGxgZz587FyZMnIZPJlE4PxcTEQCaT4d69e8Xuq+gIxLp162BjYwN9fX188803yM/PR1BQECwsLGBmZoa5c+cqPS81NRX+/v4wNTWFXC5HmzZtEBsbq7LfTZs2wc7ODoaGhujbty9evXr1Ue/x9evX8PHxgb6+PiwtLbFw4UKVbbKzszFhwgRUrVoVlSpVQuPGjXHy5EnF+pCQEBgZGWHPnj1wcnKCjo4OvL29kZSUpFg/c+ZMxMbGQiaTQSaTISQkRPH8Z8+eoUePHtDT04OTkxP27t37UdmL/jscOXIE7u7u0NXVRZs2bfDkyRMcOnQILi4ukMvl6N+/PzIzMxXPa9WqFUaNGoWxY8fC2NgY5ubmWL16NV6/fo0hQ4bAwMAAjo6OOHTo0EflICKq6FhgRGby5MlYsGABpk6diuvXr2PLli3/acr6+Ph4HDp0CIcPH8bvv/+OtWvXonPnznjw4AFOnTqFH3/8EVOmTMH58+cVz+ndu7fiF3J0dDQ8PDzQtm1bvHjxQmm/e/bswf79+7F//36cOnUKCxYs+KhMEydOxKlTp/Dnn3/i6NGjOHnyJC5evKi0TUBAACIiIrB161ZcvnwZ
vXv3RocOHXD79m3FNpmZmZg7dy42btyIs2fPIjU1FX379gUAfPnll/j2229Rq1YtJCcnIzk5GV9++aXiuTNnzkSfPn1w+fJldOrUCQMGDFB6f/9kxowZWLp0KcLDw5GUlIQ+ffogODgYW7ZswYEDB3D06FH88ssvSs/ZsGEDqlSpgsjISIwaNQojRoxA79690bRpU1y8eBHt27fHwIEDlYoPEZFUscCIyKtXr7B48WIEBQVh0KBBqF69Opo3bw5/f/9/vc+CggKsW7cOrq6u6Nq1K1q3bo24uDgEBwfD2dkZQ4YMgbOzM06cOAEAOHPmDCIjI7Fjxw40aNAATk5O+Omnn2BkZISdO3cq7TckJAS1a9dGixYtMHDgQJW7jBcnIyMDa9euxU8//YS2bdvCzc0NGzZsQF5enmKbxMRErF+/Hjt27ECLFi1QvXp1TJgwAc2bN8f69esV2+Xm5mLp0qXw9PRE/fr1sWHDBoSHhyMyMhK6urrQ19eHhoYGLCwsYGFhAV1dXcVzBw8ejH79+sHR0RHz5s1DRkYGIiMjP/r7OmfOHDRr1gzu7u7w8/PDqVOnsGLFCri7u6NFixb44osvFN/TInXr1sWUKVPg5OSEyZMnQ0dHB1WqVMHQoUPh5OSEadOm4fnz57h8+fJH5yAiqqh4FZKI3LhxA9nZ2Wjbtm2J7dPOzg4GBgaKx+bm5lBXV4eamprSsidPngAAYmNjkZGRARMTE6X9ZGVlIT4+/r37tbS0VOzjQ+Lj45GTk4PGjRsrllWuXBnOzs6Kx1euXEF+fj5q1Kih9Nzs7GylXBoaGmjYsKHicc2aNWFkZIQbN26gUaNGH8xRp04dxb8rVaoEuVz+UfmLe765uTn09PTg4OCgtOzvhejd56irq8PExARubm5KzwHwSTmIiCoqFhgRefcIwd8VFY537835vrskv0tTU1PpsUwmK3ZZQUEBgLdHSCwtLZXGmxQxMjL64H6L9vFfZWRkQF1dHdHR0VBXV1dap6+vXyKv8V/zv/v8f/qefug1/74fACX2fSQiEjOeQhIRJycn6OrqFnsqxtTUFACQnJysWBYTE1PiGTw8PJCSkgINDQ04OjoqfVWpUuU/77969erQ1NRUGnPz8uVL3Lp1S/HY3d0d+fn5ePLkiUoGCwsLxXZ5eXmIiopSPI6Li0NqaipcXN7e3VVLSwv5+fn/OTMREZU9FhgR0dHRQWBgICZNmoSNGzciPj4e586dw9q1a+Ho6Ahra2vMmDEDt2/fxoEDB4q9eue/8vLygqenJ7p3746jR4/i3r17CA8Pxw8//KBUFv4tfX19+Pn5YeLEiTh+/DiuXr2KwYMHK53SqlGjBgYMGAAfHx/s2rULCQkJiIyMxPz583HgwAHFdpqamhg1ahTOnz+P6OhoDB48GE2aNFGcPrKzs0NCQgJiYmLw7NkzZGdn/+f8RERUNngK6V1lPDPuvzF16lRoaGhg2rRpePToESwtLTF8+HBoamri999/x4gRI1CnTh00bNgQc+bMQe/evUv09WUyGQ4ePIgffvgBQ4YMwdOnT2FhYYGWLVv+p6uh3vW///0PGRkZ6Nq1KwwMDPDtt98iLU35v8369esxZ84cfPvtt3j48CGqVKmCJk2aoEuXLopt9PT0EBgYiP79++Phw4do0aIF1q5dq1jfq1cv7Nq1C61bt0ZqairWr1+PwYMHl8h7ICKi0iUrfHfQRAWSnp4OQ0NDpKWlQS6XK6178+YNEhISYG9vDx0dHYESUmkS820TSgo/50Tid6OmS4nv0+XmjRLfZ0n60O/vd/EUEhEREYkOCwyVqcTEROjr67/3KzExUeiIHzR8+PD3Zh8+fLjQ8YiIJIOnkHhovUzl5eW999YGwNuBtRoa5Xdo1pMnT5Cenl7sOrlcDjMzszJO9H78nBOJH08hvf8UUvn9TUEVUtHl12JlZmZWrkoKEZFU8RQSERERiQ4LDBEREYkOCwwRERGJDgsMERER
iQ4LDBEREYkOr0J6h9sGtzJ9vSuDrnzycwoLC/H1119j586dePnyJQwNDTF48GAEBwcDeHsZ8tixYzF27NiSDVsKZDIZdu/eje7duwsdBTNmzMCePXtK5QaYRERU8ngERmQOHz6MkJAQ7N+/H8nJyahdu7bS+gsXLmDYsGECpRMHmUyGPXv2CB2DiIj+Ax6BEZn4+HhYWlqiadOmAKAy6ZupqakQsVTk5ORAS0tL6BhERFRB8QiMiAwePBijRo1CYmIiZDIZ7OzsVLaxs7NTnE4C3h5tWLFiBTp27AhdXV04ODhg586divX37t2DTCbD1q1b0bRpU+jo6KB27do4deqU0n6vXr2Kjh07Ql9fH+bm5hg4cCCePXumWN+qVSsEBARg7NixqFKlCry9vT/5/SUlJaFPnz4wMjJC5cqV0a1bN6VZewcPHozu3bvjp59+gqWlJUxMTDBy5Ejk5uYqtklOTkbnzp2hq6sLe3t7bNmyRel7UvQ969GjR7Hfw02bNsHOzg6Ghobo27cvXr169VHZW7VqhVGjRmHs2LEwNjaGubk5Vq9ejdevX2PIkCEwMDCAo6MjDh06pHjOyZMnIZPJcOTIEbi7u0NXVxdt2rTBkydPcOjQIbi4uEAul6N///7IzMz85O8nEVFFxgIjIosXL8asWbNQrVo1JCcn48KFCx/1vKlTp6JXr16IjY3FgAED0LdvX9y4oTyV9MSJE/Htt9/i0qVL8PT0RNeuXfH8+XMAQGpqKtq0aQN3d3dERUXh8OHDePz4Mfr06aO0jw0bNkBLSwtnz57FypUrP+m95ebmwtvbGwYGBvjrr79w9uxZ6Ovro0OHDsjJyVFsd+LECcTHx+PEiRPYsGEDQkJCEBISoljv4+ODR48e4eTJk/jjjz/w66+/4smTJ4r1Rd+z9evXq3wP4+PjsWfPHuzfvx/79+/HqVOnsGDBgo9+Dxs2bECVKlUQGRmJUaNGYcSIEejduzeaNm2Kixcvon379hg4cKBKGZkxYwaWLl2K8PBwRYkLDg7Gli1bcODAARw9ehS//PLLJ30/iYgqOhYYETE0NISBgQHU1dVhYWHx0aeLevfuDX9/f9SoUQOzZ89GgwYNVH4hBgQEoFevXnBxccGKFStgaGiItWvXAgCWLl0Kd3d3zJs3DzVr1oS7uzvWrVuHEydO4NatW4p9ODk5ISgoCM7OznB2dv6k97Zt2zYUFBRgzZo1cHNzg4uLC9avX4/ExEScPHlSsZ2xsTGWLl2KmjVrokuXLujcuTPCwsIAADdv3sSxY8ewevVqNG7cGB4eHlizZg2ysrIUzy/6nhkZGal8DwsKChASEoLatWujRYsWGDhwoGLfH6Nu3bqYMmUKnJycMHnyZOjo6KBKlSoYOnQonJycMG3aNDx//hyXL19Wet6cOXPQrFkzuLu7w8/PD6dOncKKFSvg7u6OFi1a4IsvvsCJEyc+6ftJRFTRcQyMBHh6eqo8/vvVNu9uo6GhgQYNGiiO0sTGxuLEiRPQ19dX2Xd8fDxq1KgBAKhfv/6/zhgbG4s7d+7AwMBAafmbN28QHx+veFyrVi2oq6srHltaWuLKlbdXc8XFxUFDQwMeHh6K9Y6OjjA2Nv6oDHZ2dkqvb2lpqXT05p/UqVNH8W91dXWYmJjAze3/rmwzNzcHAJV9vvs8c3Nz6OnpwcHBQWlZZGTkR+cgIpICFhj6RxkZGejatSt+/PFHlXWWlpaKf1eqVOk/vUb9+vWxefNmlXXvHiXR1NRUWieTyVBQUPCvX/dd/3XfxT3/3WUymQwAVPb5921K8z0SEVUUPIUkAefOnVN57OLi8t5t8vLyEB0drdjGw8MD165dg52dHRwdHZW+/ktpeZeHhwdu374NMzMzldcwNDT8qH04OzsjLy8Ply5dUiy7c+cOXr58qbSdpqYm8vPzSyQ3EREJgwVGAnbs2IF169bh1q1bmD59OiIjIxEQEKC0zbJly7B7927cvHkTI0eO
xMuXL+Hr6wsAGDlyJF68eIF+/frhwoULiI+Px5EjRzBkyJASKwIDBgxAlSpV0K1bN/z1119ISEjAyZMnMXr0aDx48OCj9lGzZk14eXlh2LBhiIyMxKVLlzBs2DDo6uoqjn4Ab08VhYWFISUlRaXcEBGROPAU0jv+zcy4YjBz5kxs3boV33zzDSwtLfH777/D1dVVaZsFCxZgwYIFiImJgaOjI/bu3YsqVaoAAKysrHD27FkEBgaiffv2yM7Ohq2tLTp06AA1tZLpwHp6ejh9+jQCAwPRs2dPvHr1ClWrVkXbtm0hl8s/ej8bN26En58fWrZsCQsLC8yfPx/Xrl2Djo6OYpuFCxdi/PjxWL16NapWrap0qTYREYmDrLCwsFDoEKUhPT0dhoaGSEtLU/kF+ObNGyQkJMDe3l7pF1tF9E/T9d+7dw/29va4dOkS6tWrV6bZysKDBw9gbW2NY8eOoW3btkLHKVNS+pwTVVQ3arr880afyOXmjX/eSEAf+v39Lh6BoQrl+PHjyMjIgJubG5KTkzFp0iTY2dmhZcuWQkcjIqISxDEwVCo2b94MfX39Yr9q1apVaq+bm5uL77//HrVq1UKPHj1gamqKkydPqlzZ8ykSExPf+1709fWRmJhYgu+AiIg+Bo/AVHD/dIbQzs7uH7f5Nz7//HM0bty42HX/pUz8E29v7391G4MPsbKy+uBdqq2srEr09YiI6J+xwFCpMDAwUJmUTqw0NDTg6OgodAwiInoHTyERERGR6LDAEBERkeiwwBAREZHosMAQERGR6LDAEBERkejwKqR3lMaMhx/yqbMhtmrVCvXq1UNwcHCJZQgJCcHYsWORmppaYvskIiIqbTwCQ0RERKLDAkNERESiwwIjMnl5eQgICIChoSGqVKmCqVOnKmbSffnyJXx8fGBsbAw9PT107NgRt2/fVnp+SEgIbGxsoKenhx49euD58+eKdffu3YOamhqioqKUnhMcHAxbW1sUFBR8MNvJkychk8lw5MgRuLu7Q1dXF23atMGTJ09w6NAhuLi4QC6Xo3///sjMzFQ87/Dhw2jevDmMjIxgYmKCLl26ID4+XrE+JycHAQEBsLS0hI6ODmxtbTF//nwAb2canjFjBmxsbKCtrQ0rKyuMHj36o76XycnJ6Ny5M3R1dWFvb48tW7bAzs6uRE/RERFR6WCBEZkNGzZAQ0MDkZGRWLx4MX7++WesWbMGADB48GBERUVh7969iIiIQGFhITp16oTc3FwAwPnz5+Hn54eAgADExMSgdevWmDNnjmLfdnZ28PLywvr165Vec/369Rg8eDDU1D7u4zJjxgwsXboU4eHhSEpKQp8+fRAcHIwtW7bgwIEDOHr0KH755RfF9q9fv8b48eMRFRWFsLAwqKmpoUePHorCtGTJEuzduxfbt29HXFwcNm/eDDs7OwDAH3/8gUWLFmHVqlW4ffs29uzZAzc3t4/K6ePjg0ePHuHkyZP4448/8Ouvv+LJkycf9VwiIhIWB/GKjLW1NRYtWgSZTAZnZ2dcuXIFixYtQqtWrbB3716cPXsWTZs2BfD2horW1tbYs2cPevfujcWLF6NDhw6YNGkSAKBGjRoIDw/H4cOHFfv39/fH8OHD8fPPP0NbWxsXL17ElStX8Oeff350xjlz5qBZs2YAAD8/P0yePBnx8fFwcHAAAHzxxRc4ceIEAgMDAQC9evVSev66detgamqK69evo3bt2khMTISTkxOaN28OmUwGW1tbxbaJiYmwsLCAl5cXNDU1YWNjg0aNGv1jxps3b+LYsWO4cOECGjRoAABYs2YNnJycPvp9EhGRcHgERmSaNGkCmUymeOzp6Ynbt2/j+vXr0NDQULqBoomJCZydnXHjxturnW7cuKFyg0VPT0+lx927d4e6ujp2794N4O0pp9atWyuOeHyMOnXqKP5tbm4OPT09RXkpWvbukY7bt2+jX79+cHBwgFwuV7xW0V2eBw8ejJiYGDg7O2P0
6NE4evSo4rm9e/dGVlYWHBwcMHToUOzevRt5eXn/mDEuLg4aGhrw8PBQLHN0dISxsfFHv08iIhIOCwwp0dLSgo+PD9avX4+cnBxs2bIFvr6+n7SPd+82LZPJVO4+LZPJlMbTdO3aFS9evMDq1atx/vx5nD9/HsDbsS8A4OHhgYSEBMyePRtZWVno06cPvvjiCwBvj0jFxcVh+fLl0NXVxTfffIOWLVsqTpsREVHFxAIjMkW/3IucO3cOTk5OcHV1RV5entL658+fIy4uDq6urgAAFxeXYp//d/7+/jh27BiWL1+OvLw89OzZsxTeiXLGKVOmoG3btnBxccHLly9VtpPL5fjyyy+xevVqbNu2DX/88QdevHgBANDV1UXXrl2xZMkSnDx5EhEREbhy5coHX9fZ2Rl5eXm4dOmSYtmdO3eKfW0iIip/OAZGZBITEzF+/Hh8/fXXuHjxIn755RcsXLgQTk5O6NatG4YOHYpVq1bBwMAA3333HapWrYpu3boBAEaPHo1mzZrhp59+Qrdu3XDkyBGl8S9FXFxc0KRJEwQGBsLX1xe6urql9n6MjY1hYmKCX3/9FZaWlkhMTMR3332ntM3PP/8MS0tLuLu7Q01NDTt27ICFhQWMjIwQEhKC/Px8NG7cGHp6evjtt9+gq6urNE6mODVr1oSXlxeGDRuGFStWQFNTE99++y10dXWVTtEREVH5xALzjk+dGVcIPj4+yMrKQqNGjaCuro4xY8Zg2LBhAN5eLTRmzBh06dIFOTk5aNmyJQ4ePKg4hdOkSROsXr0a06dPx7Rp0+Dl5YUpU6Zg9uzZKq/j5+eH8PDwTz599KnU1NSwdetWjB49GrVr14azszOWLFmCVq1aKbYxMDBAUFAQbt++DXV1dTRs2BAHDx6EmpoajIyMsGDBAowfPx75+flwc3PDvn37YGJi8o+vvXHjRvj5+aFly5awsLDA/Pnzce3aNejo6JTiOyYiopIgKyyaRKSCSU9Ph6GhIdLS0iCXy5XWvXnzBgkJCbC3t+cvq/eYPXs2duzYgcuXLwsdpcw8ePAA1tbWOHbsGNq2bSt0nP+Mn3Mi8SuNW9yU9z/WP/T7+13/aQzMggULIJPJMHbsWMWyN2/eYOTIkTAxMYG+vj569eqFx48fKz0vMTERnTt3hp6eHszMzDBx4kSVK0dOnjwJDw8PaGtrw9HRESEhIf8lKn2kjIwMXL16FUuXLsWoUaOEjlOqjh8/jr179yIhIQHh4eHo27cv7Ozs0LJlS6GjERHRP/jXBebChQtYtWqV0iWzADBu3Djs27cPO3bswKlTp/Do0SOlQaD5+fno3LkzcnJyEB4ejg0bNiAkJATTpk1TbJOQkIDOnTujdevWiImJwdixY+Hv748jR47827j0kQICAlC/fn20atVK5fTR8OHDoa+vX+zX8OHDBUpcvL/++uu9WfX19QEAubm5+P7771GrVi306NEDpqamOHnypMpVU0REVP78q1NIGRkZ8PDwwPLlyzFnzhzFHZLT0tJgamqKLVu2KC5zvXnzJlxcXBAREYEmTZrg0KFD6NKlCx49egRzc3MAwMqVKxEYGIinT59CS0sLgYGBOHDgAK5evap4zb59+yI1NbXYQafF4SmkkvfkyROkp6cXu04ul8PMzKyME71fVlYWHj58+N71jo6OZZhGGPycE4kfTyG9/xTSvxrEO3LkSHTu3BleXl5KU9FHR0cjNzcXXl5eimU1a9aEjY2NosBERETAzc1NUV4AwNvbGyNGjMC1a9fg7u6OiIgIpX0UbfPuqaq/y87ORnZ2tuLx+37R0r9nZmZWrkrKh+jq6kqipBARSdUnF5itW7fi4sWLuHDhgsq6lJQUaGlpwcjISGm5ubk5UlJSFNu8W16K1het+9A26enpyMrKKvay3vnz52PmzJmf9F4q6PhlIgD8fBNRxfZJY2CSkpIwZswYbN68udwdkp48eTLS0tIUX0lJSe/dtmiMw7t3RCaq
aIo+3xzTQ0QV0ScdgYmOjsaTJ0+U7h+Tn5+P06dPY+nSpThy5AhycnKQmpqqdBTm8ePHsLCwAABYWFggMjJSab9FVym9u83fr1x6/Pgx5HL5eydV09bWhra29ke9D3V1dRgZGSnux6Onp8fJy6jCKCwsRGZmJp48eQIjIyOoq6sLHYmIqMR9UoFp27atyhTtQ4YMQc2aNREYGAhra2toamoiLCxMcYfhuLg4JCYmKm4a6Onpiblz5+LJkyeK8RShoaGQy+WKKe89PT1x8OBBpdcJDQ1VufHgf1FUlt69qSBRRWJkZKT4nBMRVTSfVGAMDAxQu3ZtpWWVKlWCiYmJYrmfnx/Gjx+PypUrQy6XY9SoUfD09ESTJk0AAO3bt4erqysGDhyIoKAgpKSkYMqUKRg5cqTiCMrw4cOxdOlSTJo0Cb6+vjh+/Di2b9+OAwcOlMR7BvD2hoKWlpYwMzPjjf+owtHU1OSRFyKq0Er8VgKLFi2CmpoaevXqhezsbHh7e2P58uWK9erq6ti/fz9GjBgBT09PVKpUCYMGDcKsWbMU29jb2+PAgQMYN24cFi9ejGrVqmHNmjXw9vYu6bhQV1fnD3oiIiKRkeStBIiIiMSA88CU0q0EiIiIiITAAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiU+I3cyQiEquSvu9Meb/nDJGY8QgMERERiQ6PwJAgpHiHVSIiKjk8AkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREosMCQ0RERKLDAkNERESiwwJDREREovNJBWbFihWoU6cO5HI55HI5PD09cejQIcX6N2/eYOTIkTAxMYG+vj569eqFx48fK+0jMTERnTt3hp6eHszMzDBx4kTk5eUpbXPy5El4eHhAW1sbjo6OCAkJ+ffvkIiIiCqcTyow1apVw4IFCxAdHY2oqCi0adMG3bp1w7Vr1wAA48aNw759+7Bjxw6cOnUKjx49Qs+ePRXPz8/PR+fOnZGTk4Pw8HBs2LABISEhmDZtmmKbhIQEdO7cGa1bt0ZMTAzGjh0Lf39/HDlypITeMhEREYmdrLCwsPC/7KBy5cr43//+hy+++AKmpqbYsmULvvjiCwDAzZs34eLigoiICDRp0gSHDh1Cly5d8OjRI5ibmwMAVq5cicDAQDx9+hRaWloIDAzEgQMHcPXqVcVr9O3bF6mpqTh8+PBH50pPT4ehoSHS0tIgl8v/y1ukUnCjpkuJ79Pl5o0S3ydJS0l/LvmZpP9Kij8rP/b3978eA5Ofn4+tW7fi9evX8PT0RHR0NHJzc+Hl5aXYpmbNmrCxsUFERAQAICIiAm5uboryAgDe3t5IT09XHMWJiIhQ2kfRNkX7eJ/s7Gykp6crfREREVHF9MkF5sqVK9DX14e2tjaGDx+O3bt3w9XVFSkpKdDS0oKRkZHS9ubm5khJSQEApKSkKJWXovVF6z60TXp6OrKyst6ba/78+TA0NFR8WVtbf+pbIyIiIpH45ALj7OyMmJgYnD9/HiNGjMCgQYNw/fr10sj2SSZPnoy0tDTFV1JSktCRiIiIqJRofOoTtLS04OjoCACoX78+Lly4gMWLF+PL
L79ETk4OUlNTlY7CPH78GBYWFgAACwsLREZGKu2v6Cqld7f5+5VLjx8/hlwuh66u7ntzaWtrQ1tb+1PfDhEREYnQf54HpqCgANnZ2ahfvz40NTURFhamWBcXF4fExER4enoCADw9PXHlyhU8efJEsU1oaCjkcjlcXV0V27y7j6JtivZBRERE9ElHYCZPnoyOHTvCxsYGr169wpYtW3Dy5EkcOXIEhoaG8PPzw/jx41G5cmXI5XKMGjUKnp6eaNKkCQCgffv2cHV1xcCBAxEUFISUlBRMmTIFI0eOVBw9GT58OJYuXYpJkybB19cXx48fx/bt23HgwIGSf/dEREQkSp9UYJ48eQIfHx8kJyfD0NAQderUwZEjR9CuXTsAwKJFi6CmpoZevXohOzsb3t7eWL58ueL56urq2L9/P0aMGAFPT09UqlQJgwYNwqxZsxTb2Nvb48CBAxg3bhwWL16MatWqYc2aNfD29i6ht0xERERi95/ngSmvOA9M+SbFuQ2o/OM8MFTeSPFnZanPA0NEREQkFBYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISnU8qMPPnz0fDhg1hYGAAMzMzdO/eHXFxcUrbvHnzBiNHjoSJiQn09fXRq1cvPH78WGmbxMREdO7cGXp6ejAzM8PEiRORl5entM3Jkyfh4eEBbW1tODo6IiQk5N+9QyIiIqpwPqnAnDp1CiNHjsS5c+cQGhqK3NxctG/fHq9fv1ZsM27cOOzbtw87duzAqVOn8OjRI/Ts2VOxPj8/H507d0ZOTg7Cw8OxYcMGhISEYNq0aYptEhIS0LlzZ7Ru3RoxMTEYO3Ys/P39ceTIkRJ4y0RERCR2ssLCwsJ/++SnT5/CzMwMp06dQsuWLZGWlgZTU1Ns2bIFX3zxBQDg5s2bcHFxQUREBJo0aYJDhw6hS5cuePToEczNzQEAK1euRGBgIJ4+fQotLS0EBgbiwIEDuHr1quK1+vbti9TUVBw+fPijsqWnp8PQ0BBpaWmQy+X/9i1SKblR06XE9+ly80aJ75OkpaQ/l/xM0n8lxZ+VH/v7+z+NgUlLSwMAVK5cGQAQHR2N3NxceHl5KbapWbMmbGxsEBERAQCIiIiAm5uborwAgLe3N9LT03Ht2jXFNu/uo2ibon0UJzs7G+np6UpfREREVDH96wJTUFCAsWPHolmzZqhduzYAICUlBVpaWjAyMlLa1tzcHCkpKYpt3i0vReuL1n1om/T0dGRlZRWbZ/78+TA0NFR8WVtb/9u3RkREROXcvy4wI0eOxNWrV7F169aSzPOvTZ48GWlpaYqvpKQkoSMRERFRKdH4N08KCAjA/v37cfr0aVSrVk2x3MLCAjk5OUhNTVU6CvP48WNYWFgotomMjFTaX9FVSu9u8/crlx4/fgy5XA5dXd1iM2lra0NbW/vfvB0iIiISmU86AlNYWIiAgADs3r0bx48fh729vdL6+vXrQ1NTE2FhYYplcXFxSExMhKenJwDA
09MTV65cwZMnTxTbhIaGQi6Xw9XVVbHNu/so2qZoH0RERCRtn3QEZuTIkdiyZQv+/PNPGBgYKMasGBoaQldXF4aGhvDz88P48eNRuXJlyOVyjBo1Cp6enmjSpAkAoH379nB1dcXAgQMRFBSElJQUTJkyBSNHjlQcQRk+fDiWLl2KSZMmwdfXF8ePH8f27dtx4MCBEn77REREJEafdARmxYoVSEtLQ6tWrWBpaan42rZtm2KbRYsWoUuXLujVqxdatmwJCwsL7Nq1S7FeXV0d+/fvh7q6Ojw9PfHVV1/Bx8cHs2bNUmxjb2+PAwcOIDQ0FHXr1sXChQuxZs0aeHt7l8BbJiIiIrH7T/PAlGecB6Z8k+LcBlT+cR4YKm+k+LOyTOaBISIiIhICCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJzicXmNOnT6Nr166wsrKCTCbDnj17lNYXFhZi2rRpsLS0hK6uLry8vHD79m2lbV68eIEBAwZALpfDyMgIfn5+yMjIUNrm8uXLaNGiBXR0dGBtbY2goKBPf3dERERUIX1ygXn9+jXq1q2LZcuWFbs+KCgIS5YswcqVK3H+/HlUqlQJ3t7eePPmjWKbAQMG4Nq1awgNDcX+/ftx+vRpDBs2TLE+PT0d7du3h62tLaKjo/G///0PM2bMwK+//vov3iIRERFVNBqf+oSOHTuiY8eOxa4rLCxEcHAwpkyZgm7dugEANm7cCHNzc+zZswd9+/bFjRs3cPjwYVy4cAENGjQAAPzyyy/o1KkTfvrpJ1hZWWHz5s3IycnBunXroKWlhVq1aiEmJgY///yzUtEhIiIiaSrRMTAJCQlISUmBl5eXYpmhoSEaN26MiIgIAEBERASMjIwU5QUAvLy8oKamhvPnzyu2admyJbS0tBTbeHt7Iy4uDi9fviz2tbOzs5Genq70RURERBVTiRaYlJQUAIC5ubnScnNzc8W6lJQUmJmZKa3X0NBA5cqVlbYpbh/vvsbfzZ8/H4aGhoova2vr//6GiIiIqFyqMFchTZ48GWlpaYqvpKQkoSMRERFRKSnRAmNhYQEAePz4sdLyx48fK9ZZWFjgyZMnSuvz8vLw4sULpW2K28e7r/F32trakMvlSl9ERERUMZVogbG3t4eFhQXCwsIUy9LT03H+/Hl4enoCADw9PZGamoro6GjFNsePH0dBQQEaN26s2Ob06dPIzc1VbBMaGgpnZ2cYGxuXZGQiIiISoU8uMBkZGYiJiUFMTAyAtwN3Y2JikJiYCJlMhrFjx2LOnDnYu3cvrly5Ah8fH1hZWaF79+4AABcXF3To0AFDhw5FZGQkzp49i4CAAPTt2xdWVlYAgP79+0NLSwt+fn64du0atm3bhsWLF2P8+PEl9saJiIhIvD75MuqoqCi0bt1a8bioVAwaNAghISGYNGkSXr9+jWHDhiE1NRXNmzfH4cOHoaOjo3jO5s2bERAQgLZt20JNTQ29evXCkiVL
FOsNDQ1x9OhRjBw5EvXr10eVKlUwbdo0XkJNREREAABZYWFhodAhSkN6ejoMDQ2RlpbG8TDl0I2aLiW+T5ebN0p8nyQtJf255GeS/isp/qz82N/fFeYqJCIiIpIOFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0WGCIiIhIdFhgiIiISHQ2hAxAREVUEbhvcSnyf20t8jxUHCwwRiRJ/WRBJGwsMfZSS/mXBXxRERPRfcAwMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYkOCwwRERGJDgsMERERiQ4LDBEREYlOuS4wy5Ytg52dHXR0dNC4cWNERkYKHYmIiIjKgXI7E++2bdswfvx4rFy5Eo0bN0ZwcDC8vb0RFxcHMzMzoeOVGLvvDpT4Pu8t6Fzi+yRpKenPJT+T9F/xZyX9Xbk9AvPzzz9j6NChGDJkCFxdXbFy5Uro6elh3bp1QkcjIiIigZXLIzA5OTmIjo7G5MmTFcvU1NTg5eWFiIiIYp+TnZ2N7OxsxeO0tDQAQHp6eumG/Y8KsjNLfJ/pk+Ulvs9822olur+M/PwS3R9Q/v9bi0lJfy7F8JkESv5zyc9kyRHDz0oxfCaB8v+5LMpXWFj4we3KZYF59uwZ8vPzYW5urrTc3NwcN2/eLPY58+fPx8yZM1WWW1tbl0rG8sywVPZ6o0T31qhE9/b/GZbOO6f/TgyfSaAUPpf8TJZrJf9fRwSfSUA0n8tXr17B8ANZy2WB+TcmT56M8ePHKx4XFBTgxYsXMDExgUwmEzCZ+KWnp8Pa2hpJSUmQy0v+L2miT8XPJJU3/EyWnMLCQrx69QpWVlYf3K5cFpgqVapAXV0djx8/Vlr++PFjWFhYFPscbW1taGtrKy0zMjIqrYiSJJfL+T8mlSv8TFJ5w89kyfjQkZci5XIQr5aWFurXr4+wsDDFsoKCAoSFhcHT01PAZERERFQelMsjMAAwfvx4DBo0CA0aNECjRo0QHByM169fY8iQIUJHIyIiIoGV2wLz5Zdf4unTp5g2bRpSUlJQr149HD58WGVgL5U+bW1tTJ8+XeUUHZFQ+Jmk8oafybInK/yn65SIiIiIyplyOQaGiIiI6ENYYIiIiEh0WGCIiIhIdFhgiIiISHRYYIiIiEh0yu1l1FQ+ZGdn87JAElxCQgL++usv3L9/H5mZmTA1NYW7uzs8PT2ho6MjdDySIH4mhccCQ0oOHTqErVu34q+//kJSUhIKCgpQqVIluLu7o3379hgyZMg/3p+CqKRs3rwZixcvRlRUFMzNzWFlZQVdXV28ePEC8fHx0NHRwYABAxAYGAhbW1uh45IE8DNZfnAeGAIA7N69G4GBgXj16hU6deqERo0aKf2PefXqVfz111+IiIjA4MGDMXv2bJiamgodmyowd3d3aGlpYdCgQejatavKneWzs7MRERGBrVu34o8//sDy5cvRu3dvgdKSFPAzWb6wwBAAwNPTE1OmTEHHjh2hpvb+oVEPHz7EL7/8AnNzc4wbN64ME5LUHDlyBN7e3h+17fPnz3Hv3j3Ur1+/lFORlPEzWb6wwBAREZHocAwMvVdOTg4SEhJQvXp1aGjwo0Llw5s3b5CTk6O0TC6X
C5SGiJ9JofAyalKRmZkJPz8/6OnpoVatWkhMTAQAjBo1CgsWLBA4HUlRZmYmAgICYGZmhkqVKsHY2Fjpi6is8TMpPBYYUjF58mTExsbi5MmTSpcDenl5Ydu2bQImI6maOHEijh8/jhUrVkBbWxtr1qzBzJkzYWVlhY0bNwodjySIn0nhcQwMqbC1tcW2bdvQpEkTGBgYIDY2Fg4ODrhz5w48PDyQnp4udESSGBsbG2zcuBGtWrWCXC7HxYsX4ejoiE2bNuH333/HwYMHhY5IEsPPpPB4BIZUPH36FGZmZirLX79+DZlMJkAikroXL17AwcEBwNuxBS9evAAANG/eHKdPnxYyGkkUP5PCY4EhFQ0aNMCBAwcUj4tKy5o1a+Dp6SlULJIwBwcHJCQkAABq1qyJ7du3AwD27dsHIyMjAZORVPEzKTxeWkIq5s2bh44dO+L69evIy8vD4sWLcf36dYSHh+PUqVNCxyMJGjJkCGJjY/HZZ5/hu+++Q9euXbF06VLk5ubi559/FjoeSRA/k8LjGBgqVnx8PBYsWIDY2FhkZGTAw8MDgYGBcHNzEzoaEe7fv4/o6Gg4OjqiTp06Qsch4mdSACwwREREJDo8hUQfxAmaqDwYPXo0HB0dMXr0aKXlS5cuxZ07dxAcHCxMMJKsWbNmfXD9tGnTyiiJdPEIDKnIzMzEpEmTsH37djx//lxlfX5+vgCpSMqqVq2KvXv3qtxX5uLFi/j888/x4MEDgZKRVLm7uys9zs3NRUJCAjQ0NFC9enVcvHhRoGTSwSMwpGLixIk4ceIEVqxYgYEDB2LZsmV4+PAhVq1axZl4SRDPnz+HoaGhynK5XI5nz54JkIik7tKlSyrL0tPTMXjwYPTo0UOARNLDy6hJxb59+7B8+XL06tULGhoaaNGiBaZMmYJ58+Zh8+bNQscjCXJ0dMThw4dVlh86dEgxFweR0ORyOWbOnImpU6cKHUUSeASGVHxogqYRI0YIGY0kavz48QgICMDTp0/Rpk0bAEBYWBgWLlzI8S9UrqSlpSEtLU3oGJLAAkMqiiZosrGxUUzQ1KhRI07QRILx9fVFdnY25s6di9mzZwMA7OzssGLFCvj4+AicjqRoyZIlSo8LCwuRnJyMTZs2oWPHjgKlkhYO4iUVixYtgrq6OkaPHo1jx46ha9euKCwsVEzQNGbMGKEjkoQ9ffoUurq60NfXFzoKSZi9vb3SYzU1NZiamqJNmzaYPHkyDAwMBEomHSww9I84QRMREZU3LDBEVC55eHggLCwMxsbGcHd3/+CNRHnJKgkpKSkJAGBtbS1wEmnhGBgC8PZ87rBhw6Cjo6Nybvfv/j6ZGFFp6NatG7S1tRX/5p3QqTzJy8vDzJkzsWTJEmRkZAAA9PX1MWrUKEyfPh2ampoCJ6z4eASGALw9nxsVFQUTExOVc7vvkslkuHv3bhkmIyIqf0aMGIFdu3Zh1qxZ8PT0BABERERgxowZ6N69O1asWCFwwoqPBYaIyj0HBwdcuHABJiYmSstTU1Ph4eHBUk1lztDQEFu3blW54ujgwYPo168fL6UuA5zIjojKvXv37hV7C4vs7GzeRoAEoa2tDTs7O5Xl9vb20NLSKvtAEsQxMATg7URhH+vnn38uxSRE/2fv3r2Kfx85ckTpdgL5+fkICwv74ClPotISEBCA2bNnY/369YqxWkVzFQUEBAicThp4CokAAK1bt/6o7WQyGY4fP17KaYjeUlN7e5BYJpPh7z+qNDU1YWdnh4ULF6JLly5CxCMJ69GjB8LCwqCtrY26desCAGJjY5GTk4O2bdsqbbtr1y4hIlZ4PAJDAIATJ04IHYFIRUFBAYC3h+UvXLiAKlWqCJyI6C0jIyP06tVLaRkvoy5bPAJD73Xnzh3Ex8ejZcuW0NXVRWFhIS9lJSKicoGDeEnF8+fP0bZtW9SoUQOdOnVCcnIyAMDPzw/ffvutwOlIikaPHl3s/ERLly7F2LFjyz4QEQmO
BYZUjBs3DpqamkhMTISenp5i+ZdffonDhw8LmIyk6o8//kCzZs1Uljdt2hQ7d+4UIBERsHPnTvTp0wdNmjSBh4eH0heVPhYYUnH06FH8+OOPqFatmtJyJycn3L9/X6BUJGXPnz9XugKpiFwux7NnzwRIRFK3ZMkSDBkyBObm5rh06RIaNWoEExMT3L17l3ejLiMsMKTi9evXSkdeirx48UJxuSBRWXJ0dCz26N+hQ4fg4OAgQCKSuuXLl+PXX3/FL7/8Ai0tLUyaNAmhoaEYPXo0J7ErI7wKiVS0aNECGzduxOzZswG8vYS1oKAAQUFBH325NVFJGj9+PAICAvD06VO0adMGABAWFoaFCxciODhY2HAkSYmJiWjatCkAQFdXF69evQIADBw4EE2aNMHSpUuFjCcJLDCkIigoCG3btkVUVBRycnIwadIkXLt2DS9evMDZs2eFjkcS5Ovrq5gkrKhY29nZYcWKFfDx8RE4HUmRhYUFXrx4AVtbW9jY2ODcuXOoW7cuEhISVOYsotLBU0ikonbt2rh16xaaN2+Obt264fXr1+jZsycuXbqE6tWrCx2PJCYvLw8bN25Ez5498eDBAzx+/Bjp6em4e/cuywsJpk2bNoqZoocMGYJx48ahXbt2+PLLL9GjRw+B00kD54EhonJPT08PN27cgK2trdBRiAC8nWSxoKAAGhpvT2Rs3boV4eHhcHJywtdff837IZUBFhgCAFy+fPmjt61Tp04pJiFS1apVK4wdOxbdu3cXOgoRlRMcA0MAgHr16inuN/PubLtF/fbdZcXdFZioNH3zzTf49ttv8eDBA9SvXx+VKlVSWs9STUJ4+fIl1q5dixs3bgAAXF1dMWTIEFSuXFngZNLAIzAEAErzu1y6dAkTJkzAxIkT4enpCQCIiIjAwoULERQUxL+CqcwV3dTxXe8WbpZqKmunT5/G559/DrlcjgYNGgAAoqOjkZqain379qFly5YCJ6z4WGBIRaNGjTBjxgx06tRJafnBgwcxdepUREdHC5SMpOqfJlDk2Bgqa25ubvD09MSKFSugrq4O4O3R6W+++Qbh4eG4cuWKwAkrPhYYUqGrq4uLFy/CxcVFafmNGzfg4eGBrKwsgZIREZUPurq6iImJgbOzs9LyuLg41KtXjz8nywDHwJAKFxcXzJ8/H2vWrFGMpM/JycH8+fNVSg1RWbp+/ToSExORk5OjtPzzzz8XKBFJlYeHB27cuKFSYG7cuIG6desKlEpaWGBIxcqVK9G1a1dUq1ZNMTjy8uXLkMlk2Ldvn8DpSIru3r2LHj164MqVK4qxL8D/DS7nGBgqa6NHj8aYMWNw584dNGnSBABw7tw5LFu2DAsWLFC6spODzEsHTyFRsV6/fo3Nmzfj5s2bAN4elenfv7/K1R9EZaFr165QV1fHmjVrYG9vj8jISDx//hzffvstfvrpJ7Ro0ULoiCQxxQ0sfxcHmZc+FhgiKveqVKmC48ePo06dOjA0NERkZCScnZ1x/PhxfPvtt7h06ZLQEUli/mlg+bs4yLx08BQSvRfHG1B5kZ+fDwMDAwBvy8yjR4/g7OwMW1tbxMXFCZyOpIilRHgsMKSC4w2ovKlduzZiY2Nhb2+Pxo0bIygoCFpaWvj111/h4OAgdDwiEgBv5kgqxowZA3t7ezx58gR6enq4du0aTp8+jQYNGuDkyZNCxyMJmjJlCgoKCgAAs2bNQkJCAlq0aIGDBw9i8eLFAqcjIiFwDAyp4HgDEoMXL17A2NhY6TYXRCQdPAJDKoobbwCA4w1IML6+vnj16pXSssqVKyMzMxO+vr4CpSIiIbHAkIqi8QYAFOMNzp49i1mzZnG8AQliw4YNxc5smpWVhY0bNwqQiKQuKSkJDx48UDyOjIzE2LFj8euvvwqYSlpYYEjFh8YbLFmyROB0JCXp6elIS0tDYWEhXr16hfT0dMXXy5cvcfDgQZiZmQkdkySof//+OHHiBAAgJSUF7dq1Q2Rk
JH744QfMmjVL4HTSwDEw9FE43oCEoKam9sHPnEwmw8yZM/HDDz+UYSoiwNjYGOfOnYOzszOWLFmCbdu24ezZszh69CiGDx+Ou3fvCh2xwuNl1PRRKleuLHQEkqATJ06gsLAQbdq0wR9//KH0OdTS0oKtrS2srKwETEhSlZubC21tbQDAsWPHFPNj1axZE8nJyUJGkwwWGCIqtz777DMAQEJCAqytrf9x+naislKrVi2sXLkSnTt3RmhoKGbPng0AePToEUxMTAROJw08hUREopCamorIyEg8efJEMUariI+Pj0CpSKpOnjyJHj16ID09HYMGDcK6desAAN9//z1u3ryJXbt2CZyw4mOBIaJyb9++fRgwYAAyMjIgl8uVxsXIZDK8ePFCwHQkVfn5+UhPT4exsbFi2b1796Cnp8fB5WWABYaIyr0aNWqgU6dOmDdvHvT09ISOQ0TlAAsMqdiwYQOqVKmCzp07AwAmTZqEX3/9Fa6urvj99995EzMqc5UqVcKVK1c4DxEJysPDA2FhYTA2Noa7u/sHr5C7ePFiGSaTJg7iJRXz5s3DihUrAAARERFYtmwZFi1ahP3792PcuHE8t0tlztvbG1FRUSwwJKhu3boprjzq3r27sGGIR2BIlZ6eHm7evAkbGxsEBgYiOTkZGzduxLVr19CqVSs8ffpU6IgkMWvXrsWsWbMwZMgQuLm5QVNTU2l90SWsRCQdPAJDKvT19fH8+XPY2Njg6NGjGD9+PABAR0en2OnciUrb0KFDAaDYGU5lMhny8/PLOhIRCYwFhlS0a9cO/v7+cHd3x61bt9CpUycAwLVr12BnZydsOJKkv182TSSET5mNnFfGlT4WGFKxbNkyTJkyBUlJSfjjjz8UkzJFR0ejX79+AqcjIhJGcHCw0BHoHRwDQ0Si8Pr1a5w6dQqJiYnIyclRWjd69GiBUhGRUFhgCABw+fJl1K5dG2pqarh8+fIHt61Tp04ZpSJ669KlS+jUqRMyMzPx+vVrVK5cGc+ePVNMGMYb55EQ4uPjsX79esTHx2Px4sUwMzPDoUOHYGNjg1q1agkdr8JjgSEAb+/6m5KSAjMzM8UdgN/9aBQ95oBJEkKrVq1Qo0YNrFy5EoaGhoiNjYWmpia++uorjBkzBj179hQ6IknMqVOn0LFjRzRr1gynT5/GjRs34ODggAULFiAqKgo7d+4UOmKFxwJDAID79+/DxsYGMpkM9+/f/+C2nMiOypqRkRHOnz8PZ2dnGBkZISIiAi4uLjh//jwGDRqEmzdvCh2RJMbT0xO9e/fG+PHjYWBggNjYWDg4OCAyMhI9e/bEgwcPhI5Y4XEQLwFQLiUsKFTeaGpqKu5EbWZmhsTERLi4uMDQ0BBJSUkCpyMpunLlCrZs2aKy3MzMDM+ePRMgkfSwwBAAYO/evR+9LScNo7Lm7u6OCxcuwMnJCZ999hmmTZuGZ8+eYdOmTahdu7bQ8UiCjIyMkJycDHt7e6Xlly5dQtWqVQVKJS08hUQAoPjr9p9wDAwJISoqCq9evULr1q3x5MkT+Pj4IDw8HE5OTli3bh3q1q0rdESSmAkTJuD8+fPYsWMHatSogYsXL+Lx48fw8fGBj48Ppk+fLnTECo8FhoiI6BPl5ORg5MiRCAkJQX5+PjQ0NJCfn4/+/fsjJCQE6urqQkes8Fhg6IPevHkDHR0doWMQEZVLSUlJuHLlCjIyMuDu7g4nJyehI0kGCwypyM/Px7x587By5Uo8fvwYt27dgoODA6ZOnQo7Ozv4+fkJHZGIiCTu4wY+kKTMnTsXISEhCAoKgpaWlmJ57dq1sWbNGgGTERGVD7169cKPP/6osjwoKAi9e/cWIJH0sMCQio0bN+LXX3/FgAEDlM7j1q1bl/NtEBEBOH36tOJGt+/q2LEjTp8+LUAi6WGBIRUPHz6Eo6OjyvKCggLk5uYKkIhIVWpqqtARSMIyMjKUjlAX0dTURHp6ugCJpIcFhlS4urrir7/+
Ulm+c+dOuLu7C5CIpO7HH3/Etm3bFI/79OkDExMTVK1aFbGxsQImI6lyc3NT+kwW2bp1K1xdXQVIJD2cyI5UTJs2DYMGDcLDhw9RUFCAXbt2IS4uDhs3bsT+/fuFjkcStHLlSmzevBkAEBoaitDQUBw6dAjbt2/HxIkTcfToUYETktRMnToVPXv2RHx8PNq0aQMACAsLw++//44dO3YInE4aeBUSFeuvv/7CrFmzEBsbi4yMDHh4eGDatGlo37690NFIgnR1dXHr1i1YW1tjzJgxePPmDVatWoVbt26hcePGePnypdARSYIOHDiAefPmISYmBrq6uqhTpw6mT5+Ozz77TOhoksACQ0TlnpWVFXbu3ImmTZvC2dkZc+bMQe/evREXF4eGDRtyzAGRBPEUEqm4cOECCgoK0LhxY6Xl58+fh7q6Oho0aCBQMpKqnj17on///nBycsLz58/RsWNHAG/vO1PcgHOi0paUlASZTIZq1aoBACIjI7Flyxa4urpi2LBhAqeTBg7iJRUjR44s9g6/Dx8+xMiRIwVIRFK3aNEiBAQEwNXVFaGhodDX1wcAJCcn45tvvhE4HUlR//79ceLECQBASkoKvLy8EBkZiR9++AGzZs0SOJ008BQSqdDX18fly5fh4OCgtDwhIQF16tTBq1evBEpGRFQ+GBsb49y5c3B2dsaSJUuwbds2nD17FkePHsXw4cNx9+5doSNWeDyFRCq0tbXx+PFjlQKTnJwMDQ1+ZKhs7N27Fx07doSmpib27t37wW0///zzMkpF9FZubi60tbUBAMeOHVN8BmvWrInk5GQho0kGj8CQin79+iE5ORl//vknDA0NAbydNKx79+4wMzPD9u3bBU5IUqCmpoaUlBSYmZlBTe39Z7tlMhny8/PLMBkR0LhxY7Ru3RqdO3dG+/btce7cOdStWxfnzp3DF198gQcPHggdscJjgSEVDx8+RMuWLfH8+XPFxHUxMTEwNzdHaGgorK2tBU5IRCSskydPokePHkhPT8egQYOwbt06AMD333+PmzdvYteuXQInrPhYYKhYr1+/xubNmxEbG6uY36Bfv37Q1NQUOhoRUbmQn5+P9PR0GBsbK5bdu3cPenp6MDMzEzCZNLDAEFG5tGTJko/edvTo0aWYhOj9nj59iri4OACAs7MzTE1NBU4kHSwwpGLDhg2oUqUKOnfuDACYNGkSfv31V7i6uuL333+Hra2twAlJCuzt7T9qO5lMxis+qMy9fv0ao0aNwsaNG1FQUAAAUFdXh4+PD3755Rfo6ekJnLDiY4EhFc7OzlixYgXatGmDiIgItG3bFsHBwdi/fz80NDR4bpeIJO/rr7/GsWPHsHTpUjRr1gwAcObMGYwePRrt2rXDihUrBE5Y8bHAkAo9PT3cvHkTNjY2CAwMRHJyMjZu3Ihr166hVatWePr0qdARSaJycnKQkJCA6tWr85J+ElSVKlWwc+dOtGrVSmn5iRMn0KdPH/6cLAOciZdU6Ovr4/nz5wCAo0ePol27dgAAHR0dZGVlCRmNJCozMxN+fn7Q09NDrVq1kJiYCAAYNWoUFixYIHA6kqLMzEyYm5urLDczM0NmZqYAiaSHBYZUtGvXDv7+/vD398etW7fQqVMnAMC1a9dgZ2cnbDiSpMmTJyM2NhYnT56Ejo6OYrmXlxe2bdsmYDKSKk9PT0yfPh1v3rxRLMvKysLMmTPh6ekpYDLp4DFYUrFs2TJMmTIFSUlJ+OOPP2BiYgIAiI6ORr9+/QROR1K0Z88ebNu2DU2aNIFMJlMsr1WrFuLj4wVMRlK1ePFieHt7o1q1aqhbty4AIDY2Fjo6Ojhy5IjA6aSBY2CIqNzT09PD1atX4eDgAAMDA8TGxsLBwQGxsbFo2bIl0tLShI5IEpSZmYnNmzfj5s2bAAAXFxcMGDAAurq6AieTBh6BoWKlpqZi7dq1uHHjBoC3f+n6+voqbi1AVJYaNGiAAwcOYNSoUQCgOAqzZs0a
Hq4nwejp6WHo0KFCx5AsHoEhFVFRUfD29oauri4aNWoEALhw4QKysrJw9OhReHh4CJyQpObMmTPo2LEjvvrqK4SEhODrr7/G9evXER4ejlOnTqF+/fpCRySJed8NRmUyGXR0dODo6PjRcxnRv8MCQypatGgBR0dHrF69WnGpal5eHvz9/XH37l2cPn1a4IQkRfHx8ViwYAFiY2ORkZEBDw8PBAYGws3NTehoJEFqamqQyWT4+6/QomUymQzNmzfHnj17lG41QCWHBYZU6Orq4tKlS6hZs6bS8uvXr6NBgwa8RJCIJC8sLAw//PAD5s6dqzhSHRkZialTp2LKlCkwNDTE119/jcaNG2Pt2rUCp62YOAaGVMjlciQmJqoUmKSkJBgYGAiUiqTs4MGDUFdXh7e3t9LyI0eOoKCgAB07dhQoGUnVmDFj8Ouvv6Jp06aKZW3btoWOjg6GDRuGa9euITg4GL6+vgKmrNg4Dwyp+PLLL+Hn54dt27YhKSkJSUlJ2Lp1K/z9/XkZNQniu+++Q35+vsrywsJCfPfddwIkIqmLj4+HXC5XWS6XyxX35nJycsKzZ8/KOppk8AgMqfjpp58gk8ng4+ODvLw8AICmpiZGjBjBWU9JELdv34arq6vK8po1a+LOnTsCJCKpq1+/PiZOnIiNGzcq7kD99OlTTJo0CQ0bNgTw9nNrbW0tZMwKjQWGVGhpaWHx4sWYP3++YpKw6tWr8+6qJBhDQ0PcvXtXZSboO3fuoFKlSsKEIklbu3YtunXrhmrVqilKSlJSEhwcHPDnn38CADIyMjBlyhQhY1ZoHMRLROXe119/jYiICOzevRvVq1cH8La89OrVCw0bNsSaNWsETkhSVFBQgKNHj+LWrVsAAGdnZ7Rr1w5qahydURZYYEhFjx49lKZrL/Lu/Ab9+/eHs7OzAOlIitLS0tChQwdERUWhWrVqAIAHDx6gRYsW2LVrF4yMjIQNSJJz9+5dODg4CB1D0lhgSMXgwYOxZ88eGBkZKSYIu3jxIlJTU9G+fXvExsbi3r17CAsLQ7NmzQROS1JRWFiI0NBQxMbGQldXF3Xq1EHLli2FjkUSpaamhs8++wx+fn744osvlG4ySmWDBYZUfPfdd0hPT8fSpUsVh0ILCgowZswYGBgYYO7cuRg+fDiuXbuGM2fOCJyWpCo1NZVHXkgwMTExWL9+PX7//Xfk5OTgyy+/hK+vLxo3bix0NMlggSEVpqamOHv2LGrUqKG0/NatW2jatCmePXuGK1euoEWLFkhNTRUmJEnKjz/+CDs7O3z55ZcAgD59+uCPP/6AhYUFDh48qLgbMFFZy8vLw969exESEoLDhw+jRo0a8PX1xcCBAxVXJ1Hp4EgjUpGXl6e4u+q7bt68qZiLQ0dHp9hxMkSlYeXKlYorPUJDQxEaGopDhw6hY8eOmDhxosDpSMo0NDTQs2dP7NixAz/++CPu3LmDCRMmwNraGj4+PkhOThY6YoXFy6hJxcCBA+Hn54fvv/9eMZ/BhQsXMG/ePPj4+AAATp06hVq1agkZkyQkJSVFUWD279+PPn36oH379rCzs+MhexJUVFQU1q1bh61bt6JSpUqYMGEC/Pz88ODBA8ycORPdunVDZGSk0DErJBYYUrFo0SKYm5sjKCgIjx8/BgCYm5tj3LhxCAwMBAC0b98eHTp0EDImSYixsTGSkpJgbW2Nw4cPY86cOQDeDuwtboZeotL2888/Y/369YiLi0OnTp2wceNGdOrUSTFu0N7eHiEhISpzF1HJ4RgY+qD09HQAKHbKbKKyEhAQgP3798PJyQmXLl3CvXv3oK+vj61btyIoKAgXL14UOiJJjJOTE3x9fTF48GBYWloWu01OTg5+//13DBo0qIzTSQMLDKmYPn06fH19YWtrK3QUIgBAbm4uFi9ejKSkJAwePBju7u4A3h4tNDAwgL+/v8AJiaisscCQinr16uHq1auKOQ569eoFbW1toWMREQnu9evX
mDBhAvbu3YucnBy0bdsWv/zyC684EgALDBXr0qVLijkO8vLy0LdvX/j6+ioG9RKVtfj4eAQHB+PGjRsAAFdXV4wdO5azoVKZGj9+PH799VcMGDAAOjo6+P3339GsWTPs3r1b6GiSwwJDH5Sbm4t9+/Zh/fr1OHLkCGrWrAk/Pz8MHjwYhoaGQscjiThy5Ag+//xz1KtXTzH789mzZxEbG4t9+/ahXbt2AickqbC3t0dQUBB69+4NAIiOjkaTJk2QlZUFDQ1eF1OWWGDog3JycrB7926sW7cOx48fR9OmTfHo0SM8fvwYq1evVkwsRlSa3N3d4e3tjQULFigt/+6773D06FEO4qUyo6mpifv378PKykqxTE9PDzdv3oSNjY2AyaSHE9lRsaKjoxEQEABLS0uMGzcO7u7uuHHjBk6dOoXbt29j7ty5GD16tNAxSSJu3LgBPz8/leW+vr64fv26AIlIqgoKCqCpqam0TENDg5fzC4DHu0iFm5sbbt68ifbt22Pt2rXo2rUr1NXVlbbp168fxowZI1BCkhpTU1PExMTAyclJaXlMTAzMzMwESkVSVFhYiLZt2yqdLsrMzETXrl2hpaWlWMajgqWPBYZU9OnTB76+vqhatep7t6lSpQoKCgrKMBVJ2dChQzFs2DDcvXsXTZs2BfB2DMyPP/6I8ePHC5yOpGT69Okqy7p16yZAEuIYGFKSnp6O8+fPIycnB40aNeKlgVQuFBYWIjg4GAsXLsSjR48AAFZWVpg4cSJGjx7N+3IRSRALDCnExMSgU6dOePz4MQoLC2FgYIDt27fD29tb6GhECq9evQIAGBgYCJyEiITEQbykEBgYCHt7e5w5cwbR0dFo27YtAgIChI5FpMTAwIDlhQTRoUMHnDt37h+3e/XqFX788UcsW7asDFJJF4/AkEKVKlVw9OhReHh4AABSU1NRuXJlpKam8l5IJCh3d/diTxPJZDLo6OjA0dERgwcPRuvWrQVIR1Kxdu1aTJs2DYaGhujatSsaNGgAKysr6Ojo4OXLl7h+/TrOnDmDgwcPonPnzvjf//7HS6tLEQsMKaipqSElJUXpqg4DAwNcvnwZ9vb2AiYjqZs8eTJWrFgBNzc3NGrUCABw4cIFXL58GYMHD8b169cRFhaGXbt2cUAllars7Gzs2LED27Ztw5kzZ5CWlgbgbZl2dXWFt7c3/Pz84OLiInDSio8FhhTU1NRw/PhxVK5cWbGsadOm2L59O6pVq6ZYVqdOHSHikYQNHToUNjY2mDp1qtLyOXPm4P79+1i9ejWmT5+OAwcOICoqSqCUJEVpaWnIysqCiYmJyvwwVLpYYEhBTU0NMpkMxX0kipbLZDJO2ERlztDQENHR0XB0dFRafufOHdSvXx9paWm4efMmGjZsqBjkS0QVG+eBIYWEhAShIxAVS0dHB+Hh4SoFJjw8HDo6OgDezpBa9G8iqvhYYEjB1tZW6AhExRo1ahSGDx+O6OhoxR3RL1y4gDVr1uD7778H8PaGj/Xq1RMwJRGVJZ5CIgBAYmLiJ42Wf/jw4Qdn6iUqaZs3b8bSpUsRFxcHAHB2dsaoUaPQv39/AEBWVpbiqiQiqvhYYAgAYG5uju7du8Pf31/xF+7fpaWlYfv27Vi8eDGGDRvGmzkSEZFgeAqJAADXr1/H3Llz0a5dO+jo6KB+/foq8xtcu3YNHh4eCAoKQqdOnYSOTBIyaNAg+Pn5oWXLlkJHIVKSk5ODJ0+eqNwbjvO/lD4egSElWVlZOHDgAM6cOYP79+8jKysLVapUgbu7O7y9vVG7dm2hI5IEde/eHQcPHoStrS2GDBmCQYMG8RQmCer27dvw9fVFeHi40nJerVl2WGCISBSePn2KTZs2YcOGDbh+/Tq8vLzg5+eHbt26cf4NKnPNmjWDhoYGvvvuO1haWqrMFF23bl2BkkkHCwwRic7Fixexfv16rFmzBvr6+vjqq6/wzTffwMnJSehoJBGVKlVCdHQ0atas
KXQUyeLNHIlIVJKTkxEaGorQ0FCoq6ujU6dOuHLlClxdXbFo0SKh45FEuLq64tmzZ0LHkDQegSGici83Nxd79+7F+vXrcfToUdSpUwf+/v7o37+/4kaju3fvhq+vL16+fClwWpKC48ePY8qUKZg3bx7c3NxUTmPyBriljwWGiMq9KlWqoKCgAP369cPQoUOLnbAuNTUV7u7unFGayoSa2tsTGH8f+8JBvGWHBYaIyr1Nmzahd+/enKSOyo1Tp059cP1nn31WRkmkiwWGinX79m2cOHGi2PkNpk2bJlAqkqJ79+4hNDQUubm5+Oyzz1CrVi2hIxFROcACQypWr16NESNGoEqVKrCwsFA6RCqTyXDx4kUB05GUnDhxAl26dEFWVhYAQENDA+vWrcNXX30lcDKSosuXL6N27dpQU1PD5cuXP7htnTp1yiiVdLHAkApbW1t88803CAwMFDoKSVzz5s1RpUoVrFixAjo6OpgyZQp2796NR48eCR2NJEhNTQ0pKSkwMzODmpoaZDIZivsVyjEwZYMFhlTI5XLExMTAwcFB6CgkcUZGRggPD4erqysAIDMzE3K5HI8fP4aJiYnA6Uhq7t+/DxsbG8hkMty/f/+D29ra2pZRKuligSEVfn5+aNiwIYYPHy50FJK4d//iLWJgYIDY2FgWbCKJ480cSYWjoyOmTp2Kc+fOFTu/Ae9CTWXpyJEjMDQ0VDwuKChAWFgYrl69qlj2+eefCxGNJGzjxo0fXO/j41NGSaSLR2BIhb29/XvXyWQy3L17twzTkJQVzbXxIRxvQEIwNjZWepybm4vMzExoaWlBT08PL168ECiZdPAIDKngRGBUXvz9En6i8qK4GZ9v376NESNGYOLEiQIkkh4egSEiIiohUVFR+Oqrr3Dz5k2ho1R4PAJDAIDx48dj9uzZqFSpEsaPH//BbX/++ecySkVSdu7cOTRp0uSjts3MzERCQgInuSPBaWho8DL/MsICQwCAS5cuITc3V/Hv9/n7fT+ISsvAgQPh4OAAf39/dOrUCZUqVVLZ5vr16/jtt9+wfv16/PjjjywwVGb27t2r9LiwsBDJyclYunQpmjVrJlAqaeEpJCIql3Jzc7FixQosW7YMd+/eRY0aNWBlZQUdHR28fPkSN2/eREZGBnr06IHvv/8ebm5uQkcmCfn7AHOZTAZTU1O0adMGCxcuhKWlpUDJpIMFhojKvaioKJw5cwb3799HVlYWqlSpAnd3d7Ru3RqVK1cWOh4RCYAFhlS0bt36g6eKjh8/XoZpiIiIVHEMDKmoV6+e0uPc3FzExMTg6tWrGDRokDChiIjKkfdd7CCTyaCjowNHR0d069aNRwhLEY/A0EebMWMGMjIy8NNPPwkdhYhIUK1bt8bFixeRn58PZ2dnAMCtW7egrq6OmjVrIi4uDjKZDGfOnFHcy4tKFgsMfbQ7d+6gUaNGnGGSiCQvODgYf/31F9avXw+5XA4ASEtLg7+/P5o3b46hQ4eif//+yMrKwpEjRwROWzGxwNBH27RpEwIDAznHARFJXtWqVREaGqpydOXatWto3749Hj58iIsXL6J9+/Z49uyZQCkrNo6BIRU9e/ZUelw0v0FUVBSmTp0qUCoiovIjLS0NT548USkwT58+RXp6OgDAyMgIOTk5QsSTBBYYUvHunX+Bt/MdODs7Y9asWWjfvr1AqUjqwsLCEBYWhidPnqjcI2ndunUCpSKp6tatG3x9fbFw4UI0bNgQAHDhwgVMmDAB3bt3BwBERkaiRo0aAqas2HgKiYjKvZkzZ2LWrFlo0KABLC0tVS7z3717t0DJSKoyMjIwbtw4bNy4EXl5eQDe3kZg0KBBWLRoESpVqoSYmBgAqld2UslggSGics/S0hJBQUEYOHCg0FGIlGRkZODu3bsAAAcHB+jr6wucSDpYYEiFsbFxsRPZvTu/weDBgzFkyBAB0pEUmZiYIDIyEtWrVxc6ChGVExwDQyqmTZuGuXPnomPH
jmjUqBGAt+dyDx8+jJEjRyIhIQEjRoxAXl4ehg4dKnBakgJ/f39s2bKFg8ip3Hj9+jUWLFjw3nFZRUdlqPSwwJCKM2fOYM6cORg+fLjS8lWrVuHo0aP4448/UKdOHSxZsoQFhsrEmzdv8Ouvv+LYsWOoU6cONDU1ldb//PPPAiUjqfL398epU6cwcODAYsdlUenjKSRSoa+vj5iYGDg6Oiotv3PnDurVq4eMjAzEx8ejTp06eP36tUApSUpat2793nUymYz356IyZ2RkhAMHDqBZs2ZCR5EsHoEhFZUrV8a+ffswbtw4peX79u1T3Nfj9evXMDAwECIeSdCJEyeEjkCkxNjYmPc5EhgLDKmYOnUqRowYgRMnTijGwFy4cAEHDx7EypUrAQChoaH47LPPhIxJRCSY2bNnY9q0adiwYQP09PSEjiNJPIVExTp79iyWLl2KuLg4AICzszNGjRqFpk2bCpyMpKJnz54ICQmBXC5XmR3673bt2lVGqYjecnd3R3x8PAoLC2FnZ6cyLuvixYsCJZMOHoGhYjVr1ozndklQhoaGioGRf58dmkhoRbPtknB4BIaKVVBQgDt37hR7eWDLli0FSkVERPQWj8CQinPnzqF///64f/8+/t5vZTIZ8vPzBUpGRFR+pKamYufOnYiPj8fEiRNRuXJlXLx4Eebm5qhatarQ8So8HoEhFfXq1UONGjUwc+bMYuc34OF8Kmv29vYfnGeDk4ZRWbt8+TK8vLxgaGiIe/fuIS4uDg4ODpgyZQoSExOxceNGoSNWeDwCQypu376NnTt3qswDQySUsWPHKj3Ozc3FpUuXcPjwYUycOFGYUCRp48ePx+DBgxEUFKQ0pUSnTp3Qv39/AZNJBwsMqWjcuDHu3LnDAkPlxpgxY4pdvmzZMkRFRZVxGqK3U0usWrVKZXnVqlWRkpIiQCLpYYEhFaNGjcK3336LlJQUuLm5qVweWKdOHYGSESnr2LEjJk+ejPXr1wsdhSRGW1sb6enpKstv3boFU1NTARJJD8fAkAo1NTWVZTKZDIWFhRzES+VKUFAQli9fjnv37gkdhSTG398fz58/x/bt21G5cmVcvnwZ6urq6N69O1q2bIng4GChI1Z4LDCk4v79+x9cb2trW0ZJiN5yd3dXGsRbWFiIlJQUPH36FMuXL8ewYcMETEdSlJaWhi+++AJRUVF49eoVrKyskJKSAk9PTxw8eBCVKlUSOmKFxwJDROXezJkzlR6rqanB1NQUrVq1Qs2aNQVKRQScOXMGly9fRkZGBjw8PODl5SV0JMlggaFibdq0CStXrkRCQgIiIiJga2uL4OBg2Nvbo1u3bkLHIyIiiVMd7ECSt2LFCowfPx6dOnVCamqqYsyLkZERz+uSINLT04v9evXqFXJycoSORxIVFhaGLl26oHr16qhevTq6dOmCY8eOCR1LMlhgSMUvv/yC1atX44cffoC6urpieYMGDXDlyhUBk5FUGRkZwdjYWOXLyMgIurq6sLW1xfTp01Vue0FUWpYvX44OHTrAwMAAY8aMwZgxYyCXy9GpUycsW7ZM6HiSwMuoSUVCQgLc3d1Vlmtra+P169cCJCKpCwkJwQ8//IDBgwejUaNGAIDIyEhs2LABU6ZMwdOnT/HTTz9BW1sb33//vcBpSQrmzZuHRYsWISAgQLFs9OjRaNasGebNm4eRI0cKmE4aWGBIhb29PWJiYlSuNjp8+DBcXFwESkVStmHDBixcuBB9+vRRLOvatSvc3NywatUqhIWFwcbGBnPnzmWBoTKRmpqKDh06qCxv3749AgMDBUgkPTyFRCrGjx+PkSNHYtu2bSgsLERkZCTmzp2LyZMnY9KkSULHIwkKDw8v9qigu7s7IiIiAADNmzdHYmJiWUcjifr888+xe/duleV//vknunTpIkAi6eERGFLh7+8PXV1dTJkyBZmZmejfvz+srKywePFi9O3bV+h4JEHW1tZYu3YtFixYoLR87dq1sLa2
BgA8f/4cxsbGQsQjCXJ1dcXcuXNx8uRJeHp6AgDOnTuHs2fP4ttvv8WSJUsU244ePVqomBUaL6MmFdnZ2cjLy0OlSpWQmZmJjIwMmJmZCR2LJGzv3r3o3bs3atasiYYNGwIAoqKicPPmTezcuRNdunTBihUrcPv2bfz8888CpyUpsLe3/6jtZDIZ75ZeSlhgSOHp06fw8fHBsWPHUFBQgIYNG2Lz5s2oXr260NGIkJCQgFWrVuHWrVsAAGdnZ3z99dews7MTNhgRCYIFhhR8fX1x6NAhjB49Gjo6Oli1ahUsLS1x4sQJoaMREREpYYEhBWtra6xZswbe3t4AgNu3b8PFxQWvX7+Gtra2wOlI6lJTUxEZGYknT56ozPfi4+MjUCoiEgoLDCmoq6vj4cOHsLCwUCyrVKkSrl27xsP0JKh9+/ZhwIAByMjIgFwuV7qxo0wmw4sXLwRMR0RC4GXUpOTdmXeLHrPjktC+/fZb+Pr6IiMjA6mpqXj58qXii+WFSJp4BIYU1NTUYGhoqPTXbWpqKuRyOdTU/q/r8hcGlbVKlSrhypUrcHBwEDoKEZUTnAeGFNavXy90BKJieXt7IyoqigWGypXU1FSsXbsWN27cAADUqlULvr6+MDQ0FDiZNPAIDBGVe2vXrsWsWbMwZMgQuLm5QVNTU2n9559/LlAykqqoqCh4e3tDV1dXcX+uCxcuICsrC0ePHoWHh4fACSs+FhgiKvfePYX5dzKZDPn5+WWYhgho0aIFHB0dsXr1amhovD2ZkZeXB39/f9y9exenT58WOGHFxwJDRET0iXR1dXHp0iXUrFlTafn169fRoEEDZGZmCpRMOngVEhGJyps3b4SOQAS5XF7szUOTkpJgYGAgQCLpYYEhonIvPz8fs2fPRtWqVaGvr6+4t8zUqVOxdu1agdORFH355Zfw8/PDtm3bkJSUhKSkJGzduhX+/v7o16+f0PEkgQWG3isnJwdxcXHIy8sTOgpJ3Ny5cxESEoKgoCBoaWkplteuXRtr1qwRMBlJ1U8//YSePXvCx8cHdnZ2sLOzw+DBg/HFF1/gxx9/FDqeJHAMDKnIzMzEqFGjsGHDBgDArVu34ODggFGjRqFq1ar47rvvBE5IUuPo6IhVq1ahbdu2MDAwQGxsLBwcHHDz5k14enri5cuXQkckicrMzER8fDwAoHr16tDT0xM4kXTwCAypmDx5MmJjY3Hy5Eno6Ogolnt5eWHbtm0CJiOpevjwIRwdHVWWFxQUIDc3V4BERG/p6enB2NgYxsbGLC9ljAWGVOzZswdLly5F8+bNlWblrVWrluIvDaKy5Orqir/++ktl+c6dO+Hu7i5AIpK6goICzJo1C4aGhrC1tYWtrS2MjIwwe/ZslZuNUungTLyk4unTpzAzM1NZ/vr1a6VCQ1RWpk2bhkGDBuHhw4coKCjArl27EBcXh40bN2L//v1CxyMJ+uGHH7B27VosWLAAzZo1AwCcOXMGM2bMwJs3bzB37lyBE1Z8HANDKlq2bInevXtj1KhRMDAwwOXLl2Fvb49Ro0bh9u3bOHz4sNARSYL++usvzJo1C7GxscjIyICHhwemTZuG9u3bCx2NJMjKygorV65UmQX6zz//xDfffIOHDx8KlEw6eASGVMybNw8dO3bE9evXkZeXh8WLF+P69esIDw/HqVOnhI5HEtWiRQuEhoYKHYMIwNub2v59EjsAqFmzJm94W0Y4BoZUNG/eHDExMcjLy4ObmxuOHj0KMzMzREREoH79+kLHIwmLiorCpk2bsGnTJkRHRwsdhySsbt26WLp0qcrypUuXom7dugIkkh6eQiKicu/Bgwfo168fzp49CyMjIwBv7wTctGlTbN26FdWqVRM2IEnOqVOn0LlzZ9jY2MDT0xMAEBERgaSkJBw8eBAtWrQQOGHFxyMwBABIT09X+veHvojKmr+/P3Jzc3Hjxg28ePECL168wI0bN1BQUAB/f3+h45EEffbZZ7h16xZ6
9OiB1NRUpKamomfPnoiLi2N5KSM8AkMAAHV1dSQnJ8PMzAxqamrFXm1UWFjIO/+SIHR1dREeHq5yyXR0dDRatGjBG+dRmUtMTIS1tXWxPysTExNhY2MjQCpp4SBeAgAcP34clStXBgCcOHFC4DREyqytrYudsC4/Px9WVlYCJCKps7e3V/zR967nz5/D3t6ef+iVARYYAvD2cGhx/yYqD/73v/9h1KhRWLZsGRo0aADg7YDeMWPG4KeffhI4HUlR0RHpv8vIyFCawZxKD08hEQDg8uXLH71tnTp1SjEJkSpjY2NkZmYiLy8PGhpv/+4q+nelSpWUtuUlrFSaxo8fDwBYvHgxhg4dqnT7gPz8fJw/fx7q6uo4e/asUBElg0dgCABQr149yGQy/FOf5RgYEkJwcLDQEYgAAJcuXQLw9gjMlStXlO6OrqWlhbp162LChAlCxZMUHoEhAMD9+/c/eltbW9tSTEJEVP4NGTIEixcvhlwuFzqKZLHAEBERkehwHhgq1qZNm9CsWTNYWVkpjs4EBwfjzz//FDgZEZHwXr9+jalTp6Jp06ZwdHSEg4OD0heVPo6BIRUrVqzAtGnTMHbsWMydO1cx5sXIyAjBwcHo1q2bwAmJiITl7++PU6dOYeDAgbC0tCz2iiQqXTyFRCpcXV0xb948dO/eHQYGBoiNjYWDgwOuXr2KVq1a4dmzZ0JHJCISlJGREQ4cOIBmzZoJHUWyeAqJVCQkJKjMeAoA2traeP36tQCJiP5PUlISkpKShI5BEmdsbKyY/JOEwQJDKuzt7RETE6Oy/PDhw3BxcSn7QCR5eXl5mDp1KgwNDWFnZwc7OzsYGhpiypQpxc7QS1TaZs+ejWnTpvE2FgLiGBhSMX78eIwcORJv3rxBYWEhIiMj8fvvv2P+/PlYs2aN0PFIgkaNGoVdu3YhKChI6c6/M2bMwPPnz7FixQqBE5LULFy4EPHx8TA3N4ednR00NTWV1l+8eFGgZNLBMTBUrM2bN2PGjBmIj48HAFhZWWHmzJnw8/MTOBlJkaGhIbZu3YqOHTsqLT948CD69euHtLQ0gZKRVM2cOfOD66dPn15GSaSLBYY+KDMzExkZGSo3LCMqS2ZmZjh16pTKKcwbN26gZcuWePr0qUDJiEgoHANDH6Snp8fyQoILCAjA7NmzkZ2drViWnZ2NuXPnIiAgQMBkJGWpqalYs2YNJk+erLgH18WLF/Hw4UOBk0kDj8AQAMDd3f2j5zHguV0qaz169EBYWBi0tbVRt25dAEBsbCxycnLQtm1bpW137dolRESSmMuXL8PLywuGhoa4d+8e4uLi4ODggClTpiAxMREbN24UOmKFx0G8BADo3r274t9v3rzB8uXL4erqqhgwee7cOVy7dg3ffPONQAlJyoyMjNCrVy+lZdbW1gKlIXp7scPgwYMRFBQEAwMDxfJOnTqhf//+AiaTDh6BIRX+/v6wtLTE7NmzlZZPnz4dSUlJWLdunUDJiIjKB0NDQ1y8eBHVq1dXmvDz/v37cHZ2xps3b4SOWOFxDAyp2LFjB3x8fFSWf/XVV/jjjz8ESEREVL5oa2sjPT1dZfmtW7dgamoqQCLp4SkkUqGrq4uzZ8/CyclJafnZs2eho6MjUCqSup07d2L79u1ITExETk6O0jqOy6Ky9vnnn2PWrFnYvn07AEAmkyExMRGBgYEqpzupdPAIDKkYO3YsRowYgdGjR+O3337Db7/9hlGjRmHkyJEYN26c0PFIgpYsWYIhQ4bA3Nwcly5dQqNGjWBiYoK7d++qzA1DVBYWLlyomGIiKysLn332GRwdHWFgYIC5c+cKHU8SOAaGirV9+3YsXrwYN27cAAC4uLhgzJgx6NOnj8DJSIpq1qyJ6dOno1+/fkrjDaZNm4YXL15g6dKlQkckiTpz5gwuX76MjIwMeHh4wMvLS+hIksECQ5/k6tWrqF27ttAxSGL09PRw48YN2NrawszMDKGhoahbty5u376N
Jk2a4Pnz50JHJKIyxjEw9I9evXqF33//HWvWrEF0dDTy8/OFjkQSY2FhgRcvXsDW1hY2NjY4d+4c6tati4SEBPBvMCpLWVlZCAsLQ5cuXQAAkydPVppgUV1dHbNnz+Z4wTLAAkPvdfr0aaxZswa7du2ClZUVevbsiWXLlgkdiySoTZs22Lt3L9zd3TFkyBCMGzcOO3fuRFRUFHr27Cl0PJKQDRs24MCBA4oCs3TpUtSqVQu6uroAgJs3b8LKyorjBcsATyGRkpSUFISEhGDt2rVIT09Hnz59sHLlSsTGxsLV1VXoeCRRBQUFKCgogIbG27+5tm7divDwcDg5OeHrr7+GlpaWwAlJKlq0aIFJkyaha9euAKA0JgsAfvvtNyxbtgwRERFCxpQEFhhS6Nq1K06fPo3OnTtjwIAB6NChA9TV1aGpqckCQ4LJy8vDvHnz4Ovri2rVqgkdhyTO0tISERERsLOzAwCYmpriwoULise3bt1Cw4YNeYf0MsDLqEnh0KFD8PPzw8yZM9G5c2eoq6sLHYkIGhoaCAoKQl5entBRiJCamqo05uXp06eK8gK8PVr47noqPSwwpHDmzBm8evUK9evXR+PGjbF06VI8e/ZM6FhEaNu2LU6dOiV0DCJUq1YNV69efe/6y5cv80hhGeEpJFLx+vVrbNu2DevWrUNkZCTy8/Px888/w9fXV+mmZURlZeXKlZg5cyYGDBiA+vXro1KlSkrrP//8c4GSkdSMGTMGx44dQ3R0tMqVRllZWWjQoAG8vLywePFigRJKBwsMfVBcXBzWrl2LTZs2ITU1Fe3atcPevXuFjkUSo6b2/oPFMpmMl/ZTmXn8+DHq1asHLS0tBAQEoEaNGgDe/qxcunQp8vLycOnSJZibmwuctOJjgaGPkp+fj3379mHdunUsMEQkaQkJCRgxYgRCQ0MV8xDJZDK0a9cOy5cvV1yRRKWLBYaIyr2NGzfiyy+/hLa2ttLynJwcbN26tdi7pxOVthcvXuDOnTsAAEdHR1SuXFngRNLCAkNE5Z66ujqSk5NhZmamtPz58+cwMzPjKSQiCeJVSERU7hUWFkImk6ksf/DgAQwNDQVIRERC460EiKjccnd3h0wmg0wmQ9u2bRUz8QJvx2UlJCSgQ4cOAiYkIqGwwBBRudW9e3cAQExMDLy9vaGvr69Yp6WlBTs7O/Tq1UugdEQkJI6BIaJyb8OGDejbt6/KIF4iki4WGCIq95KSkiCTyRQznEZGRmLLli1wdXXFsGHDBE5HRELgIF4iKvf69++PEydOAHh7x3QvLy9ERkbihx9+wKxZswROR0RCYIEhonLv6tWraNSoEQBg+/btcHNzQ3h4ODZv3oyQkBBhwxGRIFhgiKjcy83NVYx/OXbsmOLeRzVr1kRycrKQ0YhIICwwRFTu1apVCytXrsRff/2F0NBQxaXTjx49gomJicDpiEgILDBEVO79+OOPWLVqFVq1aoV+/fqhbt26AIC9e/cqTi0RkbTwKiQiEoX8/Hykp6fD2NhYsezevXvQ09NTucUAEVV8LDBEREQkOjyFRETl3uPHjzFw4EBYWVlBQ0MD6urqSl9EJD28lQARlXuDBw9GYmIipk6dCktLy2Jv7EhE0sJTSERU7hkYGOCvv/5CvXr1hI5CROUETyERUblnbW0N/q1FRO9igSGici84OBjfffcd7t27J3QUIioneAqJiMo9Y2NjZGZmIi8vD3p6etDU1FRa/+LFC4GSEZFQOIiXiMq94OBgoSMQUTnDIzBEREQkOjwCQ0TlUnp6OuRyueLfH1K0HRFJB4/AEFG5pK6ujuTkZJiZmUFNTa3YuV8KCwshk8mQn58vQEIiEhKPwBBRuXT8+HFUrlwZAHDixAmB0xBRecMjMERERCQ6PAJDRKKQmpqKyMhIPHnyBAUFBUrrfHx8BEpFRELhERgiKvf27duHAQMGICMjA3K5XGk8jEwm4zwwRBLEAkNE5V6NGjXQqVMnzJs3
D3p6ekLHIaJygAWGiMq9SpUq4cqVK3BwcBA6ChGVE7wXEhGVe97e3oiKihI6BhGVIxzES0Tl0t69exX/7ty5MyZOnIjr16/Dzc1N5V5In3/+eVnHIyKB8RQSEZVLamofd4CYE9kRSRMLDBEREYkOx8AQERGR6LDAEFG5dfz4cbi6uhZ7M8e0tDTUqlULp0+fFiAZEQmNBYaIyq3g4GAMHTq02LtNGxoa4uuvv8aiRYsESEZEQmOBIaJyKzY2Fh06dHjv+vbt2yM6OroMExFRecECQ0Tl1uPHj1UumX6XhoYGnj59WoaJiKi8YIEhonKratWquHr16nvXX758GZaWlmWYiIjKCxYYIiq3OnXqhKlTp+LNmzcq67KysjB9+nR06dJFgGREJDTOA0NE5dbjx4/h4eEBdXV1BAQEwNnZGQBw8+ZNLFu2DPn5+bh48SLMzc0FTkpEZY0FhojKtfv372PEiBE4cuQIin5cyWQyeHt7Y9myZbC3txc4IREJgQWGiETh5cuXuHPnDgoLC+Hk5ARjY2OhIxGRgFhgiIiISHQ4iJeIiIhEhwWGiIiIRIcFhoiIiESHBYaIiIhEhwWGiCqcwYMHo3v37kLHIKJSxKuQiKjCSUtLQ2FhIYyMjISOQkSlhAWGiIiIRIenkIioVOzcuRNubm7Q1dWFiYkJvLy88Pr1a8XpnZkzZ8LU1BRyuRzDhw9HTk6O4rkFBQWYP38+7O3toauri7p162Lnzp1K+7927Rq6dOkCuVwOAwMDtGjRAvHx8QBUTyH90/5evnyJAQMGwNTUFLq6unBycsL69etL9xtERP+JhtABiKjiSU5ORr9+/RAUFIQePXrg1atX+OuvvxS3AggLC4OOjg5OnjyJe/fuYciQITAxMcHcuXMBAPPnz8dvv/2GlStXwsnJCadPn8ZXX30FU1NTfPbZZ3j48CFatmyJVq1a4fjx45DL5Th79izy8vKKzfNP+5s6dSquX7+OQ4cOoUqVKrhz5w6ysrLK7PtFRJ+Op5CIqMRdvHgR9evXx71792Bra6u0bvDgwdi3bx+SkpKgp6cHAFi5ciUmTpyItLQ05ObmonLlyjh27Bg8PT0Vz/P390dmZia2bNmC77//Hlu3bkVcXBw0NTVVXn/w4MFITU3Fnj17kJ2d/Y/7+/zzz1GlShWsW7eulL4jRFTSeASGiEpc3bp10bZtW7i5ucHb2xvt27fHF198obh/Ud26dRXlBQA8PT2RkZGBpKQkZGRkIDMzE+3atVPaZ05ODtzd3QEAMTExaNGiRbHl5e/u3Lnzj/sbMWIEevXqhYsXL6J9+/bo3r07mjZt+p++B0RUulhgiKjEqaurIzQ0FOHh4Th69Ch++eUX/PDDDzh//vw/PjcjIwMAcODAAVStWlVpnba2NgBAV1f3o7N8zP46duyI+/fv4+DBgwgNDUXbtm0xcuRI/PTTTx/9OkRUtlhgiKhUyGQyNGvWDM2aNcO0adNga2uL3bt3AwBiY2ORlZWlKCLnzp2Dvr4+rK2tUblyZWhrayMxMRGfffZZsfuuU6cONmzYgNzc3H88CuPq6vqP+wMAU1NTDBo0CIMGDUKLFi0wceJEFhiicowFhohK3Pnz5xEWFob27dvDzMwM58+fx9OnT+Hi4oLLly8jJycHfn5+mDJlCu7du4fp06cjICAAampqMDAwwIQJ/6+d+1VRLQgAMP6BIFiOgphNImIRBYv/gsk30OQLWIQ1HIMIBpMg+BLazQZB8CnEV9B2itx22YULd1l272Xg+/UZhkkfM8y8MZ1Oeb1etNttHo8Hl8uFKIoYj8dMJhN2ux3D4ZA4jslms1yvV5rNJuVy+cNaPjPfYrGg0WhQrVZJkoTj8UilUvlPuyfpMwwYSd8uiiLO5zPb7Zbn80mxWGSz2TAYDDgcDvT7fUqlEt1ulyRJGI1GLJfL3+NXqxWFQoH1es3tdiOXy1Gv15nP5wDk83lOpxOz2Yxer0cqlaJWq9Fq
tf64nr/Nl06nieOY+/1OJpOh0+mw3+9/fJ8kfZ2vkCT9U+9fCEnSV/mRnSRJCo4BI0mSguMVkiRJCo4nMJIkKTgGjCRJCo4BI0mSgmPASJKk4BgwkiQpOAaMJEkKjgEjSZKCY8BIkqTg/ALCmM69OcjNSgAAAABJRU5ErkJggg==", + "image/png": "", "text/plain": [ "
" ] @@ -709,13 +859,13 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 61a0ba75-d130-445a-b412-e0b4bc0af31e is DONE. 12.9 kB processed. Open Job" + "Query job 58f9cb42-382b-428b-9ed5-95c064e3ab29 is DONE. 12.9 kB processed. Open Job" ], "text/plain": [ "" @@ -727,7 +877,7 @@ { "data": { "text/html": [ - "Query job fc75ea4f-4296-4c93-9f58-55acb67946c9 is DONE. 12.9 kB processed. Open Job" + "Query job 18128891-8a44-42bb-9f30-a1e978203f41 is DONE. 12.9 kB processed. Open Job" ], "text/plain": [ "" @@ -739,7 +889,7 @@ { "data": { "text/html": [ - "Query job d4a76bed-2722-488d-8a8a-a4301bb589fd is DONE. 12.9 kB processed. Open Job" + "Query job 1ddc23a6-4b05-448b-bb21-d778634ea5c5 is DONE. 12.9 kB processed. Open Job" ], "text/plain": [ "" @@ -751,7 +901,7 @@ { "data": { "text/html": [ - "Query job 9c8f68a5-c141-4f3d-b1e8-ca6b5964ec96 is DONE. 12.9 kB processed. Open Job" + "Query job b4617c85-24c9-4b63-8926-b5db63e7c319 is DONE. 12.9 kB processed. Open Job" ], "text/plain": [ "" @@ -763,7 +913,7 @@ { "data": { "text/html": [ - "Query job ae0a5e44-e566-453f-bd97-3c5c8532e79f is DONE. 12.9 kB processed. Open Job" + "Query job 95df92b9-1b7d-4c6d-9f20-69f928e36850 is DONE. 12.9 kB processed. Open Job" ], "text/plain": [ "" @@ -774,7 +924,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -807,7 +957,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -822,7 +972,7 @@ { "data": { "text/html": [ - "Query job ff4c22ff-2735-46db-b52b-a33cf8fef27e is DONE. 28.9 kB processed. Open Job" + "Query job 6474b1c7-b6e5-4be7-93c6-cf8b0338dd58 is DONE. 34.5 kB processed. Open Job" ], "text/plain": [ "" @@ -843,7 +993,7 @@ { "data": { "text/html": [ - "Load job 49eb4aa0-7166-484e-90b1-63a675eafd00 is DONE. Open Job" + "Load job c93fdfb6-bbe1-446c-b7be-4922d67498ed is DONE. Open Job" ], "text/plain": [ "" @@ -1834,7 +1984,8 @@ "toc_visible": true }, "kernelspec": { - "display_name": "Python 3", + "display_name": "venv", + "language": "python", "name": "python3" }, "language_info": { @@ -1847,7 +1998,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/noxfile.py b/noxfile.py index 967ced87ab..863c7b26d3 100644 --- a/noxfile.py +++ b/noxfile.py @@ -589,20 +589,16 @@ def prerelease(session: nox.sessions.Session, tests_path, extra_pytest_options=( ) already_installed.add("pandas") - # Ibis has introduced breaking changes. Let's exclude ibis head - # from prerelease install list for now. We should enable the head back - # once bigframes supports the version at HEAD. - # session.install( - # "--upgrade", - # "-e", # Use -e so that py.typed file is included. - # "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework", - # ) + # Try to avoid a cap on our SQLGlot so that bigframes + # can be integrated with SQLMesh. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/942 + # If SQLGlot introduces something that breaks us, lets file an issue + # upstream and/or make sure we fix bigframes to work with it. 
session.install( "--upgrade", - "--pre", - "ibis-framework>=9.0.0,<=9.2.0", + "git+https://github.com/tobymao/sqlglot.git#egg=sqlglot", ) - already_installed.add("ibis-framework") + already_installed.add("sqlglot") # Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178 session.install("--no-deps", "db-dtypes") diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index b66860418b..60b8d13149 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -91,8 +91,22 @@ def test_create_single_timeseries() -> None: # 3 0 1 1 False -2470.632377 4945.264753 44319.379307 ['WEEKLY'] False False True # 4 2 1 1 True -2463.671247 4937.342493 42633.299513 ['WEEKLY'] False False True # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate] + + # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_forecast] + prediction = model.predict(horizon=30, confidence_level=0.8) + + print(prediction.peek()) + # Expected output: + # forecast_timestamp forecast_value standard_error confidence_level prediction_interval_lower_bound prediction_interval_upper_bound confidence_interval_lower_bound confidence_interval_upper_bound + # 11 2017-08-13 00:00:00+00:00 1845.439732 328.060405 0.8 1424.772257 2266.107208 1424.772257 2266.107208 + # 29 2017-08-31 00:00:00+00:00 2615.993932 431.286628 0.8 2062.960849 3169.027015 2062.960849 3169.027015 + # 7 2017-08-09 00:00:00+00:00 2639.285993 300.301186 0.8 2254.213792 3024.358193 2254.213792 3024.358193 + # 25 2017-08-27 00:00:00+00:00 1853.735689 410.596551 0.8 1327.233216 2380.238162 1327.233216 2380.238162 + # 1 2017-08-03 00:00:00+00:00 2621.33159 241.093355 0.8 2312.180802 2930.482379 2312.180802 2930.482379 + # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_forecast] 
assert coef is not None assert summary is not None assert model is not None assert parsed_date is not None + assert prediction is not None assert total_visits is not None diff --git a/scripts/manage_cloud_functions.py b/scripts/manage_cloud_functions.py index 145e178f4d..ccf588bde7 100644 --- a/scripts/manage_cloud_functions.py +++ b/scripts/manage_cloud_functions.py @@ -63,7 +63,7 @@ def get_bigframes_functions(project, region): - parent = f"projects/{args.project_id}/locations/{region}" + parent = f"projects/{project}/locations/{region}" functions = GCF_CLIENT.list_functions( functions_v2.ListFunctionsRequest(parent=parent) ) @@ -72,7 +72,7 @@ def get_bigframes_functions(project, region): function for function in functions if function.name.startswith( - f"projects/{args.project_id}/locations/{region}/functions/bigframes-" + f"projects/{project}/locations/{region}/functions/bigframes-" ) ] diff --git a/setup.py b/setup.py index e3fda9d36f..74a0d5475c 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ "python-dateutil>=2.8.2,<3", "pytz>=2022.7", "toolz>=0.11,<2", - "typing-extensions>=4.3.0,<5", + "typing-extensions>=4.5.0,<5", "rich>=12.4.4,<14", ] extras = { diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index c0ffcfaa1c..015153cb01 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -26,3 +26,11 @@ tabulate==0.9 ipywidgets==7.7.1 humanize==4.6.0 matplotlib==3.7.1 +# For vendored ibis-framework. 
+atpublic==2.3 +parsy==2.0 +python-dateutil==2.8.2 +pytz==2022.7 +toolz==0.11 +typing-extensions==4.5.0 +rich==12.4.4 diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 4368a6511d..d0eb6c1904 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -24,6 +24,7 @@ import google.api_core.exceptions from google.cloud import bigquery, functions_v2, storage import pandas +import pyarrow import pytest import test_utils.prefixer @@ -261,6 +262,43 @@ def func(x, y): ) +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_binop_array_output( + session, scalars_dfs, dataset_id, bq_cf_connection +): + try: + + def func(x, y): + return [len(x), abs(y % 4)] + + remote_func = session.remote_function( + [str, int], + list[int], + dataset_id, + bq_cf_connection, + reuse=False, + )(func) + + scalars_df, scalars_pandas_df = scalars_dfs + + scalars_df = scalars_df.dropna() + scalars_pandas_df = scalars_pandas_df.dropna() + bf_result = ( + scalars_df["string_col"] + .combine(scalars_df["int64_col"], remote_func) + .to_pandas() + ) + pd_result = scalars_pandas_df["string_col"].combine( + scalars_pandas_df["int64_col"], func + ) + pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, session.cloudfunctionsclient, remote_func + ) + + @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_decorator_with_bigframes_series( session, scalars_dfs, dataset_id, bq_cf_connection @@ -2117,6 +2155,90 @@ def foo(x, y, z): ) +def test_df_apply_axis_1_multiple_params_array_output(session): + bf_df = bigframes.dataframe.DataFrame( + { + "Id": [1, 2, 3], + "Age": [22.5, 23, 23.5], + "Name": ["alpha", "beta", "gamma"], + } + ) + + expected_dtypes = ( + bigframes.dtypes.INT_DTYPE, + bigframes.dtypes.FLOAT_DTYPE, + 
bigframes.dtypes.STRING_DTYPE, + ) + + # Assert the dataframe dtypes + assert tuple(bf_df.dtypes) == expected_dtypes + + try: + + @session.remote_function([int, float, str], list[str], reuse=False) + def foo(x, y, z): + return [str(x), str(y), z] + + assert getattr(foo, "is_row_processor") is False + assert getattr(foo, "input_dtypes") == expected_dtypes + assert getattr(foo, "output_dtype") == pandas.ArrowDtype( + pyarrow.list_( + bigframes.dtypes.bigframes_dtype_to_arrow_dtype( + bigframes.dtypes.STRING_DTYPE + ) + ) + ) + + # Fails to apply on dataframe with incompatible number of columns + with pytest.raises( + ValueError, + match="^Remote function takes 3 arguments but DataFrame has 2 columns\\.$", + ): + bf_df[["Id", "Age"]].apply(foo, axis=1) + with pytest.raises( + ValueError, + match="^Remote function takes 3 arguments but DataFrame has 4 columns\\.$", + ): + bf_df.assign(Country="lalaland").apply(foo, axis=1) + + # Fails to apply on dataframe with incompatible column datatypes + with pytest.raises( + ValueError, + match="^Remote function takes arguments of types .* but DataFrame dtypes are .*", + ): + bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1) + + # Successfully applies to dataframe with matching number of columns + # and their datatypes + bf_result = bf_df.apply(foo, axis=1).to_pandas() + + # Since this scenario is not pandas-like, let's handcraft the + # expected result + expected_result = pandas.Series( + [ + ["1", "22.5", "alpha"], + ["2", "23", "beta"], + ["3", "23.5", "gamma"], + ] + ) + + pandas.testing.assert_series_equal( + expected_result, bf_result, check_dtype=False, check_index_type=False + ) + + # Let's make sure the read_gbq_function path works for this function + foo_reuse = session.read_gbq_function(foo.bigframes_remote_function) + bf_result = bf_df.apply(foo_reuse, axis=1).to_pandas() + pandas.testing.assert_series_equal( + expected_result, bf_result, check_dtype=False, check_index_type=False + ) + finally: + # 
clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, session.cloudfunctionsclient, foo + ) + + def test_df_apply_axis_1_single_param_non_series(session): bf_df = bigframes.dataframe.DataFrame( { @@ -2181,6 +2303,46 @@ def foo(x): ) +@pytest.mark.flaky(retries=2, delay=120) +def test_df_apply_axis_1_array_output(session, scalars_dfs): + columns = ["int64_col", "int64_too"] + scalars_df, scalars_pandas_df = scalars_dfs + try: + + @session.remote_function(reuse=False) + def generate_stats(row: pandas.Series) -> list[int]: + import pandas as pd + + sum = row["int64_too"] + avg = row["int64_too"] + if pd.notna(row["int64_col"]): + sum += row["int64_col"] + avg = round((avg + row["int64_col"]) / 2) + return [sum, avg] + + assert getattr(generate_stats, "is_row_processor") + + bf_result = scalars_df[columns].apply(generate_stats, axis=1).to_pandas() + pd_result = scalars_pandas_df[columns].apply(generate_stats, axis=1) + + # bf_result.dtype is 'list[pyarrow]' while pd_result.dtype + # is 'object', ignore this mismatch by using check_dtype=False. 
+ pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + # Let's make sure the read_gbq_function path works for this function + generate_stats_reuse = session.read_gbq_function( + generate_stats.bigframes_remote_function, + is_row_processor=True, + ) + bf_result = scalars_df[columns].apply(generate_stats_reuse, axis=1).to_pandas() + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, session.cloudfunctionsclient, generate_stats + ) + + @pytest.mark.parametrize( ("ingress_settings_args", "effective_ingress_settings"), [ @@ -2397,3 +2559,125 @@ def add_one(x: int) -> int: cleanup_remote_function_assets( session.bqclient, session.cloudfunctionsclient, add_one_remote_persist ) + + +@pytest.mark.parametrize( + "array_dtype", + [ + bool, + int, + float, + str, + ], +) +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_array_output( + session, scalars_dfs, dataset_id, bq_cf_connection, array_dtype +): + try: + + @session.remote_function( + dataset=dataset_id, + bigquery_connection=bq_cf_connection, + reuse=False, + ) + def featurize(x: int) -> list[array_dtype]: # type: ignore + return [array_dtype(i) for i in [x, x + 1, x + 2]] + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_too"] + bf_result = bf_int64_col.apply(featurize).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_too"] + pd_result = pd_int64_col.apply(featurize) + + # ignore any dtype disparity + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + # Let's make sure the read_gbq_function path works for this function + featurize_reuse = session.read_gbq_function( + featurize.bigframes_remote_function # type: ignore + ) + bf_result = scalars_df["int64_too"].apply(featurize_reuse).to_pandas() + pandas.testing.assert_series_equal(pd_result, bf_result, 
check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, session.cloudfunctionsclient, featurize + ) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_array_output_partial_ordering_mode( + unordered_session, scalars_dfs, dataset_id, bq_cf_connection +): + try: + + @unordered_session.remote_function( + dataset=dataset_id, + bigquery_connection=bq_cf_connection, + reuse=False, + ) + def featurize(x: float) -> list[float]: # type: ignore + return [x, x + 1, x + 2] + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["float64_col"].dropna() + bf_result = bf_int64_col.apply(featurize).to_pandas() + + pd_int64_col = scalars_pandas_df["float64_col"].dropna() + pd_result = pd_int64_col.apply(featurize) + + # ignore any dtype disparity + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + # Let's make sure the read_gbq_function path works for this function + featurize_reuse = unordered_session.read_gbq_function( + featurize.bigframes_remote_function # type: ignore + ) + bf_int64_col = scalars_df["float64_col"].dropna() + bf_result = bf_int64_col.apply(featurize_reuse).to_pandas() + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + unordered_session.bqclient, + unordered_session.cloudfunctionsclient, + featurize, + ) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_array_output_multiindex( + session, scalars_dfs, dataset_id, bq_cf_connection +): + try: + + @session.remote_function( + dataset=dataset_id, + bigquery_connection=bq_cf_connection, + reuse=False, + ) + def featurize(x: int) -> list[float]: + return [x, x + 0.5, x + 0.33] + + scalars_df, scalars_pandas_df = scalars_dfs + multiindex_cols = ["rowindex", "string_col"] + scalars_df = 
scalars_df.reset_index().set_index(multiindex_cols) + scalars_pandas_df = scalars_pandas_df.reset_index().set_index(multiindex_cols) + + bf_int64_col = scalars_df["int64_too"] + bf_result = bf_int64_col.apply(featurize).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_too"] + pd_result = pd_int64_col.apply(featurize) + + # ignore any dtype disparity + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, session.cloudfunctionsclient, featurize + ) diff --git a/tests/system/small/bigquery/test_datetime.py b/tests/system/small/bigquery/test_datetime.py new file mode 100644 index 0000000000..984e75ac10 --- /dev/null +++ b/tests/system/small/bigquery/test_datetime.py @@ -0,0 +1,66 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +import pandas as pd + +from bigframes import bigquery + + +def test_unix_seconds(scalars_dfs): + bigframes_df, pandas_df = scalars_dfs + + actual_res = bigquery.unix_seconds(bigframes_df["timestamp_col"]).to_pandas() + + expected_res = ( + pandas_df["timestamp_col"] + .apply(lambda ts: _to_unix_epoch(ts, "s")) + .astype("Int64") + ) + pd.testing.assert_series_equal(actual_res, expected_res) + + +def test_unix_millis(scalars_dfs): + bigframes_df, pandas_df = scalars_dfs + + actual_res = bigquery.unix_millis(bigframes_df["timestamp_col"]).to_pandas() + + expected_res = ( + pandas_df["timestamp_col"] + .apply(lambda ts: _to_unix_epoch(ts, "ms")) + .astype("Int64") + ) + pd.testing.assert_series_equal(actual_res, expected_res) + + +def test_unix_micros(scalars_dfs): + bigframes_df, pandas_df = scalars_dfs + + actual_res = bigquery.unix_micros(bigframes_df["timestamp_col"]).to_pandas() + + expected_res = ( + pandas_df["timestamp_col"] + .apply(lambda ts: _to_unix_epoch(ts, "us")) + .astype("Int64") + ) + pd.testing.assert_series_equal(actual_res, expected_res) + + +def _to_unix_epoch( + ts: pd.Timestamp, unit: typing.Literal["s", "ms", "us"] +) -> typing.Optional[int]: + if pd.isna(ts): + return None + return (ts - pd.Timestamp("1970-01-01", tz="UTC")) // pd.Timedelta(1, unit) diff --git a/tests/system/small/bigquery/test_sql.py b/tests/system/small/bigquery/test_sql.py new file mode 100644 index 0000000000..283624100a --- /dev/null +++ b/tests/system/small/bigquery/test_sql.py @@ -0,0 +1,50 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.bigquery + + +def test_sql_scalar_on_scalars_null_index(scalars_df_null_index): + series = bigframes.bigquery.sql_scalar( + """ + CAST({0} AS INT64) + + BYTE_LENGTH({1}) + + UNIX_DATE({2}) + + EXTRACT(YEAR FROM {3}) + + ST_NUMPOINTS({4}) + + LEAST( + {5}, + CAST({6} AS INT64), + CAST({7} AS INT64) + ) + CHAR_LENGTH({8}) + + EXTRACT(SECOND FROM {9}) + + UNIX_SECONDS({10}) + """, + columns=[ + # Try to include all scalar types in a single test. + scalars_df_null_index["bool_col"], + scalars_df_null_index["bytes_col"], + scalars_df_null_index["date_col"], + scalars_df_null_index["datetime_col"], + scalars_df_null_index["geography_col"], + scalars_df_null_index["int64_col"], + scalars_df_null_index["numeric_col"], + scalars_df_null_index["float64_col"], + scalars_df_null_index["string_col"], + scalars_df_null_index["time_col"], + scalars_df_null_index["timestamp_col"], + ], + ) + result = series.to_pandas() + assert len(result) == len(scalars_df_null_index) diff --git a/tests/system/small/bigquery/test_vector_search.py b/tests/system/small/bigquery/test_vector_search.py index b6a6d59c4c..6297d729ea 100644 --- a/tests/system/small/bigquery/test_vector_search.py +++ b/tests/system/small/bigquery/test_vector_search.py @@ -23,6 +23,7 @@ import bigframes.bigquery as bbq import bigframes.pandas as bpd +from tests.system.utils import assert_pandas_df_equal # Need at least 5,000 rows to create a vector index. 
VECTOR_DF = pd.DataFrame( @@ -148,8 +149,11 @@ def test_vector_search_basic_params_with_df(): }, index=pd.Index([1, 0, 0, 1], dtype="Int64"), ) - pd.testing.assert_frame_equal( - vector_search_result, expected, check_dtype=False, rtol=0.1 + assert_pandas_df_equal( + expected.sort_values("id"), + vector_search_result.sort_values("id"), + check_dtype=False, + rtol=0.1, ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 4e0e5c2739..e7d6ad67e1 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2554,6 +2554,27 @@ def test_join_param_on(scalars_dfs, how): assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) +@all_joins +def test_df_join_series(scalars_dfs, how): + bf_df, pd_df = scalars_dfs + + bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]] + bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2) + bf_series_b = bf_df["float64_col"] + + if how == "cross": + with pytest.raises(ValueError): + bf_df_a.join(bf_series_b, on="rowindex_2", how=how) + else: + bf_result = bf_df_a.join(bf_series_b, on="rowindex_2", how=how).to_pandas() + + pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]] + pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2) + pd_series_b = pd_df["float64_col"] + pd_result = pd_df_a.join(pd_series_b, on="rowindex_2", how=how) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) + + @pytest.mark.parametrize( ("by", "ascending", "na_position"), [ @@ -2581,6 +2602,11 @@ def test_dataframe_sort_values( ) +def test_dataframe_sort_values_invalid_input(scalars_df_index): + with pytest.raises(KeyError): + scalars_df_index.sort_values(by=scalars_df_index["int64_col"]) + + def test_dataframe_sort_values_stable(scalars_df_index, scalars_pandas_df_index): bf_result = ( scalars_df_index.sort_values("int64_col", kind="stable") diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index 
f70e16447a..6da4c6ff9c 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -247,6 +247,7 @@ def test_null_index_stack(scalars_df_null_index, scalars_pandas_df_default_index .droplevel(level=0, axis=0) ) pd_result.index = pd_result.index.astype(bf_result.index.dtype) + pd.testing.assert_series_equal( bf_result, pd_result, diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index 2f677bebed..c3f3890459 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -20,6 +20,7 @@ import pandas as pd import pyarrow import pytest +import test_utils.prefixer import bigframes import bigframes.dtypes @@ -28,6 +29,8 @@ from bigframes.functions import remote_function as rf from tests.system.utils import assert_pandas_df_equal +_prefixer = test_utils.prefixer.Prefixer("bigframes", "") + @pytest.fixture(scope="module") def bq_cf_connection() -> str: @@ -770,7 +773,7 @@ def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id): @pytest.mark.flaky(retries=2, delay=120) -def test_read_gbq_function_enforces_explicit_types( +def test_read_gbq_function_requires_explicit_types( session, bigquery_client, dataset_id ): dataset_ref = bigquery.DatasetReference.from_string(dataset_id) @@ -839,6 +842,156 @@ def test_read_gbq_function_enforces_explicit_types( ) +@pytest.mark.parametrize( + ("session_fixture",), + [ + pytest.param("session"), + pytest.param("unordered_session"), + ], +) +@pytest.mark.parametrize( + ("array_type", "expected_data"), + [ + pytest.param(None, ["[1,2,3]", "[10,11,12]", "[100,101,102]"], id="None"), + pytest.param( + list[str], + [["1", "2", "3"], ["10", "11", "12"], ["100", "101", "102"]], + id="list-str", + ), + pytest.param( + list[int], [[1, 2, 3], [10, 11, 12], [100, 101, 102]], id="list-int" + ), + ], +) +@pytest.mark.flaky(retries=2, delay=120) +def test_read_gbq_function_respects_python_output_type( + 
request, session_fixture, bigquery_client, dataset_id, array_type, expected_data +): + session = request.getfixturevalue(session_fixture) + dataset_ref = bigquery.DatasetReference.from_string(dataset_id) + arg = bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType(bigquery.StandardSqlTypeNames.INT64), + ) + sql_routine = bigquery.Routine( + dataset_ref.routine(_prefixer.create_prefix()), + body="TO_JSON_STRING([x, x+1, x+2])", + arguments=[arg], + return_type=bigquery.StandardSqlDataType(bigquery.StandardSqlTypeNames.STRING), + description=rf_utils.get_bigframes_metadata(python_output_type=array_type), + type_=bigquery.RoutineType.SCALAR_FUNCTION, + ) + + # Create the routine in BigQuery and read it back using read_gbq_function. + bigquery_client.create_routine(sql_routine, exists_ok=True) + func = rf.read_gbq_function(str(sql_routine.reference), session=session) + + # test that the function works as expected + s = bigframes.series.Series([1, 10, 100]) + expected = pd.Series(expected_data) + actual = s.apply(func).to_pandas() + + # ignore type disparities, e.g. 
"int64" in pandas v/s "Int64" in bigframes + pd.testing.assert_series_equal( + expected, actual, check_dtype=False, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("array_type",), + [ + pytest.param(list[bool], id="list-bool"), + pytest.param(list[float], id="list-float"), + pytest.param(list[int], id="list-int"), + pytest.param(list[str], id="list-str"), + ], +) +@pytest.mark.flaky(retries=2, delay=120) +def test_read_gbq_function_supports_python_output_type_only_for_string_outputs( + session, bigquery_client, dataset_id, array_type +): + dataset_ref = bigquery.DatasetReference.from_string(dataset_id) + arg = bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType(bigquery.StandardSqlTypeNames.INT64), + ) + sql_routine = bigquery.Routine( + dataset_ref.routine(_prefixer.create_prefix()), + body="x+1", + arguments=[arg], + return_type=bigquery.StandardSqlDataType(bigquery.StandardSqlTypeNames.INT64), + description=rf_utils.get_bigframes_metadata(python_output_type=array_type), + type_=bigquery.RoutineType.SCALAR_FUNCTION, + ) + + # Create the routine in BigQuery and read it back using read_gbq_function. 
+ bigquery_client.create_routine(sql_routine, exists_ok=True) + + # reading back will fail because we currently allow specifying an explicit + # output_type only for BQ functions with STRING output + with pytest.raises( + TypeError, + match="An explicit output_type should be provided only for a BigQuery function with STRING output.", + ): + rf.read_gbq_function(str(sql_routine.reference), session=session) + + +@pytest.mark.parametrize( + ("array_type",), + [ + pytest.param(list[bool], id="list-bool"), + pytest.param(list[float], id="list-float"), + pytest.param(list[int], id="list-int"), + pytest.param(list[str], id="list-str"), + ], +) +@pytest.mark.flaky(retries=2, delay=120) +def test_read_gbq_function_supported_python_output_type( + session, bigquery_client, dataset_id, array_type +): + dataset_ref = bigquery.DatasetReference.from_string(dataset_id) + arg = bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType(bigquery.StandardSqlTypeNames.INT64), + ) + sql_routine = bigquery.Routine( + dataset_ref.routine(_prefixer.create_prefix()), + body="CAST(x AS STRING)", + arguments=[arg], + return_type=bigquery.StandardSqlDataType(bigquery.StandardSqlTypeNames.STRING), + description=rf_utils.get_bigframes_metadata(python_output_type=array_type), + type_=bigquery.RoutineType.SCALAR_FUNCTION, + ) + + # Create the routine in BigQuery and read it back using read_gbq_function. + bigquery_client.create_routine(sql_routine, exists_ok=True) + rf.read_gbq_function(str(sql_routine.reference), session=session) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_df_apply_scalar_func(session, scalars_dfs): + scalars_df, _ = scalars_dfs + bdf = bigframes.pandas.DataFrame( + { + "Column1": scalars_df["string_col"], + "Column2": scalars_df["string_col"], + } + ) + + # The "cw_lower_case_ascii_only" is a scalar function. + func_ref = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") + + # DataFrame '.apply()' only supports series level application. 
+ with pytest.raises(NotImplementedError) as context: + bdf.apply(func_ref) + assert str(context.value) == ( + "BigFrames DataFrame '.apply()' does not support remote function for " + "column-wise (i.e. with axis=0) operations, please use a regular python " + "function instead. For element-wise operations of the remote function, " + "please use '.map()'." + ) + + @pytest.mark.flaky(retries=2, delay=120) def test_read_gbq_function_multiple_inputs_not_a_row_processor(session): with pytest.raises(ValueError) as context: diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 960e40465b..e95509e033 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -407,9 +407,12 @@ def test_read_gbq_w_ambigous_name( session: bigframes.Session, ): # Ensure read_gbq works when table and column share a name - df = session.read_gbq( - "bigframes-dev.bigframes_tests_sys.ambiguous_name" - ).to_pandas() + df = ( + session.read_gbq("bigframes-dev.bigframes_tests_sys.ambiguous_name") + .sort_values("x", ascending=False) + .reset_index(drop=True) + .to_pandas() + ) pd_df = pd.DataFrame({"x": [2, 1], "ambiguous_name": [20, 10]}) pd.testing.assert_frame_equal(df, pd_df, check_dtype=False, check_index_type=False) diff --git a/tests/unit/core/test_dtypes.py b/tests/unit/core/test_dtypes.py index a5b0889bf9..e1fac624d7 100644 --- a/tests/unit/core/test_dtypes.py +++ b/tests/unit/core/test_dtypes.py @@ -259,11 +259,11 @@ def test_literal_to_ibis_scalar_throws_on_incompatible_literal(): def test_remote_function_io_types_are_supported_bigframes_types(): from bigframes_vendored.ibis.expr.datatypes.core import ( - dtype as python_type_to_bigquery_type, + dtype as python_type_to_ibis_type, ) from bigframes.dtypes import RF_SUPPORTED_IO_PYTHON_TYPES as rf_supported_io_types for python_type in rf_supported_io_types: - ibis_type = python_type_to_bigquery_type(python_type) + ibis_type = python_type_to_ibis_type(python_type) assert 
ibis_type in bigframes.core.compile.ibis_types.IBIS_TO_BIGFRAMES diff --git a/tests/unit/core/test_log_adapter.py b/tests/unit/core/test_log_adapter.py index 1199391813..7b626838ac 100644 --- a/tests/unit/core/test_log_adapter.py +++ b/tests/unit/core/test_log_adapter.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + +from google.cloud import bigquery import pytest from bigframes.core import log_adapter @@ -22,6 +25,12 @@ MAX_LABELS_COUNT = 56 +@pytest.fixture +def mock_bqclient(): + mock_bqclient = mock.create_autospec(spec=bigquery.Client) + return mock_bqclient + + @pytest.fixture def test_instance(): # Create a simple class for testing @@ -61,3 +70,88 @@ def test_get_and_reset_api_methods(test_instance): previous_methods = log_adapter.get_and_reset_api_methods() assert previous_methods is not None assert log_adapter._api_methods == [] + + +@pytest.mark.parametrize( + ("class_name", "method_name", "args", "kwargs", "task", "expected_labels"), + ( + ( + "DataFrame", + "resample", + ["a", "b", "c"], + {"aa": "bb", "rule": "1s"}, + log_adapter.PANDAS_API_TRACKING_TASK, + { + "task": log_adapter.PANDAS_API_TRACKING_TASK, + "class_name": "dataframe", + "method_name": "resample", + "args_count": 3, + "kwargs_0": "rule", + }, + ), + ( + "Series", + "resample", + [], + {"aa": "bb", "rule": "1s"}, + log_adapter.PANDAS_PARAM_TRACKING_TASK, + { + "task": log_adapter.PANDAS_PARAM_TRACKING_TASK, + "class_name": "series", + "method_name": "resample", + "args_count": 0, + "kwargs_0": "rule", + }, + ), + ( + "DataFrame", + "resample", + [], + {"aa": "bb"}, + log_adapter.PANDAS_API_TRACKING_TASK, + { + "task": log_adapter.PANDAS_API_TRACKING_TASK, + "class_name": "dataframe", + "method_name": "resample", + "args_count": 0, + }, + ), + ( + "DataFrame", + "resample", + [], + {}, + log_adapter.PANDAS_API_TRACKING_TASK, + { + "task": log_adapter.PANDAS_API_TRACKING_TASK, + 
"class_name": "dataframe", + "method_name": "resample", + "args_count": 0, + }, + ), + ), +) +def test_submit_pandas_labels( + mock_bqclient, class_name, method_name, args, kwargs, task, expected_labels +): + log_adapter.submit_pandas_labels( + mock_bqclient, class_name, method_name, args, kwargs, task + ) + + mock_bqclient.query.assert_called_once() + + query_call_args = mock_bqclient.query.call_args_list[0] + labels = query_call_args[1]["job_config"].labels + assert labels == expected_labels + + +def test_submit_pandas_labels_without_valid_params_for_param_logging(mock_bqclient): + log_adapter.submit_pandas_labels( + mock_bqclient, + "Series", + "resample", + task=log_adapter.PANDAS_PARAM_TRACKING_TASK, + ) + + # For param tracking task without kwargs, we won't submit labels + mock_bqclient.query.assert_not_called() diff --git a/tests/unit/core/test_sql.py b/tests/unit/core/test_sql.py index a2ee2f359e..ca286cafff 100644 --- a/tests/unit/core/test_sql.py +++ b/tests/unit/core/test_sql.py @@ -12,10 +12,66 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import decimal + +import pytest +import shapely # type: ignore from bigframes.core import sql +@pytest.mark.parametrize( + ("value", "expected"), + ( + # Try to have some literals for each scalar data type: + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types + (None, "NULL"), + # TODO: support ARRAY type (possibly another method?) + (True, "True"), + (False, "False"), + ( + b"\x01\x02\x03ABC", + r"b'\x01\x02\x03ABC'", + ), + ( + datetime.date(2025, 1, 1), + "DATE('2025-01-01')", + ), + ( + datetime.datetime(2025, 1, 2, 3, 45, 6, 789123), + "DATETIME('2025-01-02T03:45:06.789123')", + ), + ( + shapely.Point(0, 1), + "ST_GEOGFROMTEXT('POINT (0 1)')", + ), + # TODO: INTERVAL type (e.g. 
from dateutil.relativedelta) + # TODO: JSON type (TBD what Python object that would correspond to) + (123, "123"), + (decimal.Decimal("123.75"), "CAST('123.75' AS NUMERIC)"), + # TODO: support BIGNUMERIC by looking at precision/scale of the DECIMAL + (123.75, "123.75"), + # TODO: support RANGE type + ("abc", "'abc'"), + # TODO: support STRUCT type (possibly another method?) + ( + datetime.time(12, 34, 56, 789123), + "TIME(DATETIME('1970-01-01 12:34:56.789123'))", + ), + ( + datetime.datetime( + 2025, 1, 2, 3, 45, 6, 789123, tzinfo=datetime.timezone.utc + ), + "TIMESTAMP('2025-01-02T03:45:06.789123+00:00')", + ), + ), +) +def test_simple_literal(value, expected): + got = sql.simple_literal(value) + assert got == expected + + def test_create_vector_search_sql_simple(): result_query = sql.create_vector_search_sql( sql_string="SELECT embedding FROM my_embeddings_table WHERE id = 1", diff --git a/tests/unit/functions/test_remote_function_utils.py b/tests/unit/functions/test_remote_function_utils.py new file mode 100644 index 0000000000..0bcfee5c4e --- /dev/null +++ b/tests/unit/functions/test_remote_function_utils.py @@ -0,0 +1,138 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import bigframes.dtypes +from bigframes.functions import _utils + + +@pytest.mark.parametrize( + ["metadata_options", "metadata_string"], + ( + pytest.param( + {}, + '{"value": {}}', + id="empty", + ), + pytest.param( + {"python_output_type": None}, + '{"value": {}}', + id="None", + ), + pytest.param( + {"python_output_type": list[bool]}, + '{"value": {"python_array_output_type": "bool"}}', + id="list-bool", + ), + pytest.param( + {"python_output_type": list[float]}, + '{"value": {"python_array_output_type": "float"}}', + id="list-float", + ), + pytest.param( + {"python_output_type": list[int]}, + '{"value": {"python_array_output_type": "int"}}', + id="list-int", + ), + pytest.param( + {"python_output_type": list[str]}, + '{"value": {"python_array_output_type": "str"}}', + id="list-str", + ), + ), +) +def test_get_bigframes_metadata(metadata_options, metadata_string): + assert _utils.get_bigframes_metadata(**metadata_options) == metadata_string + + +@pytest.mark.parametrize( + ["output_type"], + ( + pytest.param(bool), + pytest.param(bytes), + pytest.param(float), + pytest.param(int), + pytest.param(str), + pytest.param(list), + pytest.param(list[bytes], id="list-bytes"), + ), +) +def test_get_bigframes_metadata_array_type_not_serializable(output_type): + with pytest.raises(ValueError) as context: + _utils.get_bigframes_metadata(python_output_type=output_type) + assert str(context.value) == ( + f"python_output_type {output_type} is not serializable." 
+ ) + + +@pytest.mark.parametrize( + ["metadata_string", "python_output_type"], + ( + pytest.param( + None, + None, + id="None", + ), + pytest.param( + "", + None, + id="empty", + ), + pytest.param( + "{}", + None, + id="empty-dict", + ), + pytest.param( + '{"value": {}}', + None, + id="empty-value", + ), + pytest.param( + '{"value": {"python_array_output_type": "bool"}}', + list[bool], + id="list-bool", + ), + pytest.param( + '{"value": {"python_array_output_type": "float"}}', + list[float], + id="list-float", + ), + pytest.param( + '{"value": {"python_array_output_type": "int"}}', + list[int], + id="list-int", + ), + pytest.param( + '{"value": {"python_array_output_type": "str"}}', + list[str], + id="list-str", + ), + ), +) +def test_get_python_output_type_from_bigframes_metadata( + metadata_string, python_output_type +): + assert ( + _utils.get_python_output_type_from_bigframes_metadata(metadata_string) + == python_output_type + ) + + +def test_metadata_roundtrip_supported_array_types(): + for array_of in bigframes.dtypes.RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES: + ser = _utils.get_bigframes_metadata(python_output_type=list[array_of]) # type: ignore + deser = _utils.get_python_output_type_from_bigframes_metadata(ser) + assert deser == list[array_of] # type: ignore diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index 36caea0c0e..fa05fffcb2 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -341,10 +341,12 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str) (("string_col", "in", ["Hello, World!", "こんにちは"]),), ], 123, # max_results, - datetime.datetime(2024, 5, 14, 12, 42, 36, 125125), + datetime.datetime( + 2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc + ), ( "SELECT `row_index`, `string_col` FROM `test_table` " - "FOR SYSTEM_TIME AS OF TIMESTAMP('2024-05-14T12:42:36.125125') " + "FOR SYSTEM_TIME AS OF 
TIMESTAMP('2024-05-14T12:42:36.125125+00:00') " "WHERE `rowindex` NOT IN (0, 6) OR `string_col` IN ('Hello, World!', " "'こんにちは') LIMIT 123" ), @@ -364,14 +366,16 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str) ("string_col", "==", "Hello, World!"), ], 123, # max_results, - datetime.datetime(2024, 5, 14, 12, 42, 36, 125125), + datetime.datetime( + 2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc + ), ( """SELECT `rowindex`, `string_col` FROM (SELECT rowindex, string_col, FROM `test_table` AS t ) """ - "FOR SYSTEM_TIME AS OF TIMESTAMP('2024-05-14T12:42:36.125125') " + "FOR SYSTEM_TIME AS OF TIMESTAMP('2024-05-14T12:42:36.125125+00:00') " "WHERE `rowindex` < 4 AND `string_col` = 'Hello, World!' " "LIMIT 123" ), diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py index a9f9fdef9c..ccd4a57e11 100644 --- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py +++ b/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py @@ -1089,6 +1089,13 @@ def visit_SimpleCase(self, op, *, base=None, cases, results, default): visit_SearchedCase = visit_SimpleCase + def visit_SqlScalar(self, op, *, sql_template, values, output_type): + # TODO: can we include a string in the sqlglot expression without parsing? 
+ return sg.parse_one( + sql_template.format(*[value.sql(dialect="bigquery") for value in values]), + dialect="bigquery", + ) + def visit_ExistsSubquery(self, op, *, rel): select = rel.this.select(1, append=False) return self.f.exists(select) diff --git a/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/third_party/bigframes_vendored/ibis/expr/operations/generic.py index 36836b5963..c77ecc3e71 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/generic.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -330,4 +330,19 @@ def dtype(self): return rlz.highest_precedence_dtype(exprs) +@public +class SqlScalar(Value): + """Inject a SQL string as a scalar value.""" + + sql_template: str + values: VarTuple[Value] + output_type: dt.DataType + + shape = ds.scalar + + @property + def dtype(self): + return self.output_type + + public(NULL=NULL) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index f1565ed536..c8ca1b74b5 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4384,7 +4384,7 @@ def join(self, other, *, on: Optional[str] = None, how: str) -> DataFrame: Args: other: - DataFrame with an Index similar to the Index of this one. + DataFrame or Series with an Index similar to the Index of this one. on: Column in the caller to join on the index in other, otherwise joins index-on-index. Like an Excel VLOOKUP operation. @@ -4626,6 +4626,33 @@ def apply(self, func, *, axis=0, args=(), **kwargs): 1 19 dtype: Int64 + You could return an array output for every input row from the remote + function. + + >>> @bpd.remote_function(reuse=False) + ... def marks_analyzer(marks: pd.Series) -> list[float]: + ... import statistics + ... average = marks.mean() + ... median = marks.median() + ... gemetric_mean = statistics.geometric_mean(marks.values) + ... 
harmonic_mean = statistics.harmonic_mean(marks.values) + ... return [ + ... round(stat, 2) for stat in + ... (average, median, gemetric_mean, harmonic_mean) + ... ] + + >>> df = bpd.DataFrame({ + ... "physics": [67, 80, 75], + ... "chemistry": [88, 56, 72], + ... "algebra": [78, 91, 79] + ... }, index=["Alice", "Bob", "Charlie"]) + >>> stats = df.apply(marks_analyzer, axis=1) + >>> stats + Alice [77.67 78. 77.19 76.71] + Bob [75.67 80. 74.15 72.56] + Charlie [75.33 75. 75.28 75.22] + dtype: list[pyarrow] + You could also apply a remote function which accepts multiple parameters to every row of a DataFrame by using it with `axis=1` if the DataFrame has matching number of columns and data types. Note: This feature is @@ -6772,9 +6799,9 @@ def iat(self): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... columns=['A', 'B', 'C']) - >>> bpd.options.display.progress_bar = None >>> df A B C 0 0 2 3 @@ -6806,9 +6833,9 @@ def at(self): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... index=[4, 5, 6], columns=['A', 'B', 'C']) - >>> bpd.options.display.progress_bar = None >>> df A B C 4 0 2 3 diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 7c8f452a8f..727e25836a 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -35,6 +35,51 @@ def dt(self): """ Accessor object for datetime-like properties of the Series values. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> seconds_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="s")) + >>> seconds_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: timestamp[us][pyarrow] + + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: Int64 + + >>> hours_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) + >>> hours_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: timestamp[us][pyarrow] + + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: Int64 + + >>> quarters_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="QE")) + >>> quarters_series + 0 2000-03-31 00:00:00 + 1 2000-06-30 00:00:00 + 2 2000-09-30 00:00:00 + dtype: timestamp[us][pyarrow] + + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + dtype: Int64 + Returns: bigframes.operations.datetimes.DatetimeMethods: An accessor containing datetime methods. @@ -97,15 +142,16 @@ def index(self): Name: Age, dtype: Int64 >>> s1.index # doctest: +ELLIPSIS MultiIndex([( 'Alice', 'Seattle'), - ( 'Bob', 'New York'), - ('Aritra', 'Kona')], - names=['Name', 'Location']) + ( 'Bob', 'New York'), + ('Aritra', 'Kona')], + names=['Name', 'Location']) >>> s1.index.values array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')], - dtype=object) + dtype=object) Returns: - Index: The index object of the Series. + Index: + The index object of the Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -168,6 +214,13 @@ def name(self) -> Hashable: >>> s.name 'Numbers' + >>> s.name = "Integers" + >>> s + 0 1 + 1 2 + 2 3 + Name: Integers, dtype: Int64 + If the Series is part of a DataFrame: >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) @@ -182,7 +235,8 @@ def name(self) -> Hashable: 'col1' Returns: - hashable object: The name of the Series, also the column name + hashable object: + The name of the Series, also the column name if part of a DataFrame. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -260,7 +314,8 @@ def transpose(self) -> Series: dtype: string Returns: - Series: Series. + bigframes.pandas.Series: + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -280,6 +335,7 @@ def reset_index( **Examples:** >>> import bigframes.pandas as bpd + >>> import pandas as pd >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4], name='foo', @@ -324,6 +380,24 @@ def reset_index( 3 4 Name: foo, dtype: Int64 + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']), + ... np.array(['one', 'two', 'one', 'two'])] + >>> s2 = bpd.Series( + ... range(4), name='foo', + ... index=pd.MultiIndex.from_arrays(arrays, + ... names=['a', 'b'])) + + If level is not set, all levels are removed from the Index. + + >>> s2.reset_index() + a b foo + 0 bar one 0 + 1 bar two 1 + 2 baz one 2 + 3 baz two 3 + + [4 rows x 3 columns] + Args: drop (bool, default False): Just reset the index, without inserting it as a column in @@ -334,7 +408,8 @@ def reset_index( when `drop` is True. Returns: - Series or DataFrame or None; When `drop` is False (the default), + bigframes.pandas.Series or bigframes.pandas.DataFrame or None: + When `drop` is False (the default), a DataFrame is returned. The newly created columns will come first in the DataFrame, followed by the original Series values. When `drop` is True, a `Series` is returned. 
@@ -394,8 +469,8 @@ def to_string( of rows is above `max_rows`). Returns: - str or None: String representation of Series if ``buf=None``, - otherwise None. + str or None: + String representation of Series if ``buf=None``, otherwise None. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -407,7 +482,36 @@ def to_markdown( **kwargs, ) -> str | None: """ - Print {klass} in Markdown-friendly format. + Print Series in Markdown-friendly format. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option. + + >>> print(s.to_markdown(tablefmt="grid")) + +----+----------+ + | | animal | + +====+==========+ + | 0 | elk | + +----+----------+ + | 1 | pig | + +----+----------+ + | 2 | dog | + +----+----------+ + | 3 | quetzal | + +----+----------+ Args: buf (str, Path or StringIO-like, optional, default None): @@ -418,7 +522,8 @@ def to_markdown( Add index (row) labels. Returns: - str: {klass} in Markdown-friendly format. + str: + Series in Markdown-friendly format. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -426,6 +531,23 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: """ Convert Series to {label -> value} dict or dict-like object. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> from collections import OrderedDict, defaultdict + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4]) + >>> s.to_dict() + {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4} + + >>> s.to_dict(into=OrderedDict) + OrderedDict({np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4}) + + >>> dd = defaultdict(list) + >>> s.to_dict(into=dd) + defaultdict(, {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4}) + Args: into (class, default dict): The collections.abc.Mapping subclass to use as the return @@ -434,7 +556,8 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: collections.defaultdict, you must pass it initialized. Returns: - collections.abc.Mapping: Key-value representation of Series. + collections.abc.Mapping: + Key-value representation of Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -445,11 +568,27 @@ def to_frame(self, name=None) -> DataFrame: The column in the new dataframe will be named name (the keyword parameter) if the name parameter is provided and not None. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(["a", "b", "c"], + ... name="vals") + >>> s.to_frame() + vals + 0 a + 1 b + 2 c + + [3 rows x 1 columns] + Args: name (Hashable, default None) Returns: - bigframes.dataframe.DataFrame: DataFrame representation of Series. + bigframes.pandas.DataFrame: + DataFrame representation of Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -491,7 +630,8 @@ def to_latex(self, buf=None, columns=None, header=True, index=True, **kwargs): Write row names (index). Returns: - str or None: If buf is None, returns the result as a string. + str or None: + If buf is None, returns the result as a string. Otherwise returns None. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -520,7 +660,8 @@ def tolist(self) -> list: [1, 2, 3] Returns: - list: list of the values + list: + list of the values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -530,6 +671,32 @@ def to_numpy(self, dtype, copy=False, na_value=None): """ A NumPy ndarray representing the values in this Series or Index. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> ser = bpd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.to_numpy() + array(['a', 'b', 'a'], dtype=object) + + Specify the dtype to control how datetime-aware data is represented. Use + dtype=object to return an ndarray of pandas Timestamp objects, each with + the correct tz. + + >>> ser = bpd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('1999-12-31 23:00:00+0000', tz='UTC'), + Timestamp('2000-01-01 23:00:00+0000', tz='UTC')], dtype=object) + + Or ``dtype=datetime64[ns]`` to return an ndarray of native datetime64 values. + The values are converted to UTC and the timezone info is dropped. + + >>> ser.to_numpy(dtype="datetime64[ns]") + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], + dtype='datetime64[ns]') + Args: dtype (str or numpy.dtype, optional): The dtype to pass to :meth:`numpy.asarray`. @@ -546,7 +713,8 @@ def to_numpy(self, dtype, copy=False, na_value=None): of the underlying array (for extension arrays). Returns: - numpy.ndarray: A NumPy ndarray representing the values in this + numpy.ndarray: + A NumPy ndarray representing the values in this Series or Index. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -555,11 +723,43 @@ def to_pickle(self, path, **kwargs): """ Pickle (serialize) object to file. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> original_df = bpd.DataFrame({"foo": range(5), "bar": range(5, 10)}) + >>> original_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + [5 rows x 2 columns] + + >>> original_df.to_pickle("./dummy.pkl") + + >>> unpickled_df = bpd.read_pickle("./dummy.pkl") + >>> unpickled_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + [5 rows x 2 columns] + Args: path (str, path object, or file-like object): String, path object (implementing ``os.PathLike[str]``), or file-like object implementing a binary ``write()`` function. File path where the pickled object will be stored. + + Returns: + None """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -568,7 +768,8 @@ def to_xarray(self): Return an xarray object from the pandas object. Returns: - xarray.DataArray or xarray.Dataset: Data in the pandas structure + xarray.DataArray or xarray.Dataset: + Data in the pandas structure converted to Dataset if the object is a DataFrame, or a DataArray if the object is a Series. """ @@ -606,7 +807,8 @@ def agg(self, func): function names, e.g. ``['sum', 'mean']``. Returns: - scalar or Series: Aggregated results + scalar or bigframes.pandas.Series: + Aggregated results. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -629,8 +831,8 @@ def count(self): np.int64(2) Returns: - int or Series (if level specified): Number of non-null values in the - Series. + int or bigframes.pandas.Series (if level specified): + Number of non-null values in the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -640,8 +842,26 @@ def nunique(self) -> int: Excludes NA values by default. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 3, 5, 7, 7]) + >>> s + 0 1 + 1 3 + 2 5 + 3 7 + 4 7 + dtype: Int64 + + >>> s.nunique() + np.int64(4) + Returns: - int: number of unique elements in the object. + int: + number of unique elements in the object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -671,6 +891,7 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Example with order preservation: Slower, but keeps order + >>> s.unique() 0 2 1 1 @@ -678,6 +899,7 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Example without order preservation: Faster, but loses original order + >>> s.unique(keep_order=False) 0 1 1 2 @@ -685,7 +907,8 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Returns: - Series: The unique values returned as a Series. + bigframes.pandas.Series: + The unique values returned as a Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -697,8 +920,20 @@ def mode(self) -> Series: Always returns Series even if only one value is returned. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, 4, 8, 2, 4, None]) + >>> s.mode() + 0 2.0 + 1 4.0 + dtype: Float64 + Returns: - bigframes.series.Series: Modes of the Series in sorted order. + bigframes.pandas.Series: + Modes of the Series in sorted order. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -710,6 +945,53 @@ def drop_duplicates( """ Return Series with duplicate values removed. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Generate a Series with duplicated entries. + + >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], + ... 
name='animal') + >>> s + 0 llama + 1 cow + 2 llama + 3 beetle + 4 llama + 5 hippo + Name: animal, dtype: string + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each set + of duplicated entries. The default value of keep is 'first'. + + >>> s.drop_duplicates() + 0 llama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: string + + The value 'last' for parameter 'keep' keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 llama + 5 hippo + Name: animal, dtype: string + + The value False for parameter 'keep' discards all sets of duplicated entries. + + >>> s.drop_duplicates(keep=False) + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: string + Args: keep ({'first', 'last', ``False``}, default 'first'): Method to handle dropping duplicates: @@ -719,7 +1001,8 @@ def drop_duplicates( ``False`` : Drop all duplicates. Returns: - bigframes.series.Series: Series with duplicates dropped or None if ``inplace=True``. + bigframes.pandas.Series: + Series with duplicates dropped or None if ``inplace=True``. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -731,6 +1014,54 @@ def duplicated(self, keep="first") -> Series: Series. Either all duplicates, all except the first or all except the last occurrence of duplicates can be indicated. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> animals = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: boolean + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: boolean + + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: boolean + + By setting keep on False, all duplicates are True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + dtype: boolean + Args: keep ({'first', 'last', False}, default 'first'): Method to handle dropping duplicates: @@ -742,7 +1073,8 @@ def duplicated(self, keep="first") -> Series: ``False`` : Mark all duplicates as ``True``. Returns: - bigframes.series.Series: Series indicating whether each value has occurred in the + bigframes.pandas.Series: + Series indicating whether each value has occurred in the preceding values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -754,6 +1086,23 @@ def idxmin(self) -> Hashable: If multiple values equal the minimum, the first row label with that value is returned. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(data=[1, None, 4, 1], + ... index=['A', 'B', 'C', 'D']) + >>> s + A 1.0 + B + C 4.0 + D 1.0 + dtype: Float64 + + >>> s.idxmin() + 'A' + Returns: Index: Label of the minimum value. """ @@ -766,6 +1115,24 @@ def idxmax(self) -> Hashable: If multiple values equal the maximum, the first row label with that value is returned. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s + A 1.0 + B + C 4.0 + D 3.0 + E 4.0 + dtype: Float64 + + >>> s.idxmax() + 'C' + Returns: Index: Label of the maximum value. """ @@ -800,7 +1167,8 @@ def round(self, decimals: int = 0) -> Series: it specifies the number of positions to the left of the decimal point. Returns: - bigframes.series.Series: Rounded values of the Series. + bigframes.pandas.Series: + Rounded values of the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -814,6 +1182,12 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series: >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([[1, 2, 3], [], [3, 4]]) + >>> s + 0 [1 2 3] + 1 [] + 2 [3 4] + dtype: list[pyarrow] + >>> s.explode() 0 1 0 2 @@ -828,7 +1202,8 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series: If True, the resulting index will be labeled 0, 1, …, n - 1. Returns: - bigframes.series.Series: Exploded lists to rows; index will be duplicated for these rows. + bigframes.pandas.Series: + Exploded lists to rows; index will be duplicated for these rows. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -865,7 +1240,8 @@ def corr(self, other, method="pearson", min_periods=None) -> float: are not yet supported, so a result will be returned for at least two observations. Returns: - float: Will return NaN if there are fewer than two numeric pairs, either series has a + float: + Will return NaN if there are fewer than two numeric pairs, either series has a variance or covariance of zero, or any input value is infinite. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -885,10 +1261,11 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([0.25, 0.5, 0.2, -0.05]) >>> s.autocorr() # doctest: +ELLIPSIS np.float64(0.10355263309024067) + >>> s.autocorr(lag=2) np.float64(-1.0) - If the Pearson correlation is not well defined, then 'NaN' is returned. + If the Pearson correlation is not well defined, then 'NaN' is returned. >>> s = bpd.Series([1, 0, 0, 0]) >>> s.autocorr() @@ -899,7 +1276,8 @@ def autocorr(self, lag: int = 1) -> float: Number of lags to apply before performing autocorrelation. Returns: - float: The Pearson correlation between self and self.shift(lag). + float: + The Pearson correlation between self and self.shift(lag). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -913,6 +1291,16 @@ def cov( The two `Series` objects are not required to be the same length and will be aligned internally before the covariance is calculated. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s1 = bpd.Series([0.90010907, 0.13484424, 0.62036035]) + >>> s2 = bpd.Series([0.12528585, 0.26962463, 0.51111198]) + >>> s1.cov(s2) + np.float64(-0.01685762652715874) + Args: other (Series): Series with which to compute the covariance. @@ -928,8 +1316,48 @@ def diff(self) -> Series: """ First discrete difference of element. - Calculates the difference of a {klass} element compared with another - element in the {klass} (default is element in previous row). + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). 
+ + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Difference with previous row + + >>> s = bpd.Series([1, 1, 2, 3, 5, 8]) + >>> s.diff() + 0 + 1 0 + 2 1 + 3 1 + 4 2 + 5 3 + dtype: Int64 + + Difference with 3rd previous row + + >>> s.diff(periods=3) + 0 + 1 + 2 + 3 2 + 4 4 + 5 6 + dtype: Int64 + + Difference with following row + + >>> s.diff(periods=-1) + 0 0 + 1 -1 + 2 -1 + 3 -2 + 4 -3 + 5 + dtype: Int64 Args: periods (int, default 1): @@ -937,7 +1365,8 @@ def diff(self) -> Series: values. Returns: - Series: First differences of the Series. + bigframes.pandas.Series: + First differences of the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -977,7 +1406,8 @@ def dot(self, other) -> Series | np.ndarray: The other object to compute the dot product with its columns. Returns: - scalar, Series or numpy.ndarray: Return the dot product of the Series + scalar, bigframes.pandas.Series or numpy.ndarray: + Return the dot product of the Series and other if other is a Series, the Series of the dot product of Series and each rows of other if other is a DataFrame or a numpy.ndarray between the Series and each columns of the numpy array. @@ -1015,6 +1445,7 @@ def sort_values( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([np.nan, 1, 3, 10, 5]) @@ -1081,15 +1512,16 @@ def sort_values( ascending (bool or list of bools, default True): If True, sort values in ascending order, otherwise descending. kind (str, default to 'quicksort'): - Choice of sorting algorithm. Accepts 'quicksort’, ‘mergesort’, - ‘heapsort’, ‘stable’. Ignored except when determining whether to + Choice of sorting algorithm. Accepts quicksort', 'mergesort', + 'heapsort', 'stable'. Ignored except when determining whether to sort stably. 
'mergesort' or 'stable' will result in stable reorder na_position ({'first' or 'last'}, default 'last'): Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. Returns: - bigframes.series.Series: Series ordered by values or None if ``inplace=True``. + bigframes.pandas.Series or None: + Series ordered by values or None if ``inplace=True``. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1106,6 +1538,40 @@ def sort_index( Returns a new Series sorted by label if `inplace` argument is ``False``, otherwise updates the original series and returns None. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) + >>> s.sort_index() + 1 c + 2 b + 3 a + 4 d + dtype: string + + Sort Descending + + >>> s.sort_index(ascending=False) + 4 d + 3 a + 2 b + 1 c + dtype: string + + By default NaNs are put at the end, but use na_position to place them at + the beginning + + >>> s = bpd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) + >>> s.sort_index(na_position='first') + d + 1.0 c + 2.0 b + 3.0 a + dtype: string + Args: axis ({0 or 'index'}): Unused. Parameter needed for compatibility with DataFrame. @@ -1117,7 +1583,8 @@ def sort_index( Not implemented for MultiIndex. Returns: - bigframes.series.Series: The original Series sorted by the labels or None if + bigframes.pandas.Series or None: + The original Series sorted by the labels or None if ``inplace=True``. """ @@ -1130,21 +1597,85 @@ def nlargest( """ Return the largest `n` elements. - Args: - n (int, default 5): - Return this many descending sorted values. - keep ({'first', 'last', 'all'}, default 'first'): - When there are duplicate values that cannot all fit in a - Series of `n` elements: - ``first`` : return the first `n` occurrences in order - of appearance. - ``last`` : return the last `n` occurrences in reverse - order of appearance. 
- ``all`` : keep all occurrences. This can result in a Series of - size larger than `n`. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = bpd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: Int64 + + The n largest elements where `n=5` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: Int64 + + The n largest elements where `n=3`. Default keep value is `first` so Malta + will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: Int64 + + The n largest elements where `n=3` and keeping the last duplicates. Brunei + will be kept since it is the last with value 434000 based on the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: Int64 + + The n largest elements where n`=3` with all duplicates kept. Note that the + returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: Int64 + + Args: + n (int, default 5): + Return this many descending sorted values. + keep ({'first', 'last', 'all'}, default 'first'): + When there are duplicate values that cannot all fit in a + Series of `n` elements: + ``first`` : return the first `n` occurrences in order + of appearance. + ``last`` : return the last `n` occurrences in reverse + order of appearance. + ``all`` : keep all occurrences. 
This can result in a Series of + size larger than `n`. Returns: - bigframes.series.Series: The `n` largest values in the Series, sorted in decreasing order. + bigframes.pandas.Series: + The `n` largest values in the Series, sorted in decreasing order. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1152,6 +1683,59 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: """ Return the smallest `n` elements. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = bpd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: Int64 + + The n smallest elements where `n=5` by default. + + >>> s.nsmallest() + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: Int64 + + The n smallest elements where `n=3`. Default keep value is `first` so + Nauru and Tuvalu will be kept. + + >>> s.nsmallest(3) + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: Int64 + + The n smallest elements where `n=3` with all duplicates kept. Note that + the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep='all') + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: Int64 + Args: n (int, default 5): Return this many ascending sorted values. @@ -1167,7 +1751,8 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: size larger than `n`. Returns: - bigframes.series.Series: The `n` smallest values in the Series, sorted in increasing order. 
+ bigframes.pandas.Series: + The `n` smallest values in the Series, sorted in increasing order. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1241,6 +1826,28 @@ def apply( >>> names = bpd.Series(["Alice", "Bob"]) >>> hashes = names.apply(get_hash) + You could return an array output from the remote function. + + >>> @bpd.remote_function(reuse=False) + ... def text_analyzer(text: str) -> list[int]: + ... words = text.count(" ") + 1 + ... periods = text.count(".") + ... exclamations = text.count("!") + ... questions = text.count("?") + ... return [words, periods, exclamations, questions] + + >>> texts = bpd.Series([ + ... "The quick brown fox jumps over the lazy dog.", + ... "I love this product! It's amazing.", + ... "Hungry? Wanna eat? Lets go!" + ... ]) + >>> features = texts.apply(text_analyzer) + >>> features + 0 [9 1 0 0] + 1 [6 1 1 0] + 2 [5 0 1 2] + dtype: list[pyarrow] + Simple vectorized functions, lambdas or ufuncs can be applied directly with `by_row=False`. @@ -1285,9 +1892,10 @@ def apply( the func will be passed the whole Series at once. Returns: - bigframes.series.Series: A new Series with values representing the - return value of the ``func`` applied to each element of the original - Series. + bigframes.pandas.Series: + A new Series with values representing the + return value of the ``func`` applied to each element of the + original Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1310,8 +1918,8 @@ def combine( >>> import numpy as np >>> bpd.options.display.progress_bar = None - Consider 2 Datasets ``s1`` and ``s2`` containing - highest clocked speeds of different birds. + Consider 2 Datasets ``s1`` and ``s2`` containing + highest clocked speeds of different birds. 
>>> s1 = bpd.Series({'falcon': 330.0, 'eagle': 160.0}) >>> s1 @@ -1325,8 +1933,8 @@ def combine( duck 30.0 dtype: Float64 - Now, to combine the two datasets and view the highest speeds - of the birds across the two datasets + Now, to combine the two datasets and view the highest speeds + of the birds across the two datasets >>> s1.combine(s2, np.maximum) falcon 345.0 @@ -1343,7 +1951,8 @@ def combine( Also accepts some numpy binary functions. Returns: - Series: The result of combining the Series with the other object. + bigframes.pandas.Series: + The result of combining the Series with the other object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1485,7 +2094,8 @@ def groupby( If False, NA values will also be treated as the key in groups. Returns: - bigframes.core.groupby.SeriesGroupBy: Returns a groupby object that contains + bigframes.core.groupby.SeriesGroupBy: + Returns a groupby object that contains information about the groups. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1598,11 +2208,13 @@ def drop( For MultiIndex, level for which the labels will be removed. Returns: - bigframes.series.Series: Series with specified index labels removed + bigframes.pandas.Series or None: + Series with specified index labels removed or None if ``inplace=True``. Raises: - KeyError: If none of the labels are found in the index. + KeyError: + If none of the labels are found in the index. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1636,7 +2248,8 @@ def swaplevel(self, i, j): Levels of the indices to be swapped. Can pass level name as string. Returns: - Series: Series with levels swapped in MultiIndex + bigframes.pandas.Series: + Series with levels swapped in MultiIndex """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1654,7 +2267,8 @@ def droplevel(self, level, axis): For `Series` this parameter is unused and defaults to 0. 
Returns: - Series with requested index / column level(s) removed. + bigframes.pandas.Series: + Series with requested index / column level(s) removed. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1665,33 +2279,25 @@ def interpolate(self, method: str = "linear"): **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({ - ... 'A': [1, 2, 3, None, None, 6], - ... 'B': [None, 6, None, 2, None, 3], - ... }, index=[0, 0.1, 0.3, 0.7, 0.9, 1.0]) - >>> df.interpolate() - A B - 0.0 1.0 - 0.1 2.0 6.0 - 0.3 3.0 4.0 - 0.7 4.0 2.0 - 0.9 5.0 2.5 - 1.0 6.0 3.0 - - [6 rows x 2 columns] - >>> df.interpolate(method="values") - A B - 0.0 1.0 - 0.1 2.0 6.0 - 0.3 3.0 4.666667 - 0.7 4.714286 2.0 - 0.9 5.571429 2.666667 - 1.0 6.0 3.0 - - [6 rows x 2 columns] + Filling in NaN in a Series via linear interpolation. + + >>> s = bpd.Series([0, 1, np.nan, 3]) + >>> s + 0 0.0 + 1 1.0 + 2 + 3 3.0 + dtype: Float64 + >>> s.interpolate() + 0 0.0 + 1 1.0 + 2 2.0 + 3 3.0 + dtype: Float64 Args: method (str, default 'linear'): @@ -1702,7 +2308,7 @@ def interpolate(self, method: str = "linear"): 'pad': Fill in NaNs using existing values. 'nearest', 'zero', 'slinear': Emulates `scipy.interpolate.interp1d` Returns: - Series: + bigframes.pandas.Series: Returns the same object type as the caller, interpolated at some or all ``NaN`` values """ @@ -1718,6 +2324,7 @@ def fillna( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([np.nan, 2, np.nan, -1]) @@ -1752,7 +2359,8 @@ def fillna( Value to use to fill holes (e.g. 0). Returns: - Series or None: Object with missing values filled or None. + bigframes.pandas.Series or None: + Object with missing values filled or None. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1874,7 +2482,8 @@ def replace( string. 
Returns: - Series/DataFrame: Object after replacement. + bigframes.pandas.Series or bigframes.pandas.DataFrame: + Object after replacement. Raises: TypeError: @@ -1897,6 +2506,7 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None Drop NA values from a Series: @@ -1939,7 +2549,8 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: Not in use. Kept for compatibility. Returns: - Series: Series with NA entries dropped from it. + bigframes.pandas.Series: + Series with NA entries dropped from it. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1959,6 +2570,7 @@ def between( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None Boundary values are included by default: @@ -2001,8 +2613,9 @@ def between( Include boundaries. Whether to set each bound as closed or open. Returns: - Series: Series representing whether each element is between left and - right (inclusive). + bigframes.pandas.Series: + Series representing whether each element is between left and + right (inclusive). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2016,6 +2629,7 @@ def case_when( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> c = bpd.Series([6, 7, 8, 9], name="c") @@ -2036,11 +2650,10 @@ def case_when( **See also:** - - :func:`bigframes.series.Series.mask` : Replace values where the condition is True. + - :func:`bigframes.pandas.Series.mask` : Replace values where the condition is True. Args: - caselist: - A list of tuples of conditions and expected replacements + caselist (A list of tuples of conditions and expected replacements): Takes the form: ``(condition0, replacement0)``, ``(condition1, replacement1)``, ... . 
``condition`` should be a 1-D boolean array-like object @@ -2056,7 +2669,7 @@ def case_when( (though pandas doesn`t check it). Returns: - bigframes.series.Series + bigframes.pandas.Series """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2070,6 +2683,7 @@ def cumprod(self): **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, np.nan, 5, -1, 0]) @@ -2092,7 +2706,8 @@ def cumprod(self): dtype: Float64 Returns: - bigframes.series.Series: Return cumulative sum of scalar or Series. + bigframes.pandas.Series: + Return cumulative sum of scalar or Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2103,13 +2718,39 @@ def cumsum(self): Returns a DataFrame or Series of the same size containing the cumulative sum. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: Float64 + + By default, NA values are ignored. + + >>> s.cumsum() + 0 2.0 + 1 + 2 7.0 + 3 6.0 + 4 6.0 + dtype: Float64 + Args: axis ({0 or 'index', 1 or 'columns'}, default 0): The index or the name of the axis. 0 is equivalent to None or 'index'. For `Series` this parameter is unused and defaults to 0. Returns: - scalar or Series: Return cumulative sum of scalar or Series. + bigframes.pandas.Series: + Return cumulative sum of scalar or Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2120,13 +2761,35 @@ def cummax(self): Returns a DataFrame or Series of the same size containing the cumulative maximum. - Args: - axis ({{0 or 'index', 1 or 'columns'}}, default 0): - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: Float64 + + By default, NA values are ignored. + + >>> s.cummax() + 0 2.0 + 1 + 2 5.0 + 3 5.0 + 4 5.0 + dtype: Float64 + Returns: - bigframes.series.Series: Return cumulative maximum of scalar or Series. + bigframes.pandas.Series: + Return cumulative maximum of scalar or Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2137,30 +2800,75 @@ def cummin(self): Returns a DataFrame or Series of the same size containing the cumulative minimum. - Args: - axis ({0 or 'index', 1 or 'columns'}, default 0): - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. - skipna (bool, default True): - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - `*args`, `**kwargs`: - Additional keywords have no effect but might be accepted for - compatibility with NumPy. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: Float64 + + By default, NA values are ignored. + + >>> s.cummin() + 0 2.0 + 1 + 2 2.0 + 3 -1.0 + 4 -1.0 + dtype: Float64 Returns: - bigframes.series.Series: Return cumulative minimum of scalar or Series. + bigframes.pandas.Series: + Return cumulative minimum of scalar or Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def eq(self, other) -> Series: """Return equal of Series and other, element-wise (binary operator eq). - Equivalent to ``other == series``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ Equivalent to ``other == series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.eq(b) + a True + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: Series: The result of the operation. @@ -2171,83 +2879,241 @@ def eq(self, other) -> Series: def ne(self, other) -> Series: """Return not equal of Series and other, element-wise (binary operator ne). - Equivalent to ``other != series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``other != series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.ne(b) + a False + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def le(self, other) -> Series: - """Get 'less than or equal to' of Series and other, element-wise (binary operator `<=`). 
+ """Get 'less than or equal to' of Series and other, element-wise (binary + operator le). + + Equivalent to ``series <= other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``series <= other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.le(b) + a True + b + c + d + e + dtype: boolean Args: other: Series, or scalar value Returns: - bigframes.series.Series: The result of the comparison. + bigframes.pandas.Series: + The result of the comparison. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def lt(self, other) -> Series: - """Get 'less than' of Series and other, element-wise (binary operator `<`). + """Get 'less than' of Series and other, element-wise (binary operator lt). + + Equivalent to ``series < other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``series < other``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.lt(b) + a False + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def ge(self, other) -> Series: - """Get 'greater than or equal to' of Series and other, element-wise (binary operator `>=`). + """Get 'greater than or equal to' of Series and other, element-wise + (binary operator ge). + + Equivalent to ``series >= other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``series >= other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.ge(b) + a True + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def gt(self, other) -> Series: - """Get 'less than or equal to' of Series and other, element-wise (binary operator `<=`). + """Return Greater than of series and other, element-wise + (binary operator gt). + + Equivalent to ``series > other``, but with support to substitute a + fill_value for missing data in either one of the inputs.
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``series <= other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.gt(b) + a False + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def add(self, other) -> Series: - """Return addition of Series and other, element-wise (binary operator add). + """Return addition of Series and other, element-wise (binary operator + add). - Equivalent to ``series + other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series + other``, but with support to substitute a + fill_value for missing data in either one of the inputs. **Examples:** @@ -2299,10 +3165,11 @@ def add(self, other) -> Series: dtype: Int64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2352,21 +3219,54 @@ def __add__(self, other): Object to be added to the Series. Returns: - Series: The result of adding `other` to Series. + bigframes.pandas.Series: + The result of adding `other` to Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def radd(self, other) -> Series: - """Return addition of Series and other, element-wise (binary operator radd). + """Return addition of Series and other, element-wise (binary operator + radd). + + Equivalent to ``other + series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``other + series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.add(b) + a 2.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series, or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2381,7 +3281,8 @@ def __radd__(self, other): Object to which Series should be added. Returns: - Series: The result of adding Series to `other`. + bigframes.pandas.Series: + The result of adding Series to `other`. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2389,16 +3290,48 @@ def sub( self, other, ) -> Series: - """Return subtraction of Series and other, element-wise (binary operator sub). + """Return subtraction of Series and other, element-wise (binary operator + sub). - Equivalent to ``series - other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series - other``, but with support to substitute a + fill_value for missing data in either one of the inputs. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.subtract(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series, or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2448,21 +3381,54 @@ def __sub__(self, other): Object to subtract from the Series. Returns: - Series: The result of subtraction. + bigframes.pandas.Series: + The result of subtraction. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rsub(self, other) -> Series: - """Return subtraction of Series and other, element-wise (binary operator rsub). + """Return subtraction of Series and other, element-wise (binary operator + rsub). + + Equivalent to ``other - series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``other - series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.subtract(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series, or scalar value) Returns: - bigframes.series.Series: The result of the operation. 
+ bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2477,21 +3443,54 @@ def __rsub__(self, other): Object to subtract the Series from. Returns: - Series: The result of subtraction. + bigframes.pandas.Series: + The result of subtraction. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def mul(self, other) -> Series: - """Return multiplication of Series and other, element-wise (binary operator mul). + """Return multiplication of Series and other, element-wise (binary + operator mul). + + Equivalent to ``other * series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``other * series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.multiply(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2530,21 +3529,54 @@ def __mul__(self, other): Object to multiply with the Series. Returns: - Series: The result of the multiplication. + bigframes.pandas.Series: + The result of the multiplication. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rmul(self, other) -> Series: - """Return multiplication of Series and other, element-wise (binary operator mul). 
+ """Return multiplication of Series and other, element-wise (binary + operator rmul). + + Equivalent to ``other * series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``series * others``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.multiply(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2559,21 +3591,53 @@ def __rmul__(self, other): Object to multiply the Series with. Returns: - Series: The result of the multiplication. + bigframes.pandas.Series: The result of the multiplication. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def truediv(self, other) -> Series: - """Return floating division of Series and other, element-wise (binary operator truediv). + """Return floating division of Series and other, element-wise (binary + operator truediv). + + Equivalent to ``series / other``, but with support to substitute a + fill_value for missing data in either one of the inputs. - Equivalent to ``series / other``, but with support to substitute a fill_value for - missing data in either one of the inputs.
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.divide(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2612,21 +3676,54 @@ def __truediv__(self, other): Object to divide the Series by. Returns: - Series: The result of the division. + bigframes.pandas.Series: + The result of the division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rtruediv(self, other) -> Series: - """Return floating division of Series and other, element-wise (binary operator rtruediv). + """Return floating division of Series and other, element-wise (binary + operator rtruediv). + + Equivalent to ``other / series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``other / series``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ >>> a.divide(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2642,21 +3739,53 @@ def __rtruediv__(self, other): Object to divide by the Series. Returns: - Series: The result of the division. + bigframes.pandas.Series: The result of the division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def floordiv(self, other) -> Series: - """Return integer division of Series and other, element-wise (binary operator floordiv). + """Return integer division of Series and other, element-wise + (binary operator floordiv). + + Equivalent to ``series // other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``series // other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.floordiv(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2695,21 +3824,54 @@ def __floordiv__(self, other): Object to divide the Series by. Returns: - Series: The result of the integer divison. + bigframes.pandas.Series: + The result of the integer division. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rfloordiv(self, other) -> Series: - """Return integer division of Series and other, element-wise (binary operator rfloordiv). + """Return integer division of Series and other, element-wise (binary + operator rfloordiv). + + Equivalent to ``other // series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``other // series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.floordiv(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2725,21 +3887,53 @@ def __rfloordiv__(self, other): Object to divide by the Series. Returns: - Series: The result of the integer division. + bigframes.pandas.Series: + The result of the integer division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def mod(self, other) -> Series: """Return modulo of Series and other, element-wise (binary operator mod). - Equivalent to ``series % other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series % other``, but with support to substitute a + fill_value for missing data in either one of the inputs. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.mod(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2778,21 +3972,53 @@ def __mod__(self, other): Object to modulo the Series by. Returns: - Series: The result of the modulo. + bigframes.pandas.Series: + The result of the modulo. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rmod(self, other) -> Series: """Return modulo of Series and other, element-wise (binary operator mod). - Equivalent to ``series % other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``other % series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.mod(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation.
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2808,21 +4034,54 @@ def __rmod__(self, other): Object to modulo by the Series. Returns: - Series: The result of the modulo. + bigframes.pandas.Series: + The result of the modulo. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def pow(self, other) -> Series: - """Return Exponential power of series and other, element-wise (binary operator `pow`). + """Return Exponential power of series and other, element-wise (binary + operator `pow`). + + Equivalent to ``series ** other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``series ** other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.pow(b) + a 1.0 + b 1.0 + c 1.0 + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2861,22 +4120,55 @@ def __pow__(self, other): other (scalar or Series): Object to exponentiate the Series with. - Returns: - Series: The result of the exponentiation. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + Returns: + bigframes.pandas.Series: + The result of the exponentiation. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def rpow(self, other) -> Series: + """Return Exponential power of series and other, element-wise (binary + operator `rpow`). + + Equivalent to ``other ** series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None - def rpow(self, other) -> Series: - """Return Exponential power of series and other, element-wise (binary operator `rpow`). + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``other ** series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.pow(b) + a 1.0 + b 1.0 + c 1.0 + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2893,21 +4185,60 @@ def __rpow__(self, other): Object to exponentiate with the Series. Returns: - Series: The result of the exponentiation. + bigframes.pandas.Series: + The result of the exponentiation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def divmod(self, other) -> Series: - """Return integer division and modulo of Series and other, element-wise (binary operator divmod). + """Return integer division and modulo of Series and other, element-wise + (binary operator divmod). Equivalent to divmod(series, other). 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.divmod(b) + (a 1.0 + b + c + d + e + dtype: Float64, + a 0.0 + b + c + d + e + dtype: Float64) + Args: other: Series, or scalar value Returns: - 2-Tuple of Series: The result of the operation. The result is always - consistent with (floordiv, mod) (though pandas may not). + Tuple[bigframes.pandas.Series, bigframes.pandas.Series]: + The result of the operation. The result is always + consistent with (floordiv, mod) (though pandas may not). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2917,12 +4248,49 @@ def rdivmod(self, other) -> Series: Equivalent to other divmod series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.divmod(b) + (a 1.0 + b + c + d + e + dtype: Float64, + a 0.0 + b + c + d + e + dtype: Float64) + Args: other: Series, or scalar value Returns: - 2-Tuple of Series: The result of the operation. The result is always - consistent with (rfloordiv, rmod) (though pandas may not). + Tuple[bigframes.pandas.Series, bigframes.pandas.Series]: + The result of the operation. The result is always + consistent with (rfloordiv, rmod) (though pandas may not). 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2949,8 +4317,8 @@ def combine_first(self, other) -> Series: 2 5.0 dtype: Float64 - Null values still persist if the location of that null value - does not exist in `other` + Null values still persist if the location of that null value + does not exist in `other` >>> s1 = bpd.Series({'falcon': np.nan, 'eagle': 160.0}) >>> s2 = bpd.Series({'eagle': 200.0, 'duck': 30.0}) @@ -2965,7 +4333,8 @@ def combine_first(self, other) -> Series: The value(s) to be used for filling null values. Returns: - Series: The result of combining the provided Series with the other object. + bigframes.pandas.Series: + The result of combining the provided Series with the other object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3018,8 +4387,8 @@ def update(self, other) -> None: 2 6 dtype: Int64 - ``other`` can also be a non-Series object type - that is coercible into a Series + ``other`` can also be a non-Series object type + that is coercible into a Series >>> s = bpd.Series([1, 2, 3]) >>> s.update([4, np.nan, 6]) @@ -3039,6 +4408,9 @@ def update(self, other) -> None: Args: other (Series, or object coercible into Series) + + Returns: + None """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3052,7 +4424,8 @@ def all( DataFrame axis that is False or equivalent (e.g. zero or empty). Returns: - scalar or Series: If level is specified, then, Series is returned; + scalar or bigframes.pandas.Series: + If level is specified, then, Series is returned; otherwise, scalar is returned. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3066,8 +4439,29 @@ def any( Returns False unless there is at least one element within a series or along a Dataframe axis that is True or equivalent (e.g. non-zero or non-empty). 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + For Series input, the output is a scalar indicating whether any element is True. + + >>> bpd.Series([False, False]).any() + np.False_ + + >>> bpd.Series([True, False]).any() + np.True_ + + >>> bpd.Series([], dtype="float64").any() + np.False_ + + >>> bpd.Series([np.nan]).any() + np.False_ + Returns: - scalar or Series: If level is specified, then, Series is returned; + scalar or bigframes.pandas.Series: + If level is specified, then, Series is returned; otherwise, scalar is returned. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3093,6 +4487,7 @@ def max( 0 1 1 3 dtype: Int64 + >>> s.max() np.int64(3) @@ -3104,6 +4499,7 @@ def max( 1 3 2 dtype: Int64 + >>> s.max() np.int64(3) @@ -3133,6 +4529,7 @@ def min( 0 1 1 3 dtype: Int64 + >>> s.min() np.int64(1) @@ -3144,6 +4541,7 @@ def min( 1 3 2 dtype: Int64 + >>> s.min() np.int64(1) @@ -3184,9 +4582,8 @@ def std( height 0.237417 dtype: Float64 - Returns - ------- - scalar or Series (if level specified) + Returns: + scalar or bigframes.pandas.Series (if level specified) """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3199,7 +4596,8 @@ def var( Normalized by N-1 by default. Returns: - scalar or Series (if level specified): Variance. + scalar or bigframes.pandas.Series (if level specified): + Variance. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3220,6 +4618,7 @@ def sum(self): 0 1 1 3 dtype: Int64 + >>> s.sum() np.int64(4) @@ -3231,6 +4630,7 @@ def sum(self): 1 3 2 <NA> dtype: Int64 + >>> s.sum() np.int64(4) @@ -3254,6 +4654,7 @@ def mean(self): 0 1 1 3 dtype: Int64 + >>> s.mean() np.float64(2.0) @@ -3265,6 +4666,7 @@ def mean(self): 1 3 2 <NA> dtype: Int64 + >>> s.mean() np.float64(2.0) @@ -3276,6 +4678,30 @@ def mean(self): def median(self, *, exact: bool = True): """Return the median of the values over the requested axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s.median() + np.float64(2.0) + + With a DataFrame + + >>> df = bpd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) + >>> df + a b + tiger 1 2 + zebra 2 3 + + [2 rows x 2 columns] + + >>> df.median() + a 1.5 + b 2.5 + dtype: Float64 + Args: exact (bool. default True): Default True. Get the exact median instead of an approximate @@ -3297,9 +4723,11 @@ def quantile( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) np.float64(2.5) + >>> s.quantile([.25, .5, .75]) 0.25 1.75 0.5 2.5 @@ -3307,11 +4735,11 @@ def quantile( dtype: Float64 Args: - q (float or array-like, default 0.5 (50% quantile)): + q (Union[float, Sequence[float]], default 0.5 (50% quantile)): The quantile(s) to compute, which can lie in range: 0 <= q <= 1. Returns: - float or Series: + Union[float, bigframes.pandas.Series]: If ``q`` is an array, a Series will be returned where the index is ``q`` and the values are the quantiles, otherwise a float will be returned. @@ -3331,6 +4759,33 @@ def skew(self): Normalized by N-1.
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s.skew() + np.float64(0.0) + + With a DataFrame + + >>> df = bpd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [1, 3, 5]}, + ... index=['tiger', 'zebra', 'cow']) + >>> df + a b c + tiger 1 2 1 + zebra 2 3 3 + cow 3 4 5 + + [3 rows x 3 columns] + + >>> df.skew() + a 0.0 + b 0.0 + c 0.0 + dtype: Float64 + Returns: scalar: Scalar. """ @@ -3339,46 +4794,72 @@ def skew(self): def kurt(self): """Return unbiased kurtosis over requested axis. - Kurtosis obtained using Fisher’s definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1. + Kurtosis obtained using Fisher’s definition of kurtosis (kurtosis of + normal == 0.0). Normalized by N-1. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse']) + >>> s + cat 1 + dog 2 + dog 2 + mouse 3 + dtype: Int64 + + >>> s.kurt() + np.float64(1.5) + + With a DataFrame + + >>> df = bpd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]}, + ... index=['cat', 'dog', 'dog', 'mouse']) + >>> df + a b + cat 1 3 + dog 2 4 + dog 2 4 + mouse 3 4 + + [4 rows x 2 columns] + + >>> df.kurt() + a 1.5 + b 4.0 + dtype: Float64 Returns: - scalar or scalar: Unbiased kurtosis over requested axis. + scalar: + Unbiased kurtosis over requested axis. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def items(self): """ - Iterate over (index, value) pairs of a Series. + Lazily iterate over (index, value) tuples. - Iterates over the Series contents, returning a tuple with - the index and the value of a Series. + This method returns an iterable of (index, value) tuples. + This is convenient if you want to create a lazy iterator.
**Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['bear', 'bear', 'marsupial'], - ... index=['panda', 'polar', 'koala']) - >>> s - panda bear - polar bear - koala marsupial - dtype: string - + >>> s = bpd.Series(['A', 'B', 'C']) >>> for index, value in s.items(): - ... print(f'--> index: {index}') - ... print(f'--> value: {value}') - ... - --> index: panda - --> value: bear - --> index: polar - --> value: bear - --> index: koala - --> value: marsupial + ... print(f"Index : {index}, Value : {value}") + Index : 0, Value : A + Index : 1, Value : B + Index : 2, Value : C Returns: - Iterator: Iterator of index, value for each content of the Series. + iterable: + Iterable of tuples containing the (index, value) pairs from a + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3445,7 +4926,8 @@ def where(self, cond, other): extension dtypes). Returns: - bigframes.series.Series: Series after the replacement. + bigframes.pandas.Series: + Series after the replacement. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3567,7 +5049,8 @@ def mask(self, cond, other): extension dtypes). Returns: - bigframes.series.Series: Series after the replacement. + bigframes.pandas.Series: + Series after the replacement. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3580,13 +5063,16 @@ def clip(self, lower, upper): Args: lower (float or array-like, default None): - Minimum threshold value. All values below this threshold will be set to it. A missing threshold (e.g NA) will not clip the value. + Minimum threshold value. All values below this threshold will + be set to it. A missing threshold (e.g NA) will not clip the value. upper (float or array-like, default None): - Maximum threshold value. All values above this threshold will be set to it. A missing threshold (e.g NA) will not clip the value. + Maximum threshold value. 
All values above this threshold will + be set to it. A missing threshold (e.g NA) will not clip the value. Returns: - Series: Series. + bigframes.pandas.Series: + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3594,20 +5080,17 @@ def unstack(self, level): """ Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. - Args: - level (int, str, or list of these, default last level): - Level(s) to unstack, can pass level name. - Returns: - DataFrame: Unstacked Series. + bigframes.pandas.DataFrame: Unstacked Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def argmax(self): """ - Return int position of the smallest value in the series. + Return int position of the largest value in the series. - If the minimum is achieved in multiple locations, the first row position is returned. + If the maximum is achieved in multiple locations, the first row position + is returned. **Examples:** @@ -3635,15 +5118,17 @@ def argmax(self): calories is the first element, since series is zero-indexed. Returns: - Series: Row position of the maximum value. + int: + Row position of the maximum value. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def argmin(self): """ - Return int position of the largest value in the Series. + Return int position of the smallest value in the Series. - If the maximum is achieved in multiple locations, the first row position is returned. + If the minimum is achieved in multiple locations, the first row position + is returned. **Examples:** @@ -3671,7 +5156,8 @@ def argmin(self): calories is the first element, since series is zero-indexed. Returns: - Series: Row position of the minimum value. + int: + Row position of the minimum value. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3721,7 +5207,8 @@ def rename(self, index, **kwargs) -> Series | None: attribute. Returns: - bigframes.series.Series: Series with index labels. 
+ bigframes.pandas.Series: + Series with index labels. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3734,8 +5221,53 @@ def rename_axis(self, mapper, **kwargs): mapper (scalar, list-like, optional): Value to set the axis name attribute. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Series + + >>> s = bpd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: string + + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: string + + DataFrame + + >>> df = bpd.DataFrame({"num_legs": [4, 4, 2], + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs num_arms + dog 4 0 + cat 4 0 + monkey 2 2 + + [3 rows x 2 columns] + + >>> df = df.rename_axis("animal") + >>> df + num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + + [3 rows x 2 columns] + Returns: - bigframes.series.Series: Series with the name of the axis set. + bigframes.pandas.Series or bigframes.pandas.DataFrame: + The same type as the caller. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3819,7 +5351,8 @@ def value_counts( Don't include counts of NaN. Returns: - Series: Series containing counts of unique values. + bigframes.pandas.Series: + Series containing counts of unique values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3860,6 +5393,16 @@ def plot(self): """ Make plots of Series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> ser = bpd.Series([1, 2, 3, 3]) + >>> plot = ser.plot(kind='hist', title="My plot") + >>> plot + + Returns: bigframes.operations.plotting.PlotAccessor: An accessor making plots. 
@@ -3894,9 +5437,23 @@ def isin(self, values): 5 hippo Name: animal, dtype: string - >>> s.isin(['cow', 'llama']) + To invert the boolean values, use the ~ operator: + + >>> ~s.isin(['cow', 'llama']) + 0 False + 1 False + 2 False + 3 True + 4 False + 5 True + Name: animal, dtype: boolean + + Passing a single string as s.isin('llama') will raise an error. Use a + list of one element instead: + + >>> s.isin(['llama']) 0 True - 1 True + 1 False 2 True 3 False 4 True @@ -3918,7 +5475,7 @@ def isin(self, values): TypeError. Instead, turn a single string into a list of one element. Returns: - bigframes.series.Series: Series of booleans indicating if each element is in values. + bigframes.pandas.Series: Series of booleans indicating if each element is in values. Raises: TypeError: If input is not list-like. @@ -3944,7 +5501,8 @@ def is_monotonic_increasing(self) -> bool: np.False_ Returns: - bool: Boolean. + bool: + Boolean. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3967,7 +5525,8 @@ def is_monotonic_decreasing(self) -> bool: np.False_ Returns: - bool: Boolean. + bool: + Boolean. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4051,7 +5610,8 @@ def map( index entry. Returns: - Series: Same index as caller. + bigframes.pandas.Series: + Same index as caller. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4059,8 +5619,79 @@ def map( def iloc(self): """Purely integer-location based indexing for selection by position. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}] + >>> df = bpd.DataFrame(mydict) + >>> df + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + [3 rows x 4 columns] + + Indexing just the rows + + With a scalar integer. 
+ + >>> type(df.iloc[0]) + <class 'bigframes.series.Series'> + + >>> df.iloc[0] + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: Int64 + + With a list of integers. + + >>> df.iloc[0] + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: Int64 + + >>> type(df.iloc[[0]]) + <class 'bigframes.dataframe.DataFrame'> + + >>> df.iloc[[0, 1]] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + + [2 rows x 4 columns] + + With a slice object. + + >>> df.iloc[:3] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + [3 rows x 4 columns] + + Indexing both axes + + You can mix the indexer types for the index and columns. Use : to select + the entire axis. + + With scalar integers. + + >>> df.iloc[0, 1] + np.int64(2) + Returns: - bigframes.core.indexers.IlocSeriesIndexer: Purely integer-location Indexers. + bigframes.core.indexers.IlocSeriesIndexer: + Purely integer-location Indexers. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4068,8 +5699,86 @@ def iloc(self): def loc(self): """Access a group of rows and columns by label(s) or a boolean array. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=['cobra', 'viper', 'sidewinder'], + ... columns=['max_speed', 'shield']) + >>> df + max_speed shield + cobra 1 2 + viper 4 5 + sidewinder 7 8 + + [3 rows x 2 columns] + + Single label. Note this returns the row as a Series. + + >>> df.loc['viper'] + max_speed 4 + shield 5 + Name: viper, dtype: Int64 + + List of labels. Note using [[]] returns a DataFrame. + + >>> df.loc[['viper', 'sidewinder']] + max_speed shield + viper 4 5 + sidewinder 7 8 + + [2 rows x 2 columns] + + Single label for row and column. Note that this returns the scalar + value stored at that position rather than a Series or DataFrame.
+ + >>> df.loc['cobra', 'shield'] + np.int64(2) + + Index (same behavior as df.reindex) + + >>> df.loc[bpd.Index(["cobra", "viper"], name="foo")] + max_speed shield + cobra 1 2 + viper 4 5 + + [2 rows x 2 columns] + + Conditional that returns a boolean Series with column labels specified + + >>> df.loc[df['shield'] > 6, ['max_speed']] + max_speed + sidewinder 7 + + [1 rows x 1 columns] + + Multiple conditional using | that returns a boolean Series + + >>> df.loc[(df['max_speed'] > 4) | (df['shield'] < 5)] + max_speed shield + cobra 1 2 + sidewinder 7 8 + + [2 rows x 2 columns] + + Please ensure that each condition is wrapped in parentheses (). + + Set value for an entire column + + >>> df.loc[:, 'max_speed'] = 30 + >>> df + max_speed shield + cobra 30 2 + viper 30 5 + sidewinder 30 8 + + [3 rows x 2 columns] + Returns: - bigframes.core.indexers.LocSeriesIndexer: Indexers object. + bigframes.core.indexers.LocSeriesIndexer: + Indexers object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4080,21 +5789,31 @@ def iat(self): **Examples:** >>> import bigframes.pandas as bpd - >>> s = bpd.Series(bpd.Series([1, 2, 3])) >>> bpd.options.display.progress_bar = None - >>> s - 0 1 - 1 2 - 2 3 - dtype: Int64 - Get value at specified row number + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... columns=['A', 'B', 'C']) + >>> df + A B C + 0 0 2 3 + 1 0 4 1 + 2 10 20 30 + + [3 rows x 3 columns] + + Get value at specified row/column pair + + >>> df.iat[1, 2] + np.int64(1) + + Get value within a series - >>> s.iat[1] + >>> df.loc[0].iat[1] np.int64(2) Returns: - bigframes.core.indexers.IatSeriesIndexer: Indexers object. + bigframes.core.indexers.IatSeriesIndexer: + Indexers object. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4105,22 +5824,31 @@ def at(self): **Examples:** >>> import bigframes.pandas as bpd - >>> s = bpd.Series([1, 2, 3], index=['A', 'B', 'C']) >>> bpd.options.display.progress_bar = None - >>> s - A 1 - B 2 - C 3 - dtype: Int64 - Get value at specified row label + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... index=[4, 5, 6], columns=['A', 'B', 'C']) + >>> df + A B C + 4 0 2 3 + 5 0 4 1 + 6 10 20 30 + + [3 rows x 3 columns] + + Get value at specified row/column pair - >>> s.at['B'] + >>> df.at[4, 'B'] np.int64(2) + Get value at specified row label + + >>> df.loc[5].at['B'] + np.int64(4) Returns: - bigframes.core.indexers.AtSeriesIndexer: Indexers object. + bigframes.core.indexers.AtSeriesIndexer: + Indexers object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4141,7 +5869,8 @@ def values(self): array(['a', 'a', 'b', 'c'], dtype=object) Returns: - numpy.ndarray or ndarray-like: Values in the Series. + numpy.ndarray or ndarray-like: + Values in the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4157,7 +5886,12 @@ def size(self) -> int: For Series: - >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string >>> s.size 3 @@ -4168,7 +5902,8 @@ def size(self) -> int: 3 Returns: - int: Return the number of elements in the underlying data. + int: + Return the number of elements in the underlying data. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4236,7 +5971,8 @@ def __invert__(self): dtype: boolean Returns: - Series: The inverted values in the series. + bigframes.pandas.Series: + The inverted values in the series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4274,7 +6010,8 @@ def __and__(self, other): Object to bitwise AND with the Series. 
Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4312,7 +6049,8 @@ def __or__(self, other): Object to bitwise OR with the Series. Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4350,7 +6088,8 @@ def __xor__(self, other): Object to bitwise XOR with the Series. Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4365,6 +6104,7 @@ def __getitem__(self, indexer): >>> s = bpd.Series([15, 30, 45]) >>> s[1] np.int64(30) + >>> s[0:2] 0 15 1 30 @@ -4375,6 +6115,7 @@ def __getitem__(self, indexer): Index or slice of indices. Returns: - Series or Value: Value(s) at the requested index(es). + bigframes.pandas.Series or Value: + Value(s) at the requested index(es). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index 0858c02c1e..50dde36b01 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.32.0" +__version__ = "1.33.0"