BigQuery: Update pandas/bqstorage samples to latest library changes. #2413


Merged: 1 commit, merged Sep 24, 2019
bigquery/pandas-gbq-migration/requirements.txt (7 changes: 5 additions & 2 deletions)

@@ -1,2 +1,5 @@
-google-cloud-bigquery[pandas,pyarrow]==1.9.0
-pandas-gbq==0.9.0
+google-cloud-bigquery==1.20.0
+google-cloud-bigquery-storage==0.7.0
+pandas==0.25.1
+pandas-gbq==0.11.0
+pyarrow==0.14.1
Contributor:

The pyarrow req still feels odd to me. Should we add a comment if there's an alternate option, or start expressing a harder dependency via one of the other libraries?

Contributor Author:

I think at some point we'll make pyarrow a hard dependency in google-cloud-bigquery-storage (in fact, it already is in the conda package), but probably not until after filtering support is launched for the arrow wire format.
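For illustration only, one way to express that harder dependency today would be to reuse the extras syntax the old pin already used, so that google-cloud-bigquery itself pulls in a compatible pyarrow rather than this file pinning it directly. This is a sketch of an alternative, not what the PR does, and it assumes the pandas/pyarrow extras are still published for 1.20.0:

    # Hypothetical alternative requirements.txt: delegate the pyarrow
    # dependency to google-cloud-bigquery's extras instead of pinning it.
    google-cloud-bigquery[pandas,pyarrow]==1.20.0
    google-cloud-bigquery-storage==0.7.0
    pandas-gbq==0.11.0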

bigquery/pandas-gbq-migration/samples_test.py (71 changes: 59 additions & 12 deletions)

@@ -81,6 +81,41 @@ def test_pandas_gbq_query():
     assert len(df) > 0
 
 
+def test_client_library_query_bqstorage():
+    # [START bigquery_migration_client_library_query_bqstorage]
+    import google.auth
+    from google.cloud import bigquery
+    from google.cloud import bigquery_storage_v1beta1
+
+    # Create a BigQuery client and a BigQuery Storage API client with the same
+    # credentials to avoid authenticating twice.
+    credentials, project_id = google.auth.default(
+        scopes=["https://www.googleapis.com/auth/cloud-platform"]
+    )
+    client = bigquery.Client(credentials=credentials, project=project_id)
+    bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
+        credentials=credentials
+    )
+    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"
+
+    # Use a BigQuery Storage API client to download results more quickly.
+    df = client.query(sql).to_dataframe(bqstorage_client=bqstorage_client)
+    # [END bigquery_migration_client_library_query_bqstorage]
+    assert len(df) > 0
+
+
+def test_pandas_gbq_query_bqstorage():
+    # [START bigquery_migration_pandas_gbq_query_bqstorage]
+    import pandas
+
+    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"
+
+    # Use the BigQuery Storage API to download results more quickly.
+    df = pandas.read_gbq(sql, dialect='standard', use_bqstorage_api=True)
+    # [END bigquery_migration_pandas_gbq_query_bqstorage]
+    assert len(df) > 0
+
+
 def test_client_library_legacy_query():
     # [START bigquery_migration_client_library_query_legacy]
     from google.cloud import bigquery
@@ -184,16 +219,28 @@ def test_client_library_upload_from_dataframe(temp_dataset):
         }
     )
     client = bigquery.Client()
-    dataset_ref = client.dataset('my_dataset')
+    table_id = 'my_dataset.new_table'
     # [END bigquery_migration_client_library_upload_from_dataframe]
-    dataset_ref = client.dataset(temp_dataset.dataset_id)
+    table_id = (
+        temp_dataset.dataset_id
+        + ".test_client_library_upload_from_dataframe"
+    )
     # [START bigquery_migration_client_library_upload_from_dataframe]
-    table_ref = dataset_ref.table('new_table')
-    client.load_table_from_dataframe(df, table_ref).result()
+    # Since string columns use the "object" dtype, pass in a (partial) schema
+    # to ensure the correct BigQuery data type.
+    job_config = bigquery.LoadJobConfig(schema=[
+        bigquery.SchemaField("my_string", "STRING"),
+    ])
+
+    job = client.load_table_from_dataframe(
+        df, table_id, job_config=job_config
+    )
+
+    # Wait for the load job to complete.
+    job.result()
     # [END bigquery_migration_client_library_upload_from_dataframe]
-    client = bigquery.Client()
-    table = client.get_table(table_ref)
+    table = client.get_table(table_id)
     assert table.num_rows == 3
 
 
@@ -209,16 +256,16 @@ def test_pandas_gbq_upload_from_dataframe(temp_dataset):
             'my_float64': [4.0, 5.0, 6.0],
         }
     )
-    full_table_id = 'my_dataset.new_table'
-    project_id = 'my-project-id'
+    table_id = 'my_dataset.new_table'
     # [END bigquery_migration_pandas_gbq_upload_from_dataframe]
-    table_id = 'new_table'
-    full_table_id = '{}.{}'.format(temp_dataset.dataset_id, table_id)
-    project_id = os.environ['GCLOUD_PROJECT']
+    table_id = (
+        temp_dataset.dataset_id
+        + ".test_pandas_gbq_upload_from_dataframe"
+    )
     # [START bigquery_migration_pandas_gbq_upload_from_dataframe]
 
-    df.to_gbq(full_table_id, project_id=project_id)
+    df.to_gbq(table_id)
     # [END bigquery_migration_pandas_gbq_upload_from_dataframe]
     client = bigquery.Client()
-    table = client.get_table(temp_dataset.table(table_id))
+    table = client.get_table(table_id)
     assert table.num_rows == 3
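As an aside, pandas-gbq can express the same partial-schema hint that the client-library sample above passes via LoadJobConfig. A minimal sketch, assuming the table_schema parameter of to_gbq as shipped in pandas-gbq 0.11.0:

    import pandas

    df = pandas.DataFrame({'my_string': ['a', 'b', 'c']})

    # Pass a partial schema so the "object"-dtype column maps to STRING
    # rather than relying on type inference.
    df.to_gbq(
        'my_dataset.new_table',
        table_schema=[{'name': 'my_string', 'type': 'STRING'}],
    )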
bigquery_storage/to_dataframe/jupyter_test.py (49 changes: 8 additions & 41 deletions)

@@ -75,9 +75,6 @@ def test_jupyter_small_query(ipython):
assert "stackoverflow" in ip.user_ns # verify that variable exists


@pytest.mark.skipif(
"TRAVIS" in os.environ, reason="Not running long-running queries on Travis"
)
def test_jupyter_tutorial(ipython):
ip = IPython.get_ipython()
ip.extension_manager.load_extension("google.cloud.bigquery")
@@ -86,33 +83,18 @@ def test_jupyter_tutorial(ipython):
     # speed-up of using the BigQuery Storage API to download the results.
     sample = """
     # [START bigquerystorage_jupyter_tutorial_query]
-    %%bigquery nodejs_deps --use_bqstorage_api
-    SELECT
-        dependency_name,
-        dependency_platform,
-        project_name,
-        project_id,
-        version_number,
-        version_id,
-        dependency_kind,
-        optional_dependency,
-        dependency_requirements,
-        dependency_project_id
-    FROM
-        `bigquery-public-data.libraries_io.dependencies`
-    WHERE
-        LOWER(dependency_platform) = 'npm'
-    LIMIT 2500000
+    %%bigquery tax_forms --use_bqstorage_api
+    SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
     # [END bigquerystorage_jupyter_tutorial_query]
     """
     result = ip.run_cell(_strip_region_tags(sample))
     result.raise_error()  # Throws an exception if the cell failed.
 
-    assert "nodejs_deps" in ip.user_ns  # verify that variable exists
-    nodejs_deps = ip.user_ns["nodejs_deps"]
+    assert "tax_forms" in ip.user_ns  # verify that variable exists
+    tax_forms = ip.user_ns["tax_forms"]
 
     # [START bigquerystorage_jupyter_tutorial_results]
-    nodejs_deps.head()
+    tax_forms.head()
     # [END bigquerystorage_jupyter_tutorial_results]
 
     # [START bigquerystorage_jupyter_tutorial_context]
@@ -123,26 +105,11 @@ def test_jupyter_tutorial(ipython):

     sample = """
     # [START bigquerystorage_jupyter_tutorial_query_default]
-    %%bigquery java_deps
-    SELECT
-        dependency_name,
-        dependency_platform,
-        project_name,
-        project_id,
-        version_number,
-        version_id,
-        dependency_kind,
-        optional_dependency,
-        dependency_requirements,
-        dependency_project_id
-    FROM
-        `bigquery-public-data.libraries_io.dependencies`
-    WHERE
-        LOWER(dependency_platform) = 'maven'
-    LIMIT 2500000
+    %%bigquery tax_forms
+    SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
     # [END bigquerystorage_jupyter_tutorial_query_default]
     """
     result = ip.run_cell(_strip_region_tags(sample))
     result.raise_error()  # Throws an exception if the cell failed.
 
-    assert "java_deps" in ip.user_ns  # verify that variable exists
+    assert "tax_forms" in ip.user_ns  # verify that variable exists
bigquery_storage/to_dataframe/requirements.txt (8 changes: 4 additions & 4 deletions)

@@ -1,6 +1,6 @@
 google-auth==1.6.2
-google-cloud-bigquery-storage==0.6.0
-google-cloud-bigquery==1.17.0
-pyarrow==0.13.0
+google-cloud-bigquery-storage==0.7.0
+google-cloud-bigquery==1.20.0
+pyarrow==0.14.1
 ipython==7.2.0
-pandas==0.24.2
+pandas==0.25.1
noxfile.py (3 changes: 2 additions & 1 deletion)

@@ -167,7 +167,8 @@ def _setup_appengine_sdk(session):
 PY3_ONLY_SAMPLES = [
     sample for sample in ALL_TESTED_SAMPLES
     if (sample.startswith('./appengine/standard_python37')
-        or sample.startswith('./functions/'))]
+        or sample.startswith('./functions/')
+        or sample.startswith('./bigquery/pandas-gbq-migration'))]
 NON_GAE_STANDARD_SAMPLES_PY2 = sorted(list((
     set(ALL_TESTED_SAMPLES) -
     set(GAE_STANDARD_SAMPLES)) -