googleapis · rey-esp · Nov 19, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 12, 2024
@@ -14,7 +14,7 @@


 def test_boosted_tree_model(random_model_id: str) -> None:
-    # your_model_id = random_model_id
+    your_model_id = random_model_id
    # [START bigquery_dataframes_bqml_boosted_tree_prepare]
    import bigframes.pandas as bpd

@@ -39,4 +39,28 @@ def test_boosted_tree_model(random_model_id: str) -> None:
    )
    del input_data["functional_weight"]
    # [END bigquery_dataframes_bqml_boosted_tree_prepare]
+    # [START bigquery_dataframes_bqml_boosted_tree_create]
+    from bigframes.ml import ensemble
+
+    # input_data is defined in an earlier step.
+    training_data = input_data[input_data["dataframe"] == "training"]
+    X = training_data.drop(columns=["income_bracket", "dataframe"])
+    y = training_data["income_bracket"]
+
+    # create and train the model
+    census_model = ensemble.XGBClassifier(
+        n_estimators=1,
+        booster="gbtree",
+        tree_method="hist",
+        max_iterations=1,  # For a more accurate model, try 50 iterations.
+        subsample=0.85,
+    )
+    census_model.fit(X, y)
+
+    census_model.to_gbq(
+        your_model_id,  # For example: "your-project.census.census_model"
+        replace=True,
+    )
+    # [END bigquery_dataframes_bqml_boosted_tree_create]
    assert input_data is not None
+    assert census_model is not None
-Original file line number
+Diff line change
@@ -14,7 +14,7 @@
     def test_boosted_tree_model(random_model_id: str) -> None:
-        # your_model_id = random_model_id
+        your_model_id = random_model_id
         # [START bigquery_dataframes_bqml_boosted_tree_prepare]
         import bigframes.pandas as bpd
@@ -39,4 +39,28 @@ def test_boosted_tree_model(random_model_id: str) -> None:
         )
         del input_data["functional_weight"]
         # [END bigquery_dataframes_bqml_boosted_tree_prepare]
+        # [START bigquery_dataframes_bqml_boosted_tree_create]
+        from bigframes.ml import ensemble
+        # input_data is defined in an earlier step.
+        training_data = input_data[input_data["dataframe"] == "training"]
               Copy link

  
        
      
    

    
        

  
    Collaborator


        

    
  

  
    
      
          
      

      
            tswast
  

      

      

      


        Nov 15, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      No action needed, but something to consider for the future: it would be nice to update the prepare section above to work without referencing an index (e.g. when ordering mode = "partial").
We have a few options, but the easiest will be to start with a string column and add (True, "training") as the last entry in the list of cases.
Aside: we have an issue open (349926559) to allow selecting any column in the dataframe (such as functional_weight, which would be a natural choice in this example) even if it's a different type, so long as a True (default) case is provided.
    
  
  



    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
+        X = training_data.drop(columns=["income_bracket", "dataframe"])
+        y = training_data["income_bracket"]
               Copy link

  
        
      
    

    
        

  
    Collaborator


        

    
  

  
    
      
          
      

      
            tswast
  

      

      

      


        Nov 15, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      Presumably you ran this code sample and it worked OK? I remember we had some bugs in the past where y had to be a DataFrame, not a Series, so just double-checking.
    
  
  



    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



    
        
  
    
      
          
    
  


        
            
  
      
              Copy link

  
        
      
    

    
        

  
    Contributor


        

  Author


    
  

  
    
      
          
      

      
            rey-esp
  

      

      

      


        Nov 15, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      The code sample seems to run! Not sure if I did it right, so here's the Colab notebook: https://colab.sandbox.google.com/drive/10jA6zSRiptXWrTkCcmyCT_sYBjLqGJx0?resourcekey=0-0TrIkmDzAJw_F6ONFikwaA#scrollTo=wU367u1SAj3Y
    
  
  



    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
+        # create and train the model
+        census_model = ensemble.XGBClassifier(
+            n_estimators=1,
+            booster="gbtree",
+            tree_method="hist",
+            max_iterations=1,  # For a more accurate model, try 50 iterations.
+            subsample=0.85,
+        )
+        census_model.fit(X, y)
+        census_model.to_gbq(
+            your_model_id,  # For example: "your-project.census.census_model"
+            replace=True,
+        )
+        # [END bigquery_dataframes_bqml_boosted_tree_create]
         assert input_data is not None
+        assert census_model is not None