From 7c0e30731564089f0206f6950166edbe1d6f5371 Mon Sep 17 00:00:00 2001 From: Alec Glassford Date: Wed, 3 Apr 2019 17:18:36 -0700 Subject: [PATCH 1/2] Add custom prediction routine samples Change-Id: I734bebd77970a3ab627b0cbffdcb8fef320c2de4 --- .../custom-prediction-routines/README.md | 28 +++++++ .../predictor-interface.py | 50 +++++++++++++ .../custom-prediction-routines/preprocess.py | 43 +++++++++++ .../scikit-predictor.py | 73 +++++++++++++++++++ ml_engine/custom-prediction-routines/setup.py | 20 +++++ .../tensorflow-predictor.py | 73 +++++++++++++++++++ 6 files changed, 287 insertions(+) create mode 100644 ml_engine/custom-prediction-routines/README.md create mode 100644 ml_engine/custom-prediction-routines/predictor-interface.py create mode 100644 ml_engine/custom-prediction-routines/preprocess.py create mode 100644 ml_engine/custom-prediction-routines/scikit-predictor.py create mode 100644 ml_engine/custom-prediction-routines/setup.py create mode 100644 ml_engine/custom-prediction-routines/tensorflow-predictor.py diff --git a/ml_engine/custom-prediction-routines/README.md b/ml_engine/custom-prediction-routines/README.md new file mode 100644 index 00000000000..86e66e8e2cb --- /dev/null +++ b/ml_engine/custom-prediction-routines/README.md @@ -0,0 +1,28 @@ +# Custom prediction routines (beta) + +Read the AI Platform documentation about custom prediction routines to learn how +to use these samples: + +* [Custom prediction routines (with a TensorFlow Keras + example)](https://cloud.google.com/ml-engine/docs/tensorflow/custom-prediction-routines) +* [Custom prediction routines (with a scikit-learn + example)](https://cloud.google.com/ml-engine/docs/scikit/custom-prediction-routines) + +If you want to package a predictor directly from this directory, make sure to +edit `setup.py`: replace the reference to `predictor.py` with either +`tensorflow-predictor.py` or `scikit-predictor.py`. + +## What's next + +For a more complete example of how to train and deploy a custom prediction +routine, check out one of the following tutorials: + +* [Creating a custom prediction routine with + Keras](https://cloud.google.com/ml-engine/docs/tensorflow/custom-prediction-routine-keras) + (also available as [a Jupyter + notebook](https://colab.research.google.com/github/GoogleCloudPlatform/cloudml-samples/blob/master/notebooks/tensorflow/custom-prediction-routine-keras.ipynb)) + +* [Creating a custom prediction routine with + scikit-learn](https://cloud.google.com/ml-engine/docs/scikit/custom-prediction-routine-scikit-learn) + (also available as [a Jupyter + notebook](https://colab.research.google.com/github/GoogleCloudPlatform/cloudml-samples/blob/master/notebooks/scikit-learn/custom-prediction-routine-scikit-learn.ipynb)) \ No newline at end of file diff --git a/ml_engine/custom-prediction-routines/predictor-interface.py b/ml_engine/custom-prediction-routines/predictor-interface.py new file mode 100644 index 00000000000..e7efcdbadb7 --- /dev/null +++ b/ml_engine/custom-prediction-routines/predictor-interface.py @@ -0,0 +1,50 @@ +# Copyright 2019 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Predictor(object): + """Interface for constructing custom predictors.""" + + def predict(self, instances, **kwargs): + """Performs custom prediction. + + Instances are the decoded values from the request. They have already + been deserialized from JSON. + + Args: + instances: A list of prediction input instances. + **kwargs: A dictionary of keyword args provided as additional fields + on the predict request body. + + Returns: + A list of outputs containing the prediction results. This list must + be JSON serializable. + """ + raise NotImplementedError() + + @classmethod + def from_path(cls, model_dir): + """Creates an instance of Predictor using the given path. + + Loading of the predictor should be done in this method. + + Args: + model_dir: The local directory that contains the exported model file + along with any additional files uploaded when creating the + version resource. + + Returns: + An instance implementing this Predictor class. + """ + raise NotImplementedError() diff --git a/ml_engine/custom-prediction-routines/preprocess.py b/ml_engine/custom-prediction-routines/preprocess.py new file mode 100644 index 00000000000..8c7d8329fef --- /dev/null +++ b/ml_engine/custom-prediction-routines/preprocess.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + + +class ZeroCenterer(object): + """Stores means of each column of a matrix and uses them for preprocessing. + """ + + def __init__(self): + """On initialization, is not tied to any distribution.""" + self._means = None + + def preprocess(self, data): + """Transforms a matrix. + + The first time this is called, it stores the means of each column of the + input. Then it transforms the input so each column has mean 0. For + subsequent calls, it subtracts the stored means from each column. This + lets you 'center' data at prediction time based on the distribution of + the original training data. + + Args: + data: A NumPy matrix of numerical data. + + Returns: + A transformed matrix with the same dimensions as the input. + """ + if self._means is None: # during training only + self._means = np.mean(data, axis=0) + return data - self._means diff --git a/ml_engine/custom-prediction-routines/scikit-predictor.py b/ml_engine/custom-prediction-routines/scikit-predictor.py new file mode 100644 index 00000000000..3648070a750 --- /dev/null +++ b/ml_engine/custom-prediction-routines/scikit-predictor.py @@ -0,0 +1,73 @@ +# Copyright 2019 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import pickle + +import numpy as np +from sklearn.externals import joblib + + +class MyPredictor(object): + """An example Predictor for an AI Platform custom prediction routine.""" + + def __init__(self, model, preprocessor): + """Stores artifacts for prediction. Only initialized via `from_path`. + """ + self._model = model + self._preprocessor = preprocessor + + def predict(self, instances, **kwargs): + """Performs custom prediction. + + Preprocesses inputs, then performs prediction using the trained + scikit-learn model. + + Args: + instances: A list of prediction input instances. + **kwargs: A dictionary of keyword args provided as additional fields + on the predict request body. + + Returns: + A list of outputs containing the prediction results. + """ + inputs = np.asarray(instances) + preprocessed_inputs = self._preprocessor.preprocess(inputs) + outputs = self._model.predict(preprocessed_inputs) + return outputs.tolist() + + @classmethod + def from_path(cls, model_dir): + """Creates an instance of MyPredictor using the given path. + + This loads artifacts that have been copied from your model directory in + Cloud Storage. MyPredictor uses them during prediction. + + Args: + model_dir: The local directory that contains the trained + scikit-learn model and the pickled preprocessor instance. These + are copied from the Cloud Storage model directory you provide + when you deploy a version resource. + + Returns: + An instance of `MyPredictor`. + """ + model_path = os.path.join(model_dir, 'model.joblib') + model = joblib.load(model_path) + + preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl') + with open(preprocessor_path, 'rb') as f: + preprocessor = pickle.load(f) + + return cls(model, preprocessor) diff --git a/ml_engine/custom-prediction-routines/setup.py b/ml_engine/custom-prediction-routines/setup.py new file mode 100644 index 00000000000..f313c0f1404 --- /dev/null +++ b/ml_engine/custom-prediction-routines/setup.py @@ -0,0 +1,20 @@ +# Copyright 2019 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import setup + +setup( + name='my_custom_code', + version='0.1', + scripts=['predictor.py', 'preprocess.py']) diff --git a/ml_engine/custom-prediction-routines/tensorflow-predictor.py b/ml_engine/custom-prediction-routines/tensorflow-predictor.py new file mode 100644 index 00000000000..6e9ae5d803f --- /dev/null +++ b/ml_engine/custom-prediction-routines/tensorflow-predictor.py @@ -0,0 +1,73 @@ +# Copyright 2019 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import pickle + +import numpy as np +from tensorflow import keras + + +class MyPredictor(object): + """An example Predictor for an AI Platform custom prediction routine.""" + + def __init__(self, model, preprocessor): + """Stores artifacts for prediction. Only initialized via `from_path`. + """ + self._model = model + self._preprocessor = preprocessor + + def predict(self, instances, **kwargs): + """Performs custom prediction. + + Preprocesses inputs, then performs prediction using the trained Keras + model. + + Args: + instances: A list of prediction input instances. + **kwargs: A dictionary of keyword args provided as additional fields + on the predict request body. + + Returns: + A list of outputs containing the prediction results. + """ + inputs = np.asarray(instances) + preprocessed_inputs = self._preprocessor.preprocess(inputs) + outputs = self._model.predict(preprocessed_inputs) + return outputs.tolist() + + @classmethod + def from_path(cls, model_dir): + """Creates an instance of MyPredictor using the given path. + + This loads artifacts that have been copied from your model directory in + Cloud Storage. MyPredictor uses them during prediction. + + Args: + model_dir: The local directory that contains the trained Keras model + and the pickled preprocessor instance. These are copied from the + Cloud Storage model directory you provide when you deploy a + version resource. + + Returns: + An instance of `MyPredictor`. + """ + model_path = os.path.join(model_dir, 'model.h5') + model = keras.models.load_model(model_path) + + preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl') + with open(preprocessor_path, 'rb') as f: + preprocessor = pickle.load(f) + + return cls(model, preprocessor) From c6fbc2bd5d18c128c7f8e6f48d1f4663fc443052 Mon Sep 17 00:00:00 2001 From: Alec Glassford Date: Thu, 25 Apr 2019 14:51:45 -0700 Subject: [PATCH 2/2] Ensure line limit of 79 characters Change-Id: Ic3b512b7478a1e5052baf2978ed1fbc384793e2e --- .../predictor-interface.py | 8 ++++---- ml_engine/custom-prediction-routines/preprocess.py | 4 ++-- .../custom-prediction-routines/scikit-predictor.py | 4 ++-- .../tensorflow-predictor.py | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ml_engine/custom-prediction-routines/predictor-interface.py b/ml_engine/custom-prediction-routines/predictor-interface.py index e7efcdbadb7..a45ea763f80 100644 --- a/ml_engine/custom-prediction-routines/predictor-interface.py +++ b/ml_engine/custom-prediction-routines/predictor-interface.py @@ -24,8 +24,8 @@ def predict(self, instances, **kwargs): Args: instances: A list of prediction input instances. - **kwargs: A dictionary of keyword args provided as additional fields - on the predict request body. + **kwargs: A dictionary of keyword args provided as additional + fields on the predict request body. Returns: A list of outputs containing the prediction results. This list must @@ -40,8 +40,8 @@ def from_path(cls, model_dir): Loading of the predictor should be done in this method. Args: - model_dir: The local directory that contains the exported model file - along with any additional files uploaded when creating the + model_dir: The local directory that contains the exported model + file along with any additional files uploaded when creating the version resource. Returns: diff --git a/ml_engine/custom-prediction-routines/preprocess.py b/ml_engine/custom-prediction-routines/preprocess.py index 8c7d8329fef..e28aaf357df 100644 --- a/ml_engine/custom-prediction-routines/preprocess.py +++ b/ml_engine/custom-prediction-routines/preprocess.py @@ -26,8 +26,8 @@ def __init__(self): def preprocess(self, data): """Transforms a matrix. - The first time this is called, it stores the means of each column of the - input. Then it transforms the input so each column has mean 0. For + The first time this is called, it stores the means of each column of + the input. Then it transforms the input so each column has mean 0. For subsequent calls, it subtracts the stored means from each column. This lets you 'center' data at prediction time based on the distribution of the original training data. diff --git a/ml_engine/custom-prediction-routines/scikit-predictor.py b/ml_engine/custom-prediction-routines/scikit-predictor.py index 3648070a750..ca2998bc68f 100644 --- a/ml_engine/custom-prediction-routines/scikit-predictor.py +++ b/ml_engine/custom-prediction-routines/scikit-predictor.py @@ -36,8 +36,8 @@ def predict(self, instances, **kwargs): Args: instances: A list of prediction input instances. - **kwargs: A dictionary of keyword args provided as additional fields - on the predict request body. + **kwargs: A dictionary of keyword args provided as additional + fields on the predict request body. Returns: A list of outputs containing the prediction results. diff --git a/ml_engine/custom-prediction-routines/tensorflow-predictor.py b/ml_engine/custom-prediction-routines/tensorflow-predictor.py index 6e9ae5d803f..3d8ed8422f8 100644 --- a/ml_engine/custom-prediction-routines/tensorflow-predictor.py +++ b/ml_engine/custom-prediction-routines/tensorflow-predictor.py @@ -36,8 +36,8 @@ def predict(self, instances, **kwargs): Args: instances: A list of prediction input instances. - **kwargs: A dictionary of keyword args provided as additional fields - on the predict request body. + **kwargs: A dictionary of keyword args provided as additional + fields on the predict request body. Returns: A list of outputs containing the prediction results. @@ -55,10 +55,10 @@ def from_path(cls, model_dir): Cloud Storage. MyPredictor uses them during prediction. Args: - model_dir: The local directory that contains the trained Keras model - and the pickled preprocessor instance. These are copied from the - Cloud Storage model directory you provide when you deploy a - version resource. + model_dir: The local directory that contains the trained Keras + model and the pickled preprocessor instance. These are copied + from the Cloud Storage model directory you provide when you + deploy a version resource. Returns: An instance of `MyPredictor`.