From 9358615cde7b977d78bf4c84a6f69f0ceb1d598c Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 20 Dec 2019 16:33:53 +0100 Subject: [PATCH 01/28] Dockerized posenet-python. --- .gitignore | 2 ++ Dockerfile | 20 ++++++++++++++++++++ NOTICE.txt | 3 +-- README.md | 22 +++++++++++++++++++++- docker_img_build.sh | 3 +++ docker_run.sh | 5 +++++ get_test_images_run.sh | 3 +++ image_demo_run.sh | 3 +++ requirements.txt | 6 ++++++ 9 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 Dockerfile create mode 100755 docker_img_build.sh create mode 100755 docker_run.sh create mode 100755 get_test_images_run.sh create mode 100755 image_demo_run.sh create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 4a224ea..541df98 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ output/* .idea/* .idea _models/* +_posenet_weights/* # Byte-compiled / optimized / DLL files __pycache__/ @@ -174,6 +175,7 @@ cmake-build-*/ # IntelliJ out/ +output.txt # mpeltonen/sbt-idea plugin .idea_modules/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..54d2e2a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM tensorflow/tensorflow:1.15.0-gpu-py3-jupyter +# see: https://www.tensorflow.org/install/docker +# see: https://hub.docker.com/r/tensorflow/tensorflow/ + +# Install system packages +RUN apt-get update && apt-get install -y --no-install-recommends \ + bzip2 \ + git \ + wget && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /work/ + +WORKDIR /work + +RUN pip install -r requirements.txt + +ENV PYTHONPATH='/work/:$PYTHONPATH' + +CMD ["bash"] diff --git a/NOTICE.txt b/NOTICE.txt index f6092d8..d01bd39 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -7,5 +7,4 @@ Modified (c) 2018 Ross Wightman tfjs PoseNet weights and original JS code Copyright 2018 Google LLC. All Rights Reserved. 
- - +(https://github.com/tensorflow/tfjs-models | Apache License 2.0) diff --git a/README.md b/README.md index 4cfaf80..9d7369a 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,26 @@ pip install opencv-python==3.4.5.20 ``` +### Using Docker + +A convenient way to run this project is by building and running the docker image, because it has all the requirements built-in. The main +requirement is that you have a Linux machine with a GPU set up with docker, the nvidia host driver and the nvidia-docker toolkit. Once set +up, you can make as many images as you want with different depencencies without touching your host OS (or fiddling with conda). + +```bash +./docker_img_build.sh +./get_test_images_run.sh +./image_demo_run.sh +``` + +Some pointers to get you going on the Linux machine setup. Most links are based on Ubuntu, but other distributions should work fine as well. +* [Install docker](https://docs.docker.com/install/linux/docker-ce/ubuntu/ ) +* [Install the NVIDIA host driver](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu-installation) + * remember to reboot here +* [Install the NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-docker) +* check your installation: `docker run --gpus all nvidia/cuda nvidia-smi` + + ### Usage There are three demo apps in the root that utilize the PoseNet model. They are very basic and could definitely be improved. @@ -56,7 +76,7 @@ The webcam demo uses OpenCV to capture images from a connected webcam. The resul The original model, weights, code, etc. was created by Google and can be found at https://github.com/tensorflow/tfjs-models/tree/master/posenet -This port and my work is in no way related to Google. +This port is initially created by Ross Wightman and is in no way related to Google. 
The Python conversion code that started me on my way was adapted from the CoreML port at https://github.com/infocom-tpo/PoseNet-CoreML diff --git a/docker_img_build.sh b/docker_img_build.sh new file mode 100755 index 0000000..0537dc6 --- /dev/null +++ b/docker_img_build.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +docker build -t posenet-python -f Dockerfile . diff --git a/docker_run.sh b/docker_run.sh new file mode 100755 index 0000000..e73cf39 --- /dev/null +++ b/docker_run.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +WORK=$(pwd) + +docker run --gpus all -it -v $WORK:/work posenet-python python "$@" diff --git a/get_test_images_run.sh b/get_test_images_run.sh new file mode 100755 index 0000000..446801b --- /dev/null +++ b/get_test_images_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./docker_run.sh get_test_images.py diff --git a/image_demo_run.sh b/image_demo_run.sh new file mode 100755 index 0000000..44b2cb2 --- /dev/null +++ b/image_demo_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./docker_run.sh image_demo.py --model 101 --image_dir ./images --output_dir ./output diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..10e9cf2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +# scipy==1.4.1 +scipy==1.1.* +# pyyaml==5.2 +pyyaml==3.* +opencv-python-headless==3.4.5.20 +# opencv-python==3.4.5.20 From aa6d0e95c09ec11ccc73c687ffda7021d2f90dd1 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 20 Dec 2019 16:40:40 +0100 Subject: [PATCH 02/28] Prioritising the todo's. 
--- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9d7369a..ca11979 100644 --- a/README.md +++ b/README.md @@ -80,10 +80,10 @@ This port is initially created by Ross Wightman and is in no way related to Goog The Python conversion code that started me on my way was adapted from the CoreML port at https://github.com/infocom-tpo/PoseNet-CoreML -### TODO (someday, maybe) -* More stringent verification of correctness against the original implementation +### TODO +* Migration to Tensorflow 2.x +* Adding ResNet50 (PoseNet 2) * Performance improvements (especially edge loops in 'decode.py') * OpenGL rendering/drawing * Comment interfaces, tensor dimensions, etc -* Implement batch inference for image_demo From 194c8b49d385650e418a665bf0640fab3c5e8a1f Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Mon, 23 Dec 2019 11:34:46 +0100 Subject: [PATCH 03/28] Adding documentation and notes for refactoring. --- README.md | 1 - posenet/converter/tfjs2python.py | 85 +++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ca11979..fd3f29f 100644 --- a/README.md +++ b/README.md @@ -86,4 +86,3 @@ The Python conversion code that started me on my way was adapted from the CoreML * Performance improvements (especially edge loops in 'decode.py') * OpenGL rendering/drawing * Comment interfaces, tensor dimensions, etc - diff --git a/posenet/converter/tfjs2python.py b/posenet/converter/tfjs2python.py index 649fb43..63086d1 100755 --- a/posenet/converter/tfjs2python.py +++ b/posenet/converter/tfjs2python.py @@ -11,8 +11,21 @@ BASE_DIR = os.path.join(tempfile.gettempdir(), '_posenet_weights') +# Note that this file contains reverse-engineered documentation that contains several notes about points that need to be verified. + def to_output_strided_layers(convolution_def, output_stride): + """ + There seem to be some magic formulas used in this function. 
The output magically aligns with the details of the layer definition + for MobilenetV1. Not sure how reusable this is for other networks that use depthwise convolutions. + + Note: Verify whether we can reuse this function for other networks, like MobilenetV2. + + :param convolution_def: A MobileNet convolution definition selection from the config.yaml file. + :param output_stride: The chosen output stride. Note to check how the output stride is coupled to the chosen network + variables (see the load_variables function). + :return: An array containing an element for each layer with the detailed layer specs defined in each of them. + """ current_stride = 1 rate = 1 block_id = 0 @@ -21,21 +34,21 @@ def to_output_strided_layers(convolution_def, output_stride): conv_type = _a[0] stride = _a[1] - if current_stride == output_stride: - layer_stride = 1 + if current_stride == output_stride: # How often do we get here? + layer_stride = 1 # tf.nn.depthwise_conv2d nets require the strides to be 1 when the rate (dilation) is >1 layer_rate = rate - rate *= stride + rate *= stride # why is this? else: layer_stride = stride - layer_rate = 1 - current_stride *= stride + layer_rate = 1 # tf.nn.depthwise_conv2d nets require the rate (dilation) to be 1 when the strides are >1 + current_stride *= stride # why is this? buff.append({ 'blockId': block_id, 'convType': conv_type, 'stride': layer_stride, 'rate': layer_rate, - 'outputStride': current_stride + 'outputStride': current_stride # Looks like the variable 'outputStride' is never used anywhere. }) block_id += 1 @@ -43,6 +56,20 @@ def to_output_strided_layers(convolution_def, output_stride): def load_variables(chkpoint, base_dir=BASE_DIR): + """ + Load all weights and biases from the C-struct binary files the manifest.json file refers to into tensorflow variables and + attach those to the manifest data structure as property 'x' under their corresponding variable name. 
+ If no manifest is found, it will be downloaded first together with all the variable files it refers to. + + :param chkpoint: The checkpoint name. This name is important because it is part of the URL structure where the variables + are downloaded from, and the name is reused on the local filesystem for consistency. + :param base_dir: The local folder name where the posenet weights are downloaded in (usually in a temp folder). + :return: The loaded content of the manifest is used as a data structure where the tensorflow variables created in this + function are added to and hashed under the 'x' property of each variable. + + Note for refactoring: To make this function reusable for other networks, the weights downloader should be either + 1/ more generic, or 2/ extracted outside this function. Apart from this, this function is likely very reusable for other networks. + """ manifest_path = os.path.join(base_dir, chkpoint, "manifest.json") if not os.path.exists(manifest_path): print('Weights for checkpoint %s are not downloaded. Downloading to %s ...' % (chkpoint, base_dir)) @@ -67,7 +94,16 @@ def load_variables(chkpoint, base_dir=BASE_DIR): def _read_imgfile(path, width, height): + """ + Read an image file, resize it and normalize its values to match the MobileNetV1's expected input features. + + :param path: The path on the fs where the image is located. + :param width: The requested image target width. + :param height: The requested image target height. + :return: The resized image with normalized pixels as a 3D array (height, width, channels). + """ img = cv2.imread(path) + # The cv2.resize shape definition is indeed (width, height), while the image shape from cv2.imread is (height, width, channels). 
img = cv2.resize(img, (width, height)) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = img.astype(float) @@ -76,6 +112,19 @@ def _read_imgfile(path, width, height): def build_network(image, layers, variables): + """ + Build a tensorflow network instance based on the definition in the 'layers' parameter and the given variables. + The layer names used are MobileNetV1 specific. + + Note: See how/if this can be made more generic to build other networks like MobileNetV2 / ResNet50 / ... + + :param image: The tensor placeholder that will be used to feed image data into the network. It's the starting point for the network. + :param layers: The layer definitions as defined by the 'to_output_strided_layers' function. + :param variables: The variables that instantiate the requested network. This parameter represents the network's manifest that + was loaded from the manifest.json file and that was enriched with tensorflow variables that were loaded from the variable + snapshot files the manifest refers to (by the 'load_variables' function). + :return: The built tensorflow network. 
+ """ def _weights(layer_name): return variables["MobilenetV1/" + layer_name + "/weights"]['x'] @@ -94,7 +143,9 @@ def _conv_to_output(mobile_net_output, output_layer_name): def _conv(inputs, stride, block_id): return tf.nn.relu6( tf.nn.conv2d(inputs, _weights("Conv2d_" + str(block_id)), stride, padding='SAME') - + _biases("Conv2d_" + str(block_id))) + + + _biases("Conv2d_" + str(block_id)) + ) def _separable_conv(inputs, stride, block_id, dilations): if dilations is None: @@ -103,8 +154,12 @@ def _separable_conv(inputs, stride, block_id, dilations): dw_layer = "Conv2d_" + str(block_id) + "_depthwise" pw_layer = "Conv2d_" + str(block_id) + "_pointwise" - w = tf.nn.depthwise_conv2d( - inputs, _depthwise_weights(dw_layer), stride, 'SAME', rate=dilations, data_format='NHWC') + # 'NHWC' = data format [batch, height, width, channels] + # The dilations are the number of repeated values in the height and width dimension to get a depthwise convolution. + # A depthwise convolution uses a filter (kernel) with a depth of 1 instead of the channel depth to get fewer variables that + # have to be learned, and so achieve a faster but less accurate network. 
When the rate (or dilation) is 1, then the strides + # must all be 1, see: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/nn/depthwise_conv2d + w = tf.nn.depthwise_conv2d(inputs, _depthwise_weights(dw_layer), stride, 'SAME', rate=dilations, data_format='NHWC') w = tf.nn.bias_add(w, _biases(dw_layer)) w = tf.nn.relu6(w) @@ -115,7 +170,7 @@ def _separable_conv(inputs, stride, block_id, dilations): return w x = image - buff = [] + buff = [] # remove this buffer, seems like it's not used with tf.variable_scope(None, 'MobilenetV1'): for m in layers: @@ -123,16 +178,19 @@ def _separable_conv(inputs, stride, block_id, dilations): rate = [m['rate'], m['rate']] if m['convType'] == "conv2d": x = _conv(x, stride, m['blockId']) - buff.append(x) + buff.append(x) # remove this buffer elif m['convType'] == "separableConv": x = _separable_conv(x, stride, m['blockId'], rate) - buff.append(x) + buff.append(x) # remove this buffer heatmaps = _conv_to_output(x, 'heatmap_2') offsets = _conv_to_output(x, 'offset_2') displacement_fwd = _conv_to_output(x, 'displacement_fwd_2') displacement_bwd = _conv_to_output(x, 'displacement_bwd_2') heatmaps = tf.sigmoid(heatmaps, 'heatmap') + # It looks like the outputs 'partheat', 'partoff' and 'segment' are not used. + # It looks like only the '_2' variant is used of 'heatmap', 'offset', 'displacement_fwd' and 'displacement_bwd'. + # To verify: Are the '_2' variants coupled to the choice of the outputstride of 16 in the config.yaml file? return heatmaps, offsets, displacement_fwd, displacement_bwd @@ -141,7 +199,7 @@ def convert(model_id, model_dir, check=False): cfg = load_config() checkpoints = cfg['checkpoints'] image_size = cfg['imageSize'] - output_stride = cfg['outputStride'] + output_stride = cfg['outputStride'] # to verify: is this output_stride coupled to the downloaded weights? 
(current assumption is 'yes') chkpoint = checkpoints[model_id] if chkpoint == 'mobilenet_v1_050': @@ -150,6 +208,7 @@ def convert(model_id, model_dir, check=False): mobile_net_arch = cfg['mobileNet75Architecture'] else: mobile_net_arch = cfg['mobileNet100Architecture'] + # The 'mobilenet_v1_101' seems to have the same architecture as 'mobileNet100Architecture'. width = image_size height = image_size From 92fe3a2c750c5e94dac29d6d53ac9f64b7913b16 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Mon, 23 Dec 2019 12:32:08 +0100 Subject: [PATCH 04/28] Converted to TF2 using the upgrade script and tested on TF2 without manual intervention. Now we can migrate to idiomatic TF2 code. --- Dockerfile | 2 +- benchmark.py | 2 +- docker_img_build.sh | 2 ++ image_demo.py | 2 +- posenet/converter/tfjs2python.py | 33 ++++++++++++++++++++++---------- posenet/model.py | 4 ++-- upgrade-tf-v2.sh | 8 ++++++++ webcam_demo.py | 2 +- 8 files changed, 39 insertions(+), 16 deletions(-) mode change 100755 => 100644 posenet/converter/tfjs2python.py create mode 100755 upgrade-tf-v2.sh diff --git a/Dockerfile b/Dockerfile index 54d2e2a..4423af0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM tensorflow/tensorflow:1.15.0-gpu-py3-jupyter +FROM tensorflow/tensorflow:2.0.0-gpu-py3-jupyter # see: https://www.tensorflow.org/install/docker # see: https://hub.docker.com/r/tensorflow/tensorflow/ diff --git a/benchmark.py b/benchmark.py index a5ab0c4..fdbba1a 100644 --- a/benchmark.py +++ b/benchmark.py @@ -15,7 +15,7 @@ def main(): - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: model_cfg, model_outputs = posenet.load_model(args.model, sess) output_stride = model_cfg['output_stride'] num_images = args.num_images diff --git a/docker_img_build.sh b/docker_img_build.sh index 0537dc6..4a5962c 100755 --- a/docker_img_build.sh +++ b/docker_img_build.sh @@ -1,3 +1,5 @@ #!/usr/bin/env bash +docker rmi -f posenet-python + docker build -t posenet-python -f Dockerfile . 
diff --git a/image_demo.py b/image_demo.py index 70decba..32b3fad 100644 --- a/image_demo.py +++ b/image_demo.py @@ -18,7 +18,7 @@ def main(): - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: model_cfg, model_outputs = posenet.load_model(args.model, sess) output_stride = model_cfg['output_stride'] diff --git a/posenet/converter/tfjs2python.py b/posenet/converter/tfjs2python.py old mode 100755 new mode 100644 index 63086d1..ff2fd56 --- a/posenet/converter/tfjs2python.py +++ b/posenet/converter/tfjs2python.py @@ -26,6 +26,7 @@ def to_output_strided_layers(convolution_def, output_stride): variables (see the load_variables function). :return: An array containing an element for each layer with the detailed layer specs defined in each of them. """ + current_stride = 1 rate = 1 block_id = 0 @@ -70,6 +71,7 @@ def load_variables(chkpoint, base_dir=BASE_DIR): Note for refactoring: To make this function reusable for other networks, the weights downloader should be either 1/ more generic, or 2/ extracted outside this function. Apart from this, this function is likely very reusable for other networks. """ + manifest_path = os.path.join(base_dir, chkpoint, "manifest.json") if not os.path.exists(manifest_path): print('Weights for checkpoint %s are not downloaded. Downloading to %s ...' % (chkpoint, base_dir)) @@ -102,6 +104,7 @@ def _read_imgfile(path, width, height): :param height: The requested image target height. :return: The resized image with normalized pixels as a 3D array (height, width, channels). """ + img = cv2.imread(path) # The cv2.resize shape definition is indeed (width, height), while the image shape from cv2.imread is (height, width, channels). 
img = cv2.resize(img, (width, height)) @@ -136,13 +139,13 @@ def _depthwise_weights(layer_name): return variables["MobilenetV1/" + layer_name + "/depthwise_weights"]['x'] def _conv_to_output(mobile_net_output, output_layer_name): - w = tf.nn.conv2d(mobile_net_output, _weights(output_layer_name), [1, 1, 1, 1], padding='SAME') + w = tf.nn.conv2d(input=mobile_net_output, filters=_weights(output_layer_name), strides=[1, 1, 1, 1], padding='SAME') w = tf.nn.bias_add(w, _biases(output_layer_name), name=output_layer_name) return w def _conv(inputs, stride, block_id): return tf.nn.relu6( - tf.nn.conv2d(inputs, _weights("Conv2d_" + str(block_id)), stride, padding='SAME') + tf.nn.conv2d(input=inputs, filters=_weights("Conv2d_" + str(block_id)), strides=stride, padding='SAME') + _biases("Conv2d_" + str(block_id)) ) @@ -159,11 +162,11 @@ def _separable_conv(inputs, stride, block_id, dilations): # A depthwise convolution uses a filter (kernel) with a depth of 1 instead of the channel depth to get fewer variables that # have to be learned, and so achieve a faster but less accurate network. 
When the rate (or dilation) is 1, then the strides # must all be 1, see: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/nn/depthwise_conv2d - w = tf.nn.depthwise_conv2d(inputs, _depthwise_weights(dw_layer), stride, 'SAME', rate=dilations, data_format='NHWC') + w = tf.nn.depthwise_conv2d(input=inputs, filter=_depthwise_weights(dw_layer), strides=stride, padding='SAME', dilations=dilations, data_format='NHWC') w = tf.nn.bias_add(w, _biases(dw_layer)) w = tf.nn.relu6(w) - w = tf.nn.conv2d(w, _weights(pw_layer), [1, 1, 1, 1], padding='SAME') + w = tf.nn.conv2d(input=w, filters=_weights(pw_layer), strides=[1, 1, 1, 1], padding='SAME') w = tf.nn.bias_add(w, _biases(pw_layer)) w = tf.nn.relu6(w) @@ -171,7 +174,7 @@ def _separable_conv(inputs, stride, block_id, dilations): x = image buff = [] # remove this buffer, seems like it's not used - with tf.variable_scope(None, 'MobilenetV1'): + with tf.compat.v1.variable_scope(None, 'MobilenetV1'): for m in layers: stride = [1, m['stride'], m['stride'], 1] @@ -196,6 +199,16 @@ def _separable_conv(inputs, stride, block_id, dilations): def convert(model_id, model_dir, check=False): + """ + Download and read the weight and bias variables for MobileNetV1, create the network and instantiate it with those variables. + Then write the instantiated network to a model file and corresponding checkpoint files. + + :param model_id: Refers to the model to load, as defined in the config.yaml file. + :param model_dir: Defines where the model and checkpoint files will be saved. + :param check: Indicates whether or not to verify the model by feeding it a sample image. + :return: Nothing, the model and checkpoint files are written to the filesystem. 
+ """ + cfg = load_config() checkpoints = cfg['checkpoints'] image_size = cfg['imageSize'] @@ -221,12 +234,12 @@ def convert(model_id, model_dir, check=False): layers = to_output_strided_layers(mobile_net_arch, output_stride) variables = load_variables(chkpoint) - init = tf.global_variables_initializer() - with tf.Session() as sess: + init = tf.compat.v1.global_variables_initializer() + with tf.compat.v1.Session() as sess: sess.run(init) - saver = tf.train.Saver() + saver = tf.compat.v1.train.Saver() - image_ph = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image') + image_ph = tf.compat.v1.placeholder(tf.float32, shape=[1, None, None, 3], name='image') outputs = build_network(image_ph, layers, variables) sess.run( @@ -241,7 +254,7 @@ def convert(model_id, model_dir, check=False): os.makedirs(os.path.dirname(save_path)) checkpoint_path = saver.save(sess, save_path, write_state=False) - tf.train.write_graph(cg, model_dir, "model-%s.pbtxt" % chkpoint) + tf.io.write_graph(cg, model_dir, "model-%s.pbtxt" % chkpoint) # Freeze graph and write our final model file freeze_graph( diff --git a/posenet/model.py b/posenet/model.py index 2b55ba5..b7a2e0c 100644 --- a/posenet/model.py +++ b/posenet/model.py @@ -42,8 +42,8 @@ def load_model(model_id, sess, model_dir=MODEL_DIR): convert(model_ord, model_dir, check=False) assert os.path.exists(model_path) - with tf.gfile.GFile(model_path, 'rb') as f: - graph_def = tf.GraphDef() + with tf.io.gfile.GFile(model_path, 'rb') as f: + graph_def = tf.compat.v1.GraphDef() graph_def.ParseFromString(f.read()) sess.graph.as_default() tf.import_graph_def(graph_def, name='') diff --git a/upgrade-tf-v2.sh b/upgrade-tf-v2.sh new file mode 100755 index 0000000..e261497 --- /dev/null +++ b/upgrade-tf-v2.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +WORK=$(dirname $(pwd)) + +docker run --gpus all -it -v $WORK:/work posenet-python tf_upgrade_v2 \ + --intree posenet-python/ \ + --outtree posenet-python_v2/ \ + --reportfile 
posenet-python/report.txt diff --git a/webcam_demo.py b/webcam_demo.py index 8909038..2accdfb 100644 --- a/webcam_demo.py +++ b/webcam_demo.py @@ -16,7 +16,7 @@ def main(): - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: model_cfg, model_outputs = posenet.load_model(args.model, sess) output_stride = model_cfg['output_stride'] From 4721b7b6ff3451282765588907ac21bc72d28771 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Mon, 23 Dec 2019 14:53:04 +0100 Subject: [PATCH 05/28] The freeze_graph function doesn't exist any longer, we have to use the saved_model instead. --- posenet/converter/tfjs2python.py | 30 +++++++++--------------------- posenet/model.py | 21 +++++++++------------ 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/posenet/converter/tfjs2python.py b/posenet/converter/tfjs2python.py index ff2fd56..0d9f264 100644 --- a/posenet/converter/tfjs2python.py +++ b/posenet/converter/tfjs2python.py @@ -9,7 +9,8 @@ from posenet.converter.config import load_config -BASE_DIR = os.path.join(tempfile.gettempdir(), '_posenet_weights') +# BASE_DIR = os.path.join(tempfile.gettempdir(), '_posenet_weights') +BASE_DIR = os.path.join('.', '_posenet_weights') # Note that this file contains reverse-engineered documentation that contains several notes about points that need to be verified. 
@@ -237,7 +238,6 @@ def convert(model_id, model_dir, check=False): init = tf.compat.v1.global_variables_initializer() with tf.compat.v1.Session() as sess: sess.run(init) - saver = tf.compat.v1.train.Saver() image_ph = tf.compat.v1.placeholder(tf.float32, shape=[1, None, None, 3], name='image') outputs = build_network(image_ph, layers, variables) @@ -249,25 +249,13 @@ def convert(model_id, model_dir, check=False): } ) - save_path = os.path.join(model_dir, 'checkpoints', 'model-%s.ckpt' % chkpoint) - if not os.path.exists(os.path.dirname(save_path)): - os.makedirs(os.path.dirname(save_path)) - checkpoint_path = saver.save(sess, save_path, write_state=False) - - tf.io.write_graph(cg, model_dir, "model-%s.pbtxt" % chkpoint) - - # Freeze graph and write our final model file - freeze_graph( - input_graph=os.path.join(model_dir, "model-%s.pbtxt" % chkpoint), - input_saver="", - input_binary=False, - input_checkpoint=checkpoint_path, - output_node_names='heatmap,offset_2,displacement_fwd_2,displacement_bwd_2', - restore_op_name="save/restore_all", - filename_tensor_name="save/Const:0", - output_graph=os.path.join(model_dir, "model-%s.pb" % chkpoint), - clear_devices=True, - initializer_nodes="") + save_path = os.path.join(model_dir, 'model-%s' % chkpoint) + if not os.path.exists(save_path): + os.makedirs(save_path) + + builder = tf.compat.v1.saved_model.Builder(save_path) + builder.add_meta_graph_and_variables(sess, tags=[tf.saved_model.SERVING]) + builder.save() if check and os.path.exists("./images/tennis_in_crowd.jpg"): # Result diff --git a/posenet/model.py b/posenet/model.py index b7a2e0c..ddb7c4f 100644 --- a/posenet/model.py +++ b/posenet/model.py @@ -35,25 +35,22 @@ def load_config(model_ord): def load_model(model_id, sess, model_dir=MODEL_DIR): model_ord = model_id_to_ord(model_id) model_cfg = load_config(model_ord) - model_path = os.path.join(model_dir, 'model-%s.pb' % model_cfg['checkpoint_name']) + model_path = os.path.join(model_dir, 'model-%s' % 
model_cfg['checkpoint_name']) if not os.path.exists(model_path): print('Cannot find model file %s, converting from tfjs...' % model_path) from posenet.converter.tfjs2python import convert convert(model_ord, model_dir, check=False) assert os.path.exists(model_path) - with tf.io.gfile.GFile(model_path, 'rb') as f: - graph_def = tf.compat.v1.GraphDef() - graph_def.ParseFromString(f.read()) sess.graph.as_default() - tf.import_graph_def(graph_def, name='') - - if DEBUG_OUTPUT: - graph_nodes = [n for n in graph_def.node] - names = [] - for t in graph_nodes: - names.append(t.name) - print('Loaded graph node:', t.name) + tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) + + # if DEBUG_OUTPUT: + # graph_nodes = [n for n in graph_def.node] + # names = [] + # for t in graph_nodes: + # names.append(t.name) + # print('Loaded graph node:', t.name) offsets = sess.graph.get_tensor_by_name('offset_2:0') displacement_fwd = sess.graph.get_tensor_by_name('displacement_fwd_2:0') From 5a45406623559cc0e1ee56757cf1527f7fc36ec9 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Tue, 31 Dec 2019 15:43:05 +0100 Subject: [PATCH 06/28] Researching the saved models. 
--- .gitignore | 1 + Dockerfile | 10 +++++++++- Dockerfile-gpu | 27 +++++++++++++++++++++++++++ docker_img_build.sh | 18 ++++++++++++++++-- docker_run.sh | 24 +++++++++++++++++++++++- posenet/converter/tfjs2tf.py | 8 ++++++++ requirements.txt | 2 ++ test_tfjs2tf.py | 28 ++++++++++++++++++++++++++++ 8 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 Dockerfile-gpu create mode 100644 posenet/converter/tfjs2tf.py create mode 100644 test_tfjs2tf.py diff --git a/.gitignore b/.gitignore index 541df98..17d7482 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ output/* .idea _models/* _posenet_weights/* +env.sh # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Dockerfile b/Dockerfile index 4423af0..ed02e13 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,5 @@ -FROM tensorflow/tensorflow:2.0.0-gpu-py3-jupyter +FROM tensorflow/tensorflow:nightly-py3-jupyter +# This is the CPU version! # see: https://www.tensorflow.org/install/docker # see: https://hub.docker.com/r/tensorflow/tensorflow/ @@ -7,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ bzip2 \ git \ wget && \ + pip install --upgrade pip && \ rm -rf /var/lib/apt/lists/* COPY requirements.txt /work/ @@ -15,6 +17,12 @@ WORKDIR /work RUN pip install -r requirements.txt +RUN git clone https://github.com/patlevin/tfjs-to-tf.git && \ + cd tfjs-to-tf && \ + pip install . && \ + cd .. 
&& \ + rm -r tfjs-to-tf + ENV PYTHONPATH='/work/:$PYTHONPATH' CMD ["bash"] diff --git a/Dockerfile-gpu b/Dockerfile-gpu new file mode 100644 index 0000000..11ed265 --- /dev/null +++ b/Dockerfile-gpu @@ -0,0 +1,27 @@ +FROM tensorflow/tensorflow:2.0.0-gpu-py3-jupyter +# see: https://www.tensorflow.org/install/docker +# see: https://hub.docker.com/r/tensorflow/tensorflow/ + +# Install system packages +RUN apt-get update && apt-get install -y --no-install-recommends \ + bzip2 \ + git \ + wget && \ + pip install --upgrade pip && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /work/ + +WORKDIR /work + +RUN pip install -r requirements.txt + +RUN git clone https://github.com/patlevin/tfjs-to-tf.git && \ + cd tfjs-to-tf && \ + pip install . && \ + cd .. && \ + rm -r tfjs-to-tf + +ENV PYTHONPATH='/work/:$PYTHONPATH' + +CMD ["bash"] diff --git a/docker_img_build.sh b/docker_img_build.sh index 4a5962c..3839400 100755 --- a/docker_img_build.sh +++ b/docker_img_build.sh @@ -1,5 +1,19 @@ #!/usr/bin/env bash -docker rmi -f posenet-python +if [ -z "$1" ]; then + echo "pass CPU or GPU as argument" + echo "Docker image build failed..." + exit 1 +fi -docker build -t posenet-python -f Dockerfile . +if [ "$1" = "GPU" ]; then + image="posenet-python-gpu" + dockerfile="Dockerfile-gpu" +else + image="posenet-python-cpu" + dockerfile="Dockerfile" +fi + +docker rmi -f "$image" + +docker build -t "$image" -f "$dockerfile" . 
diff --git a/docker_run.sh b/docker_run.sh index e73cf39..7230833 100755 --- a/docker_run.sh +++ b/docker_run.sh @@ -2,4 +2,26 @@ WORK=$(pwd) -docker run --gpus all -it -v $WORK:/work posenet-python python "$@" +if [ -z "$POSENET_PYTHON_DEVICE" ]; then + echo "set the environment variable POSENET_PYTHON_DEVICE to CPU or GPU, or enter your choice below:" + read -p "Enter your device (CPU or GPU): " device + if [ "$device" = "GPU" ]; then + source <(echo "export POSENET_PYTHON_DEVICE=GPU"); + elif [ "$device" = "CPU" ]; then + source <(echo "export POSENET_PYTHON_DEVICE=CPU"); + else + echo "Device configuration failed..." + exit 1 + fi +fi + + +echo "device is: $POSENET_PYTHON_DEVICE" + +if [ "$POSENET_PYTHON_DEVICE" = "GPU" ]; then + image="posenet-python-gpu" +else + image="posenet-python-cpu" +fi + +docker run --gpus all -it --rm -v $WORK:/work "$image" python "$@" diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py new file mode 100644 index 0000000..9dc297b --- /dev/null +++ b/posenet/converter/tfjs2tf.py @@ -0,0 +1,8 @@ +import tfjs_graph_converter as tfjs +import os + + +def convert(model_id, model_dir, check=False): + tfjsdir = os.path.join('/opt/project/_posenet_weights', 'mobilenet_v1_101') + tfdir = os.path.join('/opt/project/_models', 'model-mobilenet_v1_101_test') + tfjs.api.graph_model_to_saved_model(tfjsdir, tfdir, ['serve']) diff --git a/requirements.txt b/requirements.txt index 10e9cf2..4b7f71e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ scipy==1.1.* pyyaml==3.* opencv-python-headless==3.4.5.20 # opencv-python==3.4.5.20 +# tensorflowjs==1.3.2 +tensorflowjs==1.4.0 \ No newline at end of file diff --git a/test_tfjs2tf.py b/test_tfjs2tf.py new file mode 100644 index 0000000..923cd58 --- /dev/null +++ b/test_tfjs2tf.py @@ -0,0 +1,28 @@ +import posenet.converter.tfjs2tf as converter + + +def main(): + converter.convert() + +# have a look at: https://github.com/tensorflow/tfjs/tree/master/tfjs-converter + +# 
see: https://stackoverflow.com/questions/58841355/bodypix-real-time-person-segmentation/59509874#59509874 +# https://github.com/patlevin/tfjs-to-tf +# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/model-stride16.json +# see weightsManifest.paths for the shard names: +# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/group1-shard1of23.bin +# ... +# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/group1-shard23of23.bin + + +# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/float/100/model-stride16.json +# see weightsManifest.paths +# "group1-shard1of4.bin", +# "group1-shard2of4.bin", +# "group1-shard3of4.bin", +# "group1-shard4of4.bin" + + +# https://storage.googleapis.com/tfjs-models/weights/posenet/mobilenet_v1_101/manifest.json + + From 7a2f5fc9add2ec8f1a968e2688a529aa6c685baf Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 3 Jan 2020 00:00:02 +0100 Subject: [PATCH 07/28] Refactoring everything to the new saved model. 
--- .gitignore | 3 + Dockerfile | 6 +- Dockerfile-gpu | 27 ------ benchmark.py | 3 +- docker_img_build.sh | 6 +- docker_run.sh | 4 +- image_demo.py | 19 +++-- posenet/__init__.py | 1 + posenet/converter/common.py | 6 ++ posenet/converter/config.yaml | 71 ++++++++++++++++ posenet/converter/tfjs2tf.py | 32 +++++-- posenet/converter/tfjsdownload.py | 133 ++++++++++++++++++++++++++++++ posenet/model.py | 25 +++++- posenet/utils.py | 2 +- test_tfjs2tf.py | 23 +++++- webcam_demo.py | 3 +- 16 files changed, 311 insertions(+), 53 deletions(-) delete mode 100644 Dockerfile-gpu create mode 100644 posenet/converter/common.py create mode 100644 posenet/converter/tfjsdownload.py diff --git a/.gitignore b/.gitignore index 17d7482..98ef57c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,11 @@ images/* output/* +output_old/* .idea/* .idea _models/* +_tf_models/* +_tfjs_models/* _posenet_weights/* env.sh diff --git a/Dockerfile b/Dockerfile index ed02e13..075ad9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ -FROM tensorflow/tensorflow:nightly-py3-jupyter -# This is the CPU version! +# default image version, override using --build-arg IMAGE_VERSION=otherversion +ARG IMAGE_VERSION=nightly-py3-jupyter +FROM tensorflow/tensorflow:$IMAGE_VERSION +# The default version is the CPU version! 
# see: https://www.tensorflow.org/install/docker # see: https://hub.docker.com/r/tensorflow/tensorflow/ diff --git a/Dockerfile-gpu b/Dockerfile-gpu deleted file mode 100644 index 11ed265..0000000 --- a/Dockerfile-gpu +++ /dev/null @@ -1,27 +0,0 @@ -FROM tensorflow/tensorflow:2.0.0-gpu-py3-jupyter -# see: https://www.tensorflow.org/install/docker -# see: https://hub.docker.com/r/tensorflow/tensorflow/ - -# Install system packages -RUN apt-get update && apt-get install -y --no-install-recommends \ - bzip2 \ - git \ - wget && \ - pip install --upgrade pip && \ - rm -rf /var/lib/apt/lists/* - -COPY requirements.txt /work/ - -WORKDIR /work - -RUN pip install -r requirements.txt - -RUN git clone https://github.com/patlevin/tfjs-to-tf.git && \ - cd tfjs-to-tf && \ - pip install . && \ - cd .. && \ - rm -r tfjs-to-tf - -ENV PYTHONPATH='/work/:$PYTHONPATH' - -CMD ["bash"] diff --git a/benchmark.py b/benchmark.py index fdbba1a..b7a01de 100644 --- a/benchmark.py +++ b/benchmark.py @@ -16,8 +16,7 @@ def main(): with tf.compat.v1.Session() as sess: - model_cfg, model_outputs = posenet.load_model(args.model, sess) - output_stride = model_cfg['output_stride'] + output_stride, model_outputs = posenet.load_model(args.model, sess) num_images = args.num_images filenames = [ diff --git a/docker_img_build.sh b/docker_img_build.sh index 3839400..daa4337 100755 --- a/docker_img_build.sh +++ b/docker_img_build.sh @@ -8,12 +8,12 @@ fi if [ "$1" = "GPU" ]; then image="posenet-python-gpu" - dockerfile="Dockerfile-gpu" + version="--build-arg IMAGE_VERSION=2.0.0-gpu-py3-jupyter" else image="posenet-python-cpu" - dockerfile="Dockerfile" + version="" fi docker rmi -f "$image" -docker build -t "$image" -f "$dockerfile" . +docker build -t "$image" $version . 
diff --git a/docker_run.sh b/docker_run.sh index 7230833..ab8ece1 100755 --- a/docker_run.sh +++ b/docker_run.sh @@ -20,8 +20,10 @@ echo "device is: $POSENET_PYTHON_DEVICE" if [ "$POSENET_PYTHON_DEVICE" = "GPU" ]; then image="posenet-python-gpu" + gpu_opts="--gpus all" else image="posenet-python-cpu" + gpu_opts="" fi -docker run --gpus all -it --rm -v $WORK:/work "$image" python "$@" +docker run $gpu_opts -it --rm -v $WORK:/work "$image" python "$@" diff --git a/image_demo.py b/image_demo.py index 32b3fad..71767fb 100644 --- a/image_demo.py +++ b/image_demo.py @@ -5,6 +5,8 @@ import os import posenet +import posenet.converter.tfjs2tf as tfjs2tf +import posenet.converter.tfjsdownload as tfjsdownload parser = argparse.ArgumentParser() @@ -18,9 +20,12 @@ def main(): + model = 'posenet' + neuralnet = 'mobilenet_v1_100' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride16' + with tf.compat.v1.Session() as sess: - model_cfg, model_outputs = posenet.load_model(args.model, sess) - output_stride = model_cfg['output_stride'] + output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) if args.output_dir: if not os.path.exists(args.output_dir): @@ -34,10 +39,14 @@ def main(): input_image, draw_image, output_scale = posenet.read_imgfile( f, scale_factor=args.scale_factor, output_stride=output_stride) + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + input_tensor_name = model_cfg['input_tensors'][0] + + # ORDER OF THE FEATURES IS DEPENDENT ON THE config.yaml file output_tensors list!!! 
heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( - model_outputs, - feed_dict={'image:0': input_image} - ) + model_outputs, + feed_dict={input_tensor_name: input_image} + ) pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses( heatmaps_result.squeeze(axis=0), diff --git a/posenet/__init__.py b/posenet/__init__.py index 150a095..3f6bac2 100644 --- a/posenet/__init__.py +++ b/posenet/__init__.py @@ -1,4 +1,5 @@ from posenet.constants import * from posenet.decode_multi import decode_multiple_poses from posenet.model import load_model +from posenet.model import load_tf_model from posenet.utils import * diff --git a/posenet/converter/common.py b/posenet/converter/common.py new file mode 100644 index 0000000..aef0f72 --- /dev/null +++ b/posenet/converter/common.py @@ -0,0 +1,6 @@ +TFJS_OP_KEY = 'op' +TFJS_DILATIONS_KEY = 'dilations' +TFJS_CONV2D_KEY = 'Conv2D' + +TF_NHWC = 'NHWC' +TF_NCHW = 'NCHW' diff --git a/posenet/converter/config.yaml b/posenet/converter/config.yaml index 2f7b52e..058d6dd 100644 --- a/posenet/converter/config.yaml +++ b/posenet/converter/config.yaml @@ -1,3 +1,74 @@ +models: + tfjs: + bodypix: + resnet50_v1: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + - 'sub_2:0' + output_tensors: + - 'float_heatmaps:0' + - 'float_short_offsets:0' + - 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' + - 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + - 'float_part_heatmaps:0' + - 'float_part_offsets:0' + - 'float_long_offsets:0' + - 'float_segments:0' + mobilenet_v1_100: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/float/100' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + - 'sub_2:0' + output_tensors: + - 
'float_heatmaps:0' + - 'float_short_offsets:0' + - 'MobilenetV1/displacement_fwd_2/BiasAdd:0' + - 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + - 'float_part_heatmaps:0' + - 'float_part_offsets:0' + - 'float_long_offsets:0' + - 'float_segments:0' + posenet: + resnet50_v1: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/float' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + - 'sub_2:0' + output_tensors: + - 'float_heatmaps:0' + - 'float_short_offsets:0' + - 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' + - 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + mobilenet_v1_100: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + - 'sub_2:0' + output_tensors: + - 'MobilenetV1/heatmap_2/BiasAdd:0' + - 'MobilenetV1/offset_2/BiasAdd:0' + - 'MobilenetV1/displacement_fwd_2/BiasAdd:0' + - 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + + + chk: 3 # 3=mobilenet_v1_101 imageSize: 513 GOOGLE_CLOUD_STORAGE_DIR: 'https://storage.googleapis.com/tfjs-models/weights/posenet/' diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py index 9dc297b..e2da790 100644 --- a/posenet/converter/tfjs2tf.py +++ b/posenet/converter/tfjs2tf.py @@ -1,8 +1,30 @@ -import tfjs_graph_converter as tfjs import os +import tensorflow as tf +import tfjs_graph_converter as tfjs +import posenet.converter.tfjsdownload as tfjsdownload + + +def convert(model, neuralnet, model_variant): + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) + if not os.path.exists(model_file_path): + print('Cannot find tfjs model path %s, downloading tfjs model...' 
% model_file_path) + tfjsdownload.download_tfjs_model(model, neuralnet, model_variant, model_cfg['data_format']) + tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) + +def list_tensors(model, neuralnet, model_variant): + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + graph = tfjs.api.load_graph_model(model_cfg['tfjs_dir']) + with tf.compat.v1.Session(graph=graph) as sess: + # the module provides some helpers for querying model properties + input_tensor_names = tfjs.util.get_input_tensors(graph) + output_tensor_names = tfjs.util.get_output_tensors(graph) -def convert(model_id, model_dir, check=False): - tfjsdir = os.path.join('/opt/project/_posenet_weights', 'mobilenet_v1_101') - tfdir = os.path.join('/opt/project/_models', 'model-mobilenet_v1_101_test') - tfjs.api.graph_model_to_saved_model(tfjsdir, tfdir, ['serve']) + print('input tensors:') + for it in input_tensor_names: + print(it) + print('--') + print('output tensors:') + for ot in output_tensor_names: + print(ot) diff --git a/posenet/converter/tfjsdownload.py b/posenet/converter/tfjsdownload.py new file mode 100644 index 0000000..492475c --- /dev/null +++ b/posenet/converter/tfjsdownload.py @@ -0,0 +1,133 @@ +import urllib.request +import posixpath +import json +import zlib +import os +import shutil +import tensorflowjs.converters.common as tfjs_common +import tfjs_graph_converter.common as tfjs_converter_common +import posenet.converter.common as common + +from posenet.converter.config import load_config + +TFJS_MODEL_DIR = './_tfjs_models' +TF_MODEL_DIR = './_tf_models' + + +def model_config(model, neuralnet, model_variant): + config = load_config() + tfjs_models = config['models']['tfjs'] + model_cfg = tfjs_models[model][neuralnet] + return { + 'base_url': model_cfg['base_url'], + 'filename': model_cfg['model_variant'][model_variant]['filename'], + 'output_stride': model_cfg['model_variant'][model_variant]['output_stride'], + 
'data_format': model_cfg['model_variant'][model_variant]['data_format'], + 'input_tensors': model_cfg['input_tensors'], + 'output_tensors': model_cfg['output_tensors'], + 'tfjs_dir': os.path.join(TFJS_MODEL_DIR, model, neuralnet, model_variant), + 'tf_dir': os.path.join(TF_MODEL_DIR, model, neuralnet, model_variant) + } + + +def _fix_dilations(node, data_format): + attr_key = tfjs_converter_common.TFJS_NODE_ATTR_KEY + dilations_key = common.TFJS_DILATIONS_KEY + + if attr_key in node and dilations_key in node[attr_key]: + dilations = node[attr_key][dilations_key] + values = dilations['list']['i'] + if len(values) == 4 and (int(values[0]) > 1 or int(values[1]) > 1) and int(values[2]) == 1 and int(values[3]) == 1: + if data_format == common.TF_NHWC: + dilation_h = values[0] + dilation_w = values[1] + values[0] = '1' + values[1] = dilation_h + values[2] = dilation_w + values[3] = '1' + if data_format == common.TF_NCHW: + dilation_h = values[0] + dilation_w = values[1] + values[0] = '1' + values[1] = '1' + values[2] = dilation_h + values[3] = dilation_w + + +def _fix_model(json_model_def, data_format): + """ + Input shapes are usually NHWC or NCHW and the corresponding dilations allowed are [1,x,y,1] and [1,1,x,y]. + Some networks, like ResNet50_v1 have dilations for their Conv2D operations that are [2,2,1,1] in the model json + file. This representation is wrong and should be [1,2,2,1] for NHWC input and [1,1,2,2] for NCHW input. + + :param data_format: + :param json_model_def: The JSON model definition. + :return: The fixed JSON model definition. 
+ """ + nodes = json_model_def[tfjs_common.ARTIFACT_MODEL_TOPOLOGY_KEY][tfjs_converter_common.TFJS_NODE_KEY] + for node in nodes: + if common.TFJS_OP_KEY in node and node[common.TFJS_OP_KEY] == common.TFJS_CONV2D_KEY: + _fix_dilations(node, data_format) + + return json_model_def + + +def fix_model_file(model_cfg): + model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) + with open(model_file_path, 'r') as f: + json_model_def = json.load(f) + + json_model_def = _fix_model(json_model_def, model_cfg['data_format']) + + with open(model_file_path, 'w') as f: + json.dump(json_model_def, f) + + if not model_cfg['filename'] == 'model.json': + # The expected filename for the model json file is 'model.json'. + # See tfjs_common.ARTIFACT_MODEL_JSON_FILE_NAME in the tensorflowjs codebase. + normalized_model_json_file = os.path.join(model_cfg['tfjs_dir'], 'model.json') + shutil.copyfile(model_file_path, normalized_model_json_file) + + return json_model_def + + +def download_single_file(base_url, filename, save_dir): + output_path = os.path.join(save_dir, filename) + url = posixpath.join(base_url, filename) + req = urllib.request.Request(url) + response = urllib.request.urlopen(req) + if response.info().get('Content-Encoding') == 'gzip': + data = zlib.decompress(response.read(), zlib.MAX_WBITS | 32) + else: + # this path not tested since gzip encoding default on google server + # may need additional encoding/text handling if hit in the future + data = response.read() + with open(output_path, 'wb') as f: + f.write(data) + + +def download_tfjs_model(model, neuralnet, model_variant, data_format): + """ + Download a tfjs model with saved weights. + + :param data_format: + :param model: The model, e.g. 'bodypix' + :param neuralnet: The neural net used, e.g. 'resnet50' + :param model_variant: The reference to the model file, e.g. 
'stride16' + """ + model_cfg = model_config(model, neuralnet, model_variant) + model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) + if os.path.exists(model_file_path): + print('Model file already exists: %s...' % model_file_path) + return + if not os.path.exists(model_cfg['tfjs_dir']): + os.makedirs(model_cfg['tfjs_dir']) + + download_single_file(model_cfg['base_url'], model_cfg['filename'], model_cfg['tfjs_dir']) + + json_model_def = fix_model_file(model_cfg) + + shard_paths = json_model_def['weightsManifest'][0]['paths'] + for shard in shard_paths: + download_single_file(model_cfg['base_url'], shard, model_cfg['tfjs_dir']) + diff --git a/posenet/model.py b/posenet/model.py index ddb7c4f..00dc7b5 100644 --- a/posenet/model.py +++ b/posenet/model.py @@ -1,6 +1,9 @@ import tensorflow as tf import os import posenet.converter.config +import posenet.converter.tfjsdownload as tfjsdownload +import posenet.converter.tfjs2tf as tfjs2tf + MODEL_DIR = './_models' DEBUG_OUTPUT = False @@ -32,6 +35,26 @@ def load_config(model_ord): return model_cfg +def load_tf_model(sess, model, neuralnet, model_variant): + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + model_path = model_cfg['tf_dir'] + if not os.path.exists(model_path): + print('Cannot find tf model path %s, converting from tfjs...' 
% model_path) + tfjs2tf.convert(model, neuralnet, model_variant) + assert os.path.exists(model_path) + + sess.graph.as_default() + tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) + + output_tensors_names = model_cfg['output_tensors'] + + output_tensors = [] + for name in output_tensors_names: + output_tensors.append(sess.graph.get_tensor_by_name(name)) + + return model_cfg['output_stride'], output_tensors + + def load_model(model_id, sess, model_dir=MODEL_DIR): model_ord = model_id_to_ord(model_id) model_cfg = load_config(model_ord) @@ -57,4 +80,4 @@ def load_model(model_id, sess, model_dir=MODEL_DIR): displacement_bwd = sess.graph.get_tensor_by_name('displacement_bwd_2:0') heatmaps = sess.graph.get_tensor_by_name('heatmap:0') - return model_cfg, [heatmaps, offsets, displacement_fwd, displacement_bwd] + return model_cfg['output_stride'], [heatmaps, offsets, displacement_fwd, displacement_bwd] diff --git a/posenet/utils.py b/posenet/utils.py index 88dab53..94c5db9 100644 --- a/posenet/utils.py +++ b/posenet/utils.py @@ -1,6 +1,5 @@ import cv2 import numpy as np - import posenet.constants @@ -18,6 +17,7 @@ def _process_input(source_img, scale_factor=1.0, output_stride=16): input_img = cv2.resize(source_img, (target_width, target_height), interpolation=cv2.INTER_LINEAR) input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) input_img = input_img * (2.0 / 255.0) - 1.0 + # NHWC input_img = input_img.reshape(1, target_height, target_width, 3) return input_img, source_img, scale diff --git a/test_tfjs2tf.py b/test_tfjs2tf.py index 923cd58..6e64179 100644 --- a/test_tfjs2tf.py +++ b/test_tfjs2tf.py @@ -1,13 +1,22 @@ -import posenet.converter.tfjs2tf as converter +import posenet.converter.tfjs2tf as tfjs2tf +import posenet.converter.tfjsdownload as tfjsdownload def main(): - converter.convert() + tfjsdownload.download_tfjs_model('bodypix', 'resnet50_v1', 'stride16') + tfjs2tf.convert('bodypix', 'resnet50_v1', 'stride16') + 
tfjsdownload.fix_model_file(tfjsdownload.model_config('bodypix', 'resnet50_v1', 'stride16')) + tfjs2tf.list_tensors('posenet', 'resnet50_v1', 'stride16') + tfjs2tf.list_tensors('bodypix', 'mobilenet_v1_100', 'stride16') + tfjs2tf.list_tensors('posenet', 'mobilenet_v1_100', 'stride16') # have a look at: https://github.com/tensorflow/tfjs/tree/master/tfjs-converter +# https://github.com/patlevin/tfjs-to-tf + +## BodyPix # see: https://stackoverflow.com/questions/58841355/bodypix-real-time-person-segmentation/59509874#59509874 -# https://github.com/patlevin/tfjs-to-tf +# # https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/model-stride16.json # see weightsManifest.paths for the shard names: # https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/group1-shard1of23.bin @@ -23,6 +32,12 @@ def main(): # "group1-shard4of4.bin" -# https://storage.googleapis.com/tfjs-models/weights/posenet/mobilenet_v1_101/manifest.json +## PoseNet +# https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/model-stride16.json +# https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/float/model-stride16.json + + +# Old model format +# https://storage.googleapis.com/tfjs-models/weights/posenet/mobilenet_v1_101/manifest.json diff --git a/webcam_demo.py b/webcam_demo.py index 2accdfb..ca6239c 100644 --- a/webcam_demo.py +++ b/webcam_demo.py @@ -17,8 +17,7 @@ def main(): with tf.compat.v1.Session() as sess: - model_cfg, model_outputs = posenet.load_model(args.model, sess) - output_stride = model_cfg['output_stride'] + output_stride, model_outputs = posenet.load_model(args.model, sess) if args.file is not None: cap = cv2.VideoCapture(args.file) From fe27c0f18da8a0a57244cfd13d1c97f18bd88f21 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sat, 11 Jan 2020 15:20:56 +0100 Subject: [PATCH 08/28] It works again with the mobilenet_v1_100 model. The clue was that you have to apply a sigmoid on the heatmaps. 
--- Dockerfile | 5 +-- docker_img_build.sh | 5 +-- docker_run.sh | 5 ++- exportCPU.sh | 4 +++ exportGPU.sh | 4 +++ image_demo.py | 16 ++++++---- posenet/converter/config.yaml | 60 +++++++++++++++++++---------------- posenet/model.py | 13 +++++--- posenet/utils.py | 9 +++--- requirements.txt | 11 ++++--- 10 files changed, 78 insertions(+), 54 deletions(-) create mode 100755 exportCPU.sh create mode 100755 exportGPU.sh diff --git a/Dockerfile b/Dockerfile index 075ad9d..127df8b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,11 +17,12 @@ COPY requirements.txt /work/ WORKDIR /work -RUN pip install -r requirements.txt +# run pip install with the '--no-deps' argument, to avoid that tensorflowjs installs an old version of tensorflow! +RUN pip install -r requirements.txt --no-deps RUN git clone https://github.com/patlevin/tfjs-to-tf.git && \ cd tfjs-to-tf && \ - pip install . && \ + pip install . --no-deps && \ cd .. && \ rm -r tfjs-to-tf diff --git a/docker_img_build.sh b/docker_img_build.sh index daa4337..b0c5f52 100755 --- a/docker_img_build.sh +++ b/docker_img_build.sh @@ -8,10 +8,11 @@ fi if [ "$1" = "GPU" ]; then image="posenet-python-gpu" - version="--build-arg IMAGE_VERSION=2.0.0-gpu-py3-jupyter" + version="--build-arg IMAGE_VERSION=2.1.0rc2-gpu-py3-jupyter" + # version="--build-arg IMAGE_VERSION=2.0.0-gpu-py3-jupyter" else image="posenet-python-cpu" - version="" + version="--build-arg IMAGE_VERSION=2.1.0-py3-jupyter" fi docker rmi -f "$image" diff --git a/docker_run.sh b/docker_run.sh index ab8ece1..f90e68b 100755 --- a/docker_run.sh +++ b/docker_run.sh @@ -6,16 +6,15 @@ if [ -z "$POSENET_PYTHON_DEVICE" ]; then echo "set the environment variable POSENET_PYTHON_DEVICE to CPU or GPU, or enter your choice below:" read -p "Enter your device (CPU or GPU): " device if [ "$device" = "GPU" ]; then - source <(echo "export POSENET_PYTHON_DEVICE=GPU"); + source exportGPU.sh elif [ "$device" = "CPU" ]; then - source <(echo "export POSENET_PYTHON_DEVICE=CPU"); + source 
exportCPU.sh else echo "Device configuration failed..." exit 1 fi fi - echo "device is: $POSENET_PYTHON_DEVICE" if [ "$POSENET_PYTHON_DEVICE" = "GPU" ]; then diff --git a/exportCPU.sh b/exportCPU.sh new file mode 100755 index 0000000..c9a3869 --- /dev/null +++ b/exportCPU.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# source this file to set your environment on a CPU device +# $ . exportCPU.sh +export POSENET_PYTHON_DEVICE=CPU \ No newline at end of file diff --git a/exportGPU.sh b/exportGPU.sh new file mode 100755 index 0000000..51162f8 --- /dev/null +++ b/exportGPU.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# source this file to set your environment on a GPU device +# $ . exportGPU.sh +export POSENET_PYTHON_DEVICE=GPU \ No newline at end of file diff --git a/image_demo.py b/image_demo.py index 71767fb..c0bc405 100644 --- a/image_demo.py +++ b/image_demo.py @@ -20,9 +20,12 @@ def main(): - model = 'posenet' - neuralnet = 'mobilenet_v1_100' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride16' + print('Tensorflow version: %s' % tf.__version__) + assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" + + model = 'posenet' # posenet bodypix + neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride32' # stride16 stride32 with tf.compat.v1.Session() as sess: output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) @@ -40,7 +43,7 @@ def main(): f, scale_factor=args.scale_factor, output_stride=output_stride) model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) - input_tensor_name = model_cfg['input_tensors'][0] + input_tensor_name = model_cfg['input_tensors']['image'] # ORDER OF THE FEATURES IS DEPENDENT ON THE config.yaml file output_tensors list!!! 
heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( @@ -48,6 +51,7 @@ def main(): feed_dict={input_tensor_name: input_image} ) + min_score = 0.25 pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses( heatmaps_result.squeeze(axis=0), offsets_result.squeeze(axis=0), @@ -55,14 +59,14 @@ def main(): displacement_bwd_result.squeeze(axis=0), output_stride=output_stride, max_pose_detections=10, - min_pose_score=0.25) + min_pose_score=min_score) keypoint_coords *= output_scale if args.output_dir: draw_image = posenet.draw_skel_and_kp( draw_image, pose_scores, keypoint_scores, keypoint_coords, - min_pose_score=0.25, min_part_score=0.25) + min_pose_score=min_score, min_part_score=min_score) cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), draw_image) diff --git a/posenet/converter/config.yaml b/posenet/converter/config.yaml index 058d6dd..9b29697 100644 --- a/posenet/converter/config.yaml +++ b/posenet/converter/config.yaml @@ -9,16 +9,16 @@ models: output_stride: 16 data_format: 'NHWC' input_tensors: - - 'sub_2:0' + image: 'sub_2:0' output_tensors: - - 'float_heatmaps:0' - - 'float_short_offsets:0' - - 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' - - 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' - - 'float_part_heatmaps:0' - - 'float_part_offsets:0' - - 'float_long_offsets:0' - - 'float_segments:0' + heatmaps: 'float_heatmaps:0' + offsets: 'float_short_offsets:0' + displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + part_heatmaps: 'float_part_heatmaps:0' + part_offsets: 'float_part_offsets:0' + long_offsets: 'float_long_offsets:0' + segments: 'float_segments:0' mobilenet_v1_100: base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/float/100' model_variant: @@ -27,16 +27,16 @@ models: output_stride: 16 data_format: 'NHWC' input_tensors: - - 'sub_2:0' + image: 'sub_2:0' 
output_tensors: - - 'float_heatmaps:0' - - 'float_short_offsets:0' - - 'MobilenetV1/displacement_fwd_2/BiasAdd:0' - - 'MobilenetV1/displacement_bwd_2/BiasAdd:0' - - 'float_part_heatmaps:0' - - 'float_part_offsets:0' - - 'float_long_offsets:0' - - 'float_segments:0' + heatmaps: 'float_heatmaps:0' + offsets: 'float_short_offsets:0' + displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + part_heatmaps: 'float_part_heatmaps:0' + part_offsets: 'float_part_offsets:0' + long_offsets: 'float_long_offsets:0' + segments: 'float_segments:0' posenet: resnet50_v1: base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/float' @@ -45,13 +45,17 @@ models: filename: 'model-stride16.json' output_stride: 16 data_format: 'NHWC' + stride32: + filename: 'model-stride32.json' + output_stride: 32 + data_format: 'NHWC' input_tensors: - - 'sub_2:0' + image: 'sub_2:0' output_tensors: - - 'float_heatmaps:0' - - 'float_short_offsets:0' - - 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' - - 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + heatmaps: 'float_heatmaps:0' + offsets: 'float_short_offsets:0' + displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' mobilenet_v1_100: base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100' model_variant: @@ -60,12 +64,12 @@ models: output_stride: 16 data_format: 'NHWC' input_tensors: - - 'sub_2:0' + image: 'sub_2:0' output_tensors: - - 'MobilenetV1/heatmap_2/BiasAdd:0' - - 'MobilenetV1/offset_2/BiasAdd:0' - - 'MobilenetV1/displacement_fwd_2/BiasAdd:0' - - 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + heatmaps: 'MobilenetV1/heatmap_2/BiasAdd:0' + offsets: 'MobilenetV1/offset_2/BiasAdd:0' + displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' diff --git a/posenet/model.py 
b/posenet/model.py index 00dc7b5..63f97bb 100644 --- a/posenet/model.py +++ b/posenet/model.py @@ -46,11 +46,14 @@ def load_tf_model(sess, model, neuralnet, model_variant): sess.graph.as_default() tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) - output_tensors_names = model_cfg['output_tensors'] - - output_tensors = [] - for name in output_tensors_names: - output_tensors.append(sess.graph.get_tensor_by_name(name)) + output_tensor_map = model_cfg['output_tensors'] + + output_tensors = [ + tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_map['heatmaps']), 'heatmaps'), + sess.graph.get_tensor_by_name(output_tensor_map['offsets']), + sess.graph.get_tensor_by_name(output_tensor_map['displacement_fwd']), + sess.graph.get_tensor_by_name(output_tensor_map['displacement_bwd']) + ] return model_cfg['output_stride'], output_tensors diff --git a/posenet/utils.py b/posenet/utils.py index 94c5db9..db7cd3d 100644 --- a/posenet/utils.py +++ b/posenet/utils.py @@ -4,6 +4,7 @@ def valid_resolution(width, height, output_stride=16): + # calculate closest smaller width and height that is divisible by the stride after subtracting 1 (for the bias?) 
target_width = (int(width) // output_stride) * output_stride + 1 target_height = (int(height) // output_stride) * output_stride + 1 return target_width, target_height @@ -12,13 +13,13 @@ def valid_resolution(width, height, output_stride=16): def _process_input(source_img, scale_factor=1.0, output_stride=16): target_width, target_height = valid_resolution( source_img.shape[1] * scale_factor, source_img.shape[0] * scale_factor, output_stride=output_stride) + # the scale that can get us back to the original width and height: scale = np.array([source_img.shape[0] / target_height, source_img.shape[1] / target_width]) input_img = cv2.resize(source_img, (target_width, target_height), interpolation=cv2.INTER_LINEAR) - input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) - input_img = input_img * (2.0 / 255.0) - 1.0 - # NHWC - input_img = input_img.reshape(1, target_height, target_width, 3) + input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) # to RGB colors + input_img = input_img * (2.0 / 255.0) - 1.0 # normalize to [-1,1] + input_img = input_img.reshape(1, target_height, target_width, 3) # NHWC return input_img, source_img, scale diff --git a/requirements.txt b/requirements.txt index 4b7f71e..896393e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,11 @@ # scipy==1.4.1 -scipy==1.1.* -# pyyaml==5.2 -pyyaml==3.* +# scipy==1.1.* +pyyaml==5.* +# pyyaml==3.* opencv-python-headless==3.4.5.20 # opencv-python==3.4.5.20 + # tensorflowjs==1.3.2 -tensorflowjs==1.4.0 \ No newline at end of file +tensorflowjs==1.4.0 +# tensorflow-hub is needed by tensorflowjs +tensorflow-hub==0.7.0 \ No newline at end of file From 0359c8964df42ebddd9731a1a7fba7d20ad8e89a Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sat, 11 Jan 2020 15:31:08 +0100 Subject: [PATCH 09/28] Commenting on the status of this repo. 
--- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index fd3f29f..e290147 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ ## PoseNet Python +This repository is a clone of rwightman/posenet-python that is under heavy refactoring to get it to work with the latest +tfjs graph serialization and to expand it with the ResNet50 network, all on TF2.0. Heavy cleanup is to be expected when +we get the basics running on all the latest models. Best look at the docker scripts to get things running. + This repository contains a pure Python implementation (multi-pose only) of the Google TensorFlow.js Posenet model. For a (slightly faster) PyTorch implementation that followed from this, see (https://github.com/rwightman/posenet-pytorch) I first adapted the JS code more or less verbatim and found the performance was low so made some vectorized numpy/scipy version of a few key functions (named `_fast`). From 90fb91b47f7f1624b94fdea829e3abf59009ccef Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sat, 11 Jan 2020 20:13:29 +0100 Subject: [PATCH 10/28] Cleaning up. 
--- .gitignore | 1 - NOTICE.txt | 4 - README.md | 2 +- benchmark.py | 12 +- image_demo.py | 9 +- posenet/__init__.py | 1 - posenet/converter/config.yaml | 67 +------ posenet/converter/tfjs2python.py | 281 ------------------------------ posenet/converter/tfjsdownload.py | 1 - posenet/model.py | 61 +------ webcam_demo.py | 15 +- 11 files changed, 35 insertions(+), 419 deletions(-) delete mode 100644 posenet/converter/tfjs2python.py diff --git a/.gitignore b/.gitignore index 98ef57c..59c548b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,6 @@ _models/* _tf_models/* _tfjs_models/* _posenet_weights/* -env.sh # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/NOTICE.txt b/NOTICE.txt index d01bd39..f102c3b 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,10 +1,6 @@ PoseNet Python Copyright 2018 Ross Wightman -Posenet tfjs converter (code in posenet/converter) -Copyright (c) 2017 Infocom TPO (https://lab.infocom.co.jp/) -Modified (c) 2018 Ross Wightman - tfjs PoseNet weights and original JS code Copyright 2018 Google LLC. All Rights Reserved. (https://github.com/tensorflow/tfjs-models | Apache License 2.0) diff --git a/README.md b/README.md index e290147..9dce001 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ requirement is that you have a Linux machine with a GPU set up with docker, the up, you can make as many images as you want with different depencencies without touching your host OS (or fiddling with conda). 
```bash -./docker_img_build.sh +./docker_img_build.sh GPU ./get_test_images_run.sh ./image_demo_run.sh ``` diff --git a/benchmark.py b/benchmark.py index b7a01de..985838c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -4,6 +4,7 @@ import os import posenet +import posenet.converter.tfjsdownload as tfjsdownload parser = argparse.ArgumentParser() @@ -15,8 +16,12 @@ def main(): + model = 'posenet' # posenet bodypix + neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride32' # stride16 stride32 + with tf.compat.v1.Session() as sess: - output_stride, model_outputs = posenet.load_model(args.model, sess) + output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) num_images = args.num_images filenames = [ @@ -26,11 +31,14 @@ def main(): images = {f: posenet.read_imgfile(f, 1.0, output_stride)[0] for f in filenames} + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + input_tensor_name = model_cfg['input_tensors']['image'] + start = time.time() for i in range(num_images): heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( model_outputs, - feed_dict={'image:0': images[filenames[i % len(filenames)]]} + feed_dict={input_tensor_name: images[filenames[i % len(filenames)]]} ) output = posenet.decode_multiple_poses( diff --git a/image_demo.py b/image_demo.py index c0bc405..2456a5e 100644 --- a/image_demo.py +++ b/image_demo.py @@ -5,7 +5,6 @@ import os import posenet -import posenet.converter.tfjs2tf as tfjs2tf import posenet.converter.tfjsdownload as tfjsdownload @@ -24,8 +23,8 @@ def main(): assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" 
model = 'posenet' # posenet bodypix - neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride32' # stride16 stride32 + neuralnet = 'mobilenet_v1_100' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride16' # stride16 stride32 with tf.compat.v1.Session() as sess: output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) @@ -46,14 +45,14 @@ def main(): input_tensor_name = model_cfg['input_tensors']['image'] # ORDER OF THE FEATURES IS DEPENDENT ON THE config.yaml file output_tensors list!!! - heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( + heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( model_outputs, feed_dict={input_tensor_name: input_image} ) min_score = 0.25 pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses( - heatmaps_result.squeeze(axis=0), + heatmap_result.squeeze(axis=0), offsets_result.squeeze(axis=0), displacement_fwd_result.squeeze(axis=0), displacement_bwd_result.squeeze(axis=0), diff --git a/posenet/__init__.py b/posenet/__init__.py index 3f6bac2..4b132e8 100644 --- a/posenet/__init__.py +++ b/posenet/__init__.py @@ -1,5 +1,4 @@ from posenet.constants import * from posenet.decode_multi import decode_multiple_poses -from posenet.model import load_model from posenet.model import load_tf_model from posenet.utils import * diff --git a/posenet/converter/config.yaml b/posenet/converter/config.yaml index 9b29697..c951efc 100644 --- a/posenet/converter/config.yaml +++ b/posenet/converter/config.yaml @@ -11,11 +11,11 @@ models: input_tensors: image: 'sub_2:0' output_tensors: - heatmaps: 'float_heatmaps:0' + heatmap: 'float_heatmaps:0' offsets: 'float_short_offsets:0' displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' - part_heatmaps: 'float_part_heatmaps:0' + part_heatmap: 'float_part_heatmaps:0' part_offsets: 
'float_part_offsets:0' long_offsets: 'float_long_offsets:0' segments: 'float_segments:0' @@ -29,11 +29,11 @@ models: input_tensors: image: 'sub_2:0' output_tensors: - heatmaps: 'float_heatmaps:0' + heatmap: 'float_heatmaps:0' offsets: 'float_short_offsets:0' displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' - part_heatmaps: 'float_part_heatmaps:0' + part_heatmap: 'float_part_heatmaps:0' part_offsets: 'float_part_offsets:0' long_offsets: 'float_long_offsets:0' segments: 'float_segments:0' @@ -52,7 +52,7 @@ models: input_tensors: image: 'sub_2:0' output_tensors: - heatmaps: 'float_heatmaps:0' + heatmap: 'float_heatmaps:0' offsets: 'float_short_offsets:0' displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' @@ -66,63 +66,8 @@ models: input_tensors: image: 'sub_2:0' output_tensors: - heatmaps: 'MobilenetV1/heatmap_2/BiasAdd:0' + heatmap: 'MobilenetV1/heatmap_2/BiasAdd:0' offsets: 'MobilenetV1/offset_2/BiasAdd:0' displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' - - -chk: 3 # 3=mobilenet_v1_101 -imageSize: 513 -GOOGLE_CLOUD_STORAGE_DIR: 'https://storage.googleapis.com/tfjs-models/weights/posenet/' -checkpoints: [ 'mobilenet_v1_050', 'mobilenet_v1_075', 'mobilenet_v1_100', 'mobilenet_v1_101'] -outputStride: 16 -mobileNet100Architecture: [ - ['conv2d', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1] -] -mobileNet75Architecture: [ - ['conv2d', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 2], - 
['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1] -] -mobileNet50Architecture: [ - ['conv2d', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 2], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1], - ['separableConv', 1] -] diff --git a/posenet/converter/tfjs2python.py b/posenet/converter/tfjs2python.py deleted file mode 100644 index 0d9f264..0000000 --- a/posenet/converter/tfjs2python.py +++ /dev/null @@ -1,281 +0,0 @@ -import json -import struct -import tensorflow as tf -from tensorflow.python.tools.freeze_graph import freeze_graph -import cv2 -import numpy as np -import os -import tempfile - -from posenet.converter.config import load_config - -# BASE_DIR = os.path.join(tempfile.gettempdir(), '_posenet_weights') -BASE_DIR = os.path.join('.', '_posenet_weights') - -# Note that this file contains reverse-engineered documentation that contains several notes about points that need to be verified. - - -def to_output_strided_layers(convolution_def, output_stride): - """ - There seem to be some magic formulas used in this function. The output magically aligns with the details of the layer definition - for MobilenetV1. Not sure how reusable this is for other networks that use depthwise convolutions. - - Note: Verify whether we can reuse this function for other networks, like MobilenetV2. - - :param convolution_def: A MobileNet convolution definition selection from the config.yaml file. - :param output_stride: The chosen output stride. Note to check how the output stride is coupled to the chosen network - variables (see the load_variables function). - :return: An array containing an element for each layer with the detailed layer specs defined in each of them. 
- """ - - current_stride = 1 - rate = 1 - block_id = 0 - buff = [] - for _a in convolution_def: - conv_type = _a[0] - stride = _a[1] - - if current_stride == output_stride: # How often do we get here? - layer_stride = 1 # tf.nn.depthwise_conv2d nets require the strides to be 1 when the rate (dilation) is >1 - layer_rate = rate - rate *= stride # why is this? - else: - layer_stride = stride - layer_rate = 1 # tf.nn.depthwise_conv2d nets require the rate (dilation) to be 1 when the strides are >1 - current_stride *= stride # why is this? - - buff.append({ - 'blockId': block_id, - 'convType': conv_type, - 'stride': layer_stride, - 'rate': layer_rate, - 'outputStride': current_stride # Looks like the variable 'outputStride' is never used anywhere. - }) - block_id += 1 - - return buff - - -def load_variables(chkpoint, base_dir=BASE_DIR): - """ - Load all weights and biases from the C-struct binary files the manifest.json file refers to into tensorflow variables and - attach those to the manifest data structure as property 'x' under their corresponding variable name. - If no manifest is found, it will be downloaded first together with all the variable files it refers to. - - :param chkpoint: The checkpoint name. This name is important because it is part of the URL structure where the variables - are downloaded from, and the name is reused on the local filesystem for consistency. - :param base_dir: The local folder name where the posenet weights are downloaded in (usually in a temp folder). - :return: The loaded content of the manifest is used as a data structure where the tensorflow variables created in this - function are added to and hashed under the 'x' property of each variable. - - Note for refactoring: To make this function reusable for other networks, the weights downloader should be either - 1/ more generic, or 2/ extracted outside this function. Apart from this, this function is likely very reusable for other networks. 
- """ - - manifest_path = os.path.join(base_dir, chkpoint, "manifest.json") - if not os.path.exists(manifest_path): - print('Weights for checkpoint %s are not downloaded. Downloading to %s ...' % (chkpoint, base_dir)) - from posenet.converter.wget import download - download(chkpoint, base_dir) - assert os.path.exists(manifest_path) - - with open(manifest_path) as f: - variables = json.load(f) - - # with tf.variable_scope(None, 'MobilenetV1'): - for x in variables: - filename = variables[x]["filename"] - byte = open(os.path.join(base_dir, chkpoint, filename), 'rb').read() - fmt = str(int(len(byte) / struct.calcsize('f'))) + 'f' - d = struct.unpack(fmt, byte) - d = tf.cast(d, tf.float32) - d = tf.reshape(d, variables[x]["shape"]) - variables[x]["x"] = tf.Variable(d, name=x) - - return variables - - -def _read_imgfile(path, width, height): - """ - Read an image file, resize it and normalize its values to match the MobileNetV1's expected input features. - - :param path: The path on the fs where the image is located. - :param width: The requested image target width. - :param height: The requested image target height. - :return: The resized image with normalized pixels as a 3D array (height, width, channels). - """ - - img = cv2.imread(path) - # The cv2.resize shape definition is indeed (width, height), while the image shape from cv2.imread is (height, width, channels). - img = cv2.resize(img, (width, height)) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = img.astype(float) - img = img * (2.0 / 255.0) - 1.0 - return img - - -def build_network(image, layers, variables): - """ - Build a tensorflow network instance based on the definition in the 'layers' parameter and the given variables. - The layer names used are MobileNetV1 specific. - - Note: See how/if this can be made more generic to build other networks like MobileNetV2 / ResNet50 / ... - - :param image: The tensor placeholder that will be used to feed image data into the network. 
It's the starting point for the network. - :param layers: The layer definitions as defined by the 'to_output_strided_layers' function. - :param variables: The variables that instantiate the requested network. This parameter represents the network's manifest that - was loaded from the manifest.json file and that was enriched with tensorflow variables that were loaded from the variable - snapshot files the manifest refers to (by the 'load_variables' function). - :return: The built tensorflow network. - """ - - def _weights(layer_name): - return variables["MobilenetV1/" + layer_name + "/weights"]['x'] - - def _biases(layer_name): - return variables["MobilenetV1/" + layer_name + "/biases"]['x'] - - def _depthwise_weights(layer_name): - return variables["MobilenetV1/" + layer_name + "/depthwise_weights"]['x'] - - def _conv_to_output(mobile_net_output, output_layer_name): - w = tf.nn.conv2d(input=mobile_net_output, filters=_weights(output_layer_name), strides=[1, 1, 1, 1], padding='SAME') - w = tf.nn.bias_add(w, _biases(output_layer_name), name=output_layer_name) - return w - - def _conv(inputs, stride, block_id): - return tf.nn.relu6( - tf.nn.conv2d(input=inputs, filters=_weights("Conv2d_" + str(block_id)), strides=stride, padding='SAME') - + - _biases("Conv2d_" + str(block_id)) - ) - - def _separable_conv(inputs, stride, block_id, dilations): - if dilations is None: - dilations = [1, 1] - - dw_layer = "Conv2d_" + str(block_id) + "_depthwise" - pw_layer = "Conv2d_" + str(block_id) + "_pointwise" - - # 'NHWC' = data format [batch, height, width, channels] - # The dilations are the number of repeated values in the height and width dimension to get a depthwise convolution. - # A depthwise convolution uses a filter (kernel) with a depth of 1 instead of the channel depth to get fewer variables that - # have to be learned, and so achieve a faster but less accurate network. 
When the rate (or dilation) is 1, then the strides - # must all be 1, see: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/nn/depthwise_conv2d - w = tf.nn.depthwise_conv2d(input=inputs, filter=_depthwise_weights(dw_layer), strides=stride, padding='SAME', dilations=dilations, data_format='NHWC') - w = tf.nn.bias_add(w, _biases(dw_layer)) - w = tf.nn.relu6(w) - - w = tf.nn.conv2d(input=w, filters=_weights(pw_layer), strides=[1, 1, 1, 1], padding='SAME') - w = tf.nn.bias_add(w, _biases(pw_layer)) - w = tf.nn.relu6(w) - - return w - - x = image - buff = [] # remove this buffer, seems like it's not used - with tf.compat.v1.variable_scope(None, 'MobilenetV1'): - - for m in layers: - stride = [1, m['stride'], m['stride'], 1] - rate = [m['rate'], m['rate']] - if m['convType'] == "conv2d": - x = _conv(x, stride, m['blockId']) - buff.append(x) # remove this buffer - elif m['convType'] == "separableConv": - x = _separable_conv(x, stride, m['blockId'], rate) - buff.append(x) # remove this buffer - - heatmaps = _conv_to_output(x, 'heatmap_2') - offsets = _conv_to_output(x, 'offset_2') - displacement_fwd = _conv_to_output(x, 'displacement_fwd_2') - displacement_bwd = _conv_to_output(x, 'displacement_bwd_2') - heatmaps = tf.sigmoid(heatmaps, 'heatmap') - # It looks like the outputs 'partheat', 'partoff' and 'segment' are not used. - # It looks like only the '_2' variant is used of 'heatmap', 'offset', 'displacement_fwd' and 'displacement_bwd'. - # To verify: Are the '_2' variants coupled to the choice of the outputstride of 16 in the config.yaml file? - - return heatmaps, offsets, displacement_fwd, displacement_bwd - - -def convert(model_id, model_dir, check=False): - """ - Download and read the weight and bias variables for MobileNetV1, create the network and instantiate it with those variables. - Then write the instantiated network to a model file and corresponding checkpoint files. 
- - :param model_id: Refers to the model to load, as defined in the config.yaml file. - :param model_dir: Defines where the model and checkpoint files will be saved. - :param check: Indicates whether or not to verify the model by feeding it a sample image. - :return: Nothing, the model and checkpoint files are written to the filesystem. - """ - - cfg = load_config() - checkpoints = cfg['checkpoints'] - image_size = cfg['imageSize'] - output_stride = cfg['outputStride'] # to verify: is this output_stride coupled to the downloaded weights? (current assumption is 'yes') - chkpoint = checkpoints[model_id] - - if chkpoint == 'mobilenet_v1_050': - mobile_net_arch = cfg['mobileNet50Architecture'] - elif chkpoint == 'mobilenet_v1_075': - mobile_net_arch = cfg['mobileNet75Architecture'] - else: - mobile_net_arch = cfg['mobileNet100Architecture'] - # The 'mobilenet_v1_101' seems to have the same architecture as 'mobileNet100Architecture'. - - width = image_size - height = image_size - - if not os.path.exists(model_dir): - os.makedirs(model_dir) - - cg = tf.Graph() - with cg.as_default(): - layers = to_output_strided_layers(mobile_net_arch, output_stride) - variables = load_variables(chkpoint) - - init = tf.compat.v1.global_variables_initializer() - with tf.compat.v1.Session() as sess: - sess.run(init) - - image_ph = tf.compat.v1.placeholder(tf.float32, shape=[1, None, None, 3], name='image') - outputs = build_network(image_ph, layers, variables) - - sess.run( - [outputs], - feed_dict={ - image_ph: [np.ndarray(shape=(height, width, 3), dtype=np.float32)] - } - ) - - save_path = os.path.join(model_dir, 'model-%s' % chkpoint) - if not os.path.exists(save_path): - os.makedirs(save_path) - - builder = tf.compat.v1.saved_model.Builder(save_path) - builder.add_meta_graph_and_variables(sess, tags=[tf.saved_model.SERVING]) - builder.save() - - if check and os.path.exists("./images/tennis_in_crowd.jpg"): - # Result - input_image = _read_imgfile("./images/tennis_in_crowd.jpg", width, 
height) - input_image = np.array(input_image, dtype=np.float32) - input_image = input_image.reshape(1, height, width, 3) - - heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( - outputs, - feed_dict={image_ph: input_image} - ) - - print("Test image stats") - print(input_image) - print(input_image.shape) - print(np.mean(input_image)) - - heatmaps_result = heatmaps_result[0] - - print("Heatmaps") - print(heatmaps_result[0:1, 0:1, :]) - print(heatmaps_result.shape) - print(np.mean(heatmaps_result)) diff --git a/posenet/converter/tfjsdownload.py b/posenet/converter/tfjsdownload.py index 492475c..b2c8eda 100644 --- a/posenet/converter/tfjsdownload.py +++ b/posenet/converter/tfjsdownload.py @@ -130,4 +130,3 @@ def download_tfjs_model(model, neuralnet, model_variant, data_format): shard_paths = json_model_def['weightsManifest'][0]['paths'] for shard in shard_paths: download_single_file(model_cfg['base_url'], shard, model_cfg['tfjs_dir']) - diff --git a/posenet/model.py b/posenet/model.py index 63f97bb..ced1800 100644 --- a/posenet/model.py +++ b/posenet/model.py @@ -1,40 +1,9 @@ import tensorflow as tf import os -import posenet.converter.config import posenet.converter.tfjsdownload as tfjsdownload import posenet.converter.tfjs2tf as tfjs2tf -MODEL_DIR = './_models' -DEBUG_OUTPUT = False - - -def model_id_to_ord(model_id): - if 0 <= model_id < 4: - return model_id # id is already ordinal - elif model_id == 50: - return 0 - elif model_id == 75: - return 1 - elif model_id == 100: - return 2 - else: # 101 - return 3 - - -def load_config(model_ord): - converter_cfg = posenet.converter.config.load_config() - checkpoints = converter_cfg['checkpoints'] - output_stride = converter_cfg['outputStride'] - checkpoint_name = checkpoints[model_ord] - - model_cfg = { - 'output_stride': output_stride, - 'checkpoint_name': checkpoint_name, - } - return model_cfg - - def load_tf_model(sess, model, neuralnet, model_variant): model_cfg = 
tfjsdownload.model_config(model, neuralnet, model_variant) model_path = model_cfg['tf_dir'] @@ -49,38 +18,10 @@ def load_tf_model(sess, model, neuralnet, model_variant): output_tensor_map = model_cfg['output_tensors'] output_tensors = [ - tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_map['heatmaps']), 'heatmaps'), + tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_map['heatmap']), 'heatmap'), sess.graph.get_tensor_by_name(output_tensor_map['offsets']), sess.graph.get_tensor_by_name(output_tensor_map['displacement_fwd']), sess.graph.get_tensor_by_name(output_tensor_map['displacement_bwd']) ] return model_cfg['output_stride'], output_tensors - - -def load_model(model_id, sess, model_dir=MODEL_DIR): - model_ord = model_id_to_ord(model_id) - model_cfg = load_config(model_ord) - model_path = os.path.join(model_dir, 'model-%s' % model_cfg['checkpoint_name']) - if not os.path.exists(model_path): - print('Cannot find model file %s, converting from tfjs...' % model_path) - from posenet.converter.tfjs2python import convert - convert(model_ord, model_dir, check=False) - assert os.path.exists(model_path) - - sess.graph.as_default() - tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) - - # if DEBUG_OUTPUT: - # graph_nodes = [n for n in graph_def.node] - # names = [] - # for t in graph_nodes: - # names.append(t.name) - # print('Loaded graph node:', t.name) - - offsets = sess.graph.get_tensor_by_name('offset_2:0') - displacement_fwd = sess.graph.get_tensor_by_name('displacement_fwd_2:0') - displacement_bwd = sess.graph.get_tensor_by_name('displacement_bwd_2:0') - heatmaps = sess.graph.get_tensor_by_name('heatmap:0') - - return model_cfg['output_stride'], [heatmaps, offsets, displacement_fwd, displacement_bwd] diff --git a/webcam_demo.py b/webcam_demo.py index ca6239c..10de46c 100644 --- a/webcam_demo.py +++ b/webcam_demo.py @@ -4,6 +4,7 @@ import argparse import posenet +import posenet.converter.tfjsdownload as tfjsdownload parser = 
argparse.ArgumentParser() parser.add_argument('--model', type=int, default=101) @@ -16,8 +17,14 @@ def main(): + + model = 'posenet' # posenet bodypix + neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride32' # stride16 stride32 + with tf.compat.v1.Session() as sess: - output_stride, model_outputs = posenet.load_model(args.model, sess) + + output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) if args.file is not None: cap = cv2.VideoCapture(args.file) @@ -28,13 +35,17 @@ def main(): start = time.time() frame_count = 0 + + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + input_tensor_name = model_cfg['input_tensors']['image'] + while True: input_image, display_image, output_scale = posenet.read_cap( cap, scale_factor=args.scale_factor, output_stride=output_stride) heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( model_outputs, - feed_dict={'image:0': input_image} + feed_dict={input_tensor_name: input_image} ) pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses( From 4c401c77456655df4e16a23f0c8dcb91e26d8ef8 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sat, 11 Jan 2020 23:32:39 +0100 Subject: [PATCH 11/28] Refactoring... 
--- posenet/base_model.py | 18 +++++++++++ posenet/converter/tfjs2tf.py | 2 +- posenet/converter/tfjsdownload.py | 3 +- posenet/converter/wget.py | 51 ------------------------------- posenet/mobilenet.py | 13 ++++++++ posenet/resnet.py | 13 ++++++++ 6 files changed, 46 insertions(+), 54 deletions(-) create mode 100644 posenet/base_model.py delete mode 100644 posenet/converter/wget.py create mode 100644 posenet/mobilenet.py create mode 100644 posenet/resnet.py diff --git a/posenet/base_model.py b/posenet/base_model.py new file mode 100644 index 0000000..26de306 --- /dev/null +++ b/posenet/base_model.py @@ -0,0 +1,18 @@ +from abc import ABC, abstractmethod + + +class BaseModel(ABC): + + def __init__(self, output_stride): + self.output_stride = output_stride + + @abstractmethod + def preprocess_input(self): + pass + + @abstractmethod + def name_output_results(self, graph): + return graph + + def predict(self, nhwc_images): + return nhwc_images diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py index e2da790..6025885 100644 --- a/posenet/converter/tfjs2tf.py +++ b/posenet/converter/tfjs2tf.py @@ -9,7 +9,7 @@ def convert(model, neuralnet, model_variant): model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) if not os.path.exists(model_file_path): print('Cannot find tfjs model path %s, downloading tfjs model...' 
% model_file_path) - tfjsdownload.download_tfjs_model(model, neuralnet, model_variant, model_cfg['data_format']) + tfjsdownload.download_tfjs_model(model, neuralnet, model_variant) tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) diff --git a/posenet/converter/tfjsdownload.py b/posenet/converter/tfjsdownload.py index b2c8eda..f0f55ed 100644 --- a/posenet/converter/tfjsdownload.py +++ b/posenet/converter/tfjsdownload.py @@ -106,11 +106,10 @@ def download_single_file(base_url, filename, save_dir): f.write(data) -def download_tfjs_model(model, neuralnet, model_variant, data_format): +def download_tfjs_model(model, neuralnet, model_variant): """ Download a tfjs model with saved weights. - :param data_format: :param model: The model, e.g. 'bodypix' :param neuralnet: The neural net used, e.g. 'resnet50' :param model_variant: The reference to the model file, e.g. 'stride16' diff --git a/posenet/converter/wget.py b/posenet/converter/wget.py deleted file mode 100644 index 9de0a5d..0000000 --- a/posenet/converter/wget.py +++ /dev/null @@ -1,51 +0,0 @@ -import urllib.request -import posixpath -import json -import zlib -import os - -from posenet.converter.config import load_config - -CFG = load_config() -GOOGLE_CLOUD_STORAGE_DIR = CFG['GOOGLE_CLOUD_STORAGE_DIR'] -CHECKPOINTS = CFG['checkpoints'] -CHK = CFG['chk'] - - -def download_file(checkpoint, filename, base_dir): - output_path = os.path.join(base_dir, checkpoint, filename) - url = posixpath.join(GOOGLE_CLOUD_STORAGE_DIR, checkpoint, filename) - req = urllib.request.Request(url) - response = urllib.request.urlopen(req) - if response.info().get('Content-Encoding') == 'gzip': - data = zlib.decompress(response.read(), zlib.MAX_WBITS | 32) - else: - # this path not tested since gzip encoding default on google server - # may need additional encoding/text handling if hit in the future - data = response.read() - with open(output_path, 'wb') as f: - f.write(data) - - -def 
download(checkpoint, base_dir='./weights/'): - save_dir = os.path.join(base_dir, checkpoint) - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - download_file(checkpoint, 'manifest.json', base_dir) - with open(os.path.join(save_dir, 'manifest.json'), 'r') as f: - json_dict = json.load(f) - - for x in json_dict: - filename = json_dict[x]['filename'] - print('Downloading', filename) - download_file(checkpoint, filename, base_dir) - - -def main(): - checkpoint = CHECKPOINTS[CHK] - download(checkpoint) - - -if __name__ == "__main__": - main() diff --git a/posenet/mobilenet.py b/posenet/mobilenet.py new file mode 100644 index 0000000..a65939f --- /dev/null +++ b/posenet/mobilenet.py @@ -0,0 +1,13 @@ +from posenet.base_model import BaseModel + + +class MobileNet(BaseModel): + + def __init__(self, output_stride): + super().__init__(output_stride) + + def preprocess_input(self): + return self + + def name_output_results(self, graph): + return graph diff --git a/posenet/resnet.py b/posenet/resnet.py new file mode 100644 index 0000000..71f94c5 --- /dev/null +++ b/posenet/resnet.py @@ -0,0 +1,13 @@ +from posenet.base_model import BaseModel + + +class ResNet(BaseModel): + + def __init__(self, output_stride): + super().__init__(output_stride) + + def preprocess_input(self): + return self + + def name_output_results(self, graph): + return graph From d3d3c1ac3516b4936ae062a2969e0fdb4a373186 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Mon, 13 Jan 2020 21:57:06 +0100 Subject: [PATCH 12/28] Refactoring. 
--- posenet/base_model.py | 32 ++++++++++++++++++++++++-------- posenet/mobilenet.py | 19 +++++++++++++------ posenet/posenet.py | 21 +++++++++++++++++++++ posenet/posenet_factory.py | 32 ++++++++++++++++++++++++++++++++ posenet/resnet.py | 20 ++++++++++++++------ 5 files changed, 104 insertions(+), 20 deletions(-) create mode 100644 posenet/posenet.py create mode 100644 posenet/posenet_factory.py diff --git a/posenet/base_model.py b/posenet/base_model.py index 26de306..79e2686 100644 --- a/posenet/base_model.py +++ b/posenet/base_model.py @@ -1,18 +1,34 @@ from abc import ABC, abstractmethod +import tensorflow as tf class BaseModel(ABC): - def __init__(self, output_stride): + def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): self.output_stride = output_stride + self.sess = sess + self.input_tensor_name = input_tensor_name + self.output_tensors = [ + tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_names['heatmap']), 'heatmap'), # sigmoid!!! + sess.graph.get_tensor_by_name(output_tensor_names['offsets']), + sess.graph.get_tensor_by_name(output_tensor_names['displacement_fwd']), + sess.graph.get_tensor_by_name(output_tensor_names['displacement_bwd']) + ] - @abstractmethod - def preprocess_input(self): - pass + def valid_resolution(self, width, height): + # calculate closest smaller width and height that is divisible by the stride after subtracting 1 (for the bias?) 
+ target_width = (int(width) // self.output_stride) * self.output_stride + 1 + target_height = (int(height) // self.output_stride) * self.output_stride + 1 + return target_width, target_height @abstractmethod - def name_output_results(self, graph): - return graph + def preprocess_input(self, image): + pass - def predict(self, nhwc_images): - return nhwc_images + def predict(self, image): + input_image, image_scale = self.preprocess_input(image) + heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result = self.sess.run( + self.output_tensors, + feed_dict={self.input_tensor_name: input_image} + ) + return heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale diff --git a/posenet/mobilenet.py b/posenet/mobilenet.py index a65939f..7c5e905 100644 --- a/posenet/mobilenet.py +++ b/posenet/mobilenet.py @@ -1,13 +1,20 @@ from posenet.base_model import BaseModel +import numpy as np +import cv2 class MobileNet(BaseModel): - def __init__(self, output_stride): - super().__init__(output_stride) + def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): + super().__init__(sess, input_tensor_name, output_tensor_names, output_stride) - def preprocess_input(self): - return self + def preprocess_input(self, image): + target_width, target_height = self.valid_resolution(image.shape[1], image.shape[0]) + # the scale that can get us back to the original width and height: + scale = np.array([image.shape[0] / target_height, image.shape[1] / target_width]) + input_img = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR) + input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) # to RGB colors - def name_output_results(self, graph): - return graph + input_img = input_img * (2.0 / 255.0) - 1.0 # normalize to [-1,1] + input_img = input_img.reshape(1, target_height, target_width, 3) # NHWC + return input_img, scale diff --git a/posenet/posenet.py 
b/posenet/posenet.py new file mode 100644 index 0000000..48d3d30 --- /dev/null +++ b/posenet/posenet.py @@ -0,0 +1,21 @@ +from posenet.base_model import BaseModel + + +class PoseNet: + + def __init__(self, model: BaseModel): + self.model = model + + def estimate_multiple_poses(self, image): + heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale = \ + self.model.predict(image) + + return self + + def estimate_single_pose(self, image): + heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale = \ + self.model.predict(image) + + # poses = [{'nose': {'x': 0.0, 'y': 0.0, 'score': 0}}] + + return self diff --git a/posenet/posenet_factory.py b/posenet/posenet_factory.py new file mode 100644 index 0000000..de51812 --- /dev/null +++ b/posenet/posenet_factory.py @@ -0,0 +1,32 @@ +import tensorflow as tf +import os +import posenet.converter.tfjsdownload as tfjsdownload +import posenet.converter.tfjs2tf as tfjs2tf +from posenet.resnet import ResNet +from posenet.mobilenet import MobileNet +from posenet.posenet import PoseNet + + +def load_model(model, neuralnet, model_variant): + + model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) + model_path = model_cfg['tf_dir'] + if not os.path.exists(model_path): + print('Cannot find tf model path %s, converting from tfjs...' 
% model_path) + tfjs2tf.convert(model, neuralnet, model_variant) + assert os.path.exists(model_path) + + with tf.compat.v1.Session() as sess: + + sess.graph.as_default() + tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) + + output_tensor_names = model_cfg['output_tensors'] + input_tensor_name = model_cfg['input_tensors']['image'] + + if neuralnet == 'resnet50_v1': + net = ResNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) + else: + net = MobileNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) + + return PoseNet(net) diff --git a/posenet/resnet.py b/posenet/resnet.py index 71f94c5..b381703 100644 --- a/posenet/resnet.py +++ b/posenet/resnet.py @@ -1,13 +1,21 @@ from posenet.base_model import BaseModel +import numpy as np +import cv2 class ResNet(BaseModel): - def __init__(self, output_stride): - super().__init__(output_stride) + def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): + super().__init__(sess, input_tensor_name, output_tensor_names, output_stride) + self.image_net_mean = [-123.15, -115.90, -103.06] - def preprocess_input(self): - return self + def preprocess_input(self, image): + target_width, target_height = self.valid_resolution(image.shape[1], image.shape[0]) + # the scale that can get us back to the original width and height: + scale = np.array([image.shape[0] / target_height, image.shape[1] / target_width]) + input_img = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR) + input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) # to RGB colors - def name_output_results(self, graph): - return graph + input_img = input_img + self.image_net_mean + input_img = input_img.reshape(1, target_height, target_width, 3) # NHWC + return input_img, scale From 2292300b7114113032421b3623fe6109a2d7c3d3 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Thu, 16 Jan 2020 18:41:30 +0100 Subject: [PATCH 13/28] Wiring the 
new design together. --- image_test.py | 51 ++++++++++++++++++++++++++++++++++++++ image_test_run.sh | 3 +++ posenet/base_model.py | 3 +++ posenet/posenet.py | 44 ++++++++++++++++++++++++++------ posenet/posenet_factory.py | 20 +++++++-------- 5 files changed, 103 insertions(+), 18 deletions(-) create mode 100644 image_test.py create mode 100755 image_test_run.sh diff --git a/image_test.py b/image_test.py new file mode 100644 index 0000000..c695c19 --- /dev/null +++ b/image_test.py @@ -0,0 +1,51 @@ +import tensorflow as tf +import cv2 +import time +import argparse +import os + +import posenet +from posenet.posenet_factory import load_model +import posenet.converter.tfjsdownload as tfjsdownload + +parser = argparse.ArgumentParser() +parser.add_argument('--model', type=int, default=101) +parser.add_argument('--scale_factor', type=float, default=1.0) +parser.add_argument('--notxt', action='store_true') +parser.add_argument('--image_dir', type=str, default='./images') +parser.add_argument('--output_dir', type=str, default='./output') +args = parser.parse_args() + + +def main(): + + print('Tensorflow version: %s' % tf.__version__) + assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" 
+ + if args.output_dir: + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + + model = 'posenet' # posenet bodypix + neuralnet = 'mobilenet_v1_100' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride16' # stride16 stride32 + + posenet = load_model(model, neuralnet, model_variant) + + filenames = [f.path for f in os.scandir(args.image_dir) if f.is_file() and f.path.endswith(('.png', '.jpg'))] + + start = time.time() + for f in filenames: + img = cv2.imread(f) + pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(img) + img_poses = posenet.draw_poses(img, pose_scores, keypoint_scores, keypoint_coords) + posenet.print_scores(img, pose_scores, keypoint_scores, keypoint_coords) + cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), img_poses) + + print('Average FPS:', len(filenames) / (time.time() - start)) + + posenet.close() + + +if __name__ == "__main__": + main() diff --git a/image_test_run.sh b/image_test_run.sh new file mode 100755 index 0000000..5bd5689 --- /dev/null +++ b/image_test_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./docker_run.sh image_test.py --model 101 --image_dir ./images --output_dir ./output diff --git a/posenet/base_model.py b/posenet/base_model.py index 79e2686..58dc2d0 100644 --- a/posenet/base_model.py +++ b/posenet/base_model.py @@ -32,3 +32,6 @@ def predict(self, image): feed_dict={self.input_tensor_name: input_image} ) return heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale + + def close(self): + self.sess.close() diff --git a/posenet/posenet.py b/posenet/posenet.py index 48d3d30..3d0e9df 100644 --- a/posenet/posenet.py +++ b/posenet/posenet.py @@ -1,21 +1,49 @@ from posenet.base_model import BaseModel +import posenet class PoseNet: - def __init__(self, model: BaseModel): + def __init__(self, model: BaseModel, min_score=0.25): self.model = model + self.min_score = min_score - def estimate_multiple_poses(self, 
image): + def estimate_multiple_poses(self, image, max_pose_detections=10): heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale = \ self.model.predict(image) - return self + pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses( + heatmap_result.squeeze(axis=0), + offsets_result.squeeze(axis=0), + displacement_fwd_result.squeeze(axis=0), + displacement_bwd_result.squeeze(axis=0), + output_stride=self.model.output_stride, + max_pose_detections=max_pose_detections, + min_pose_score=self.min_score) - def estimate_single_pose(self, image): - heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale = \ - self.model.predict(image) + keypoint_coords *= image_scale - # poses = [{'nose': {'x': 0.0, 'y': 0.0, 'score': 0}}] + return pose_scores, keypoint_scores, keypoint_coords - return self + def estimate_single_pose(self, image): + return self.estimate_multiple_poses(image, max_pose_detections=1) + + def draw_poses(self, image, pose_scores, keypoint_scores, keypoint_coords): + draw_image = posenet.draw_skel_and_kp( + image, pose_scores, keypoint_scores, keypoint_coords, + min_pose_score=self.min_score, min_part_score=self.min_score) + + return draw_image + + def print_scores(self, image_name, pose_scores, keypoint_scores, keypoint_coords): + print() + print("Results for image: %s" % image_name) + for pi in range(len(pose_scores)): + if pose_scores[pi] == 0.: + break + print('Pose #%d, score = %f' % (pi, pose_scores[pi])) + for ki, (s, c) in enumerate(zip(keypoint_scores[pi, :], keypoint_coords[pi, :, :])): + print('Keypoint %s, score = %f, coord = %s' % (posenet.PART_NAMES[ki], s, c)) + + def close(self): + self.model.close() diff --git a/posenet/posenet_factory.py b/posenet/posenet_factory.py index de51812..73ba179 100644 --- a/posenet/posenet_factory.py +++ b/posenet/posenet_factory.py @@ -16,17 +16,17 @@ def load_model(model, neuralnet, model_variant): 
tfjs2tf.convert(model, neuralnet, model_variant) assert os.path.exists(model_path) - with tf.compat.v1.Session() as sess: + sess = tf.compat.v1.Session() - sess.graph.as_default() - tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) + sess.graph.as_default() + tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) - output_tensor_names = model_cfg['output_tensors'] - input_tensor_name = model_cfg['input_tensors']['image'] + output_tensor_names = model_cfg['output_tensors'] + input_tensor_name = model_cfg['input_tensors']['image'] - if neuralnet == 'resnet50_v1': - net = ResNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) - else: - net = MobileNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) + if neuralnet == 'resnet50_v1': + net = ResNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) + else: + net = MobileNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) - return PoseNet(net) + return PoseNet(net) From bddd7f8fcaa877b1a7a5df4bd788c0e17ddccb72 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 17 Jan 2020 15:11:55 +0100 Subject: [PATCH 14/28] Towards TF2.0 --- docker_run.sh | 2 +- get_test_images_run.sh | 2 +- image_demo_run.sh | 2 +- image_test.py | 2 +- image_test_run.sh | 2 +- inspect_saved_model.sh | 6 +++++ posenet/base_model.py | 46 ++++++++++++++++++++++++------------ posenet/converter/config.py | 2 +- posenet/converter/tfjs2tf.py | 35 ++++++++++++++++++++++++++- posenet/mobilenet.py | 4 ++-- posenet/posenet.py | 8 +++---- posenet/posenet_factory.py | 33 +++++++++++++++++++++----- posenet/resnet.py | 6 +++-- 13 files changed, 114 insertions(+), 36 deletions(-) create mode 100755 inspect_saved_model.sh diff --git a/docker_run.sh b/docker_run.sh index f90e68b..c28bc8d 100755 --- a/docker_run.sh +++ b/docker_run.sh @@ -25,4 +25,4 @@ else gpu_opts="" fi -docker run $gpu_opts -it --rm -v $WORK:/work "$image" python "$@" 
+docker run $gpu_opts -it --rm -v $WORK:/work "$image" "$@" diff --git a/get_test_images_run.sh b/get_test_images_run.sh index 446801b..4242d30 100755 --- a/get_test_images_run.sh +++ b/get_test_images_run.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -./docker_run.sh get_test_images.py +./docker_run.sh python get_test_images.py diff --git a/image_demo_run.sh b/image_demo_run.sh index 44b2cb2..50392f1 100755 --- a/image_demo_run.sh +++ b/image_demo_run.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -./docker_run.sh image_demo.py --model 101 --image_dir ./images --output_dir ./output +./docker_run.sh python image_demo.py --model 101 --image_dir ./images --output_dir ./output diff --git a/image_test.py b/image_test.py index c695c19..55700b2 100644 --- a/image_test.py +++ b/image_test.py @@ -39,7 +39,7 @@ def main(): img = cv2.imread(f) pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(img) img_poses = posenet.draw_poses(img, pose_scores, keypoint_scores, keypoint_coords) - posenet.print_scores(img, pose_scores, keypoint_scores, keypoint_coords) + posenet.print_scores(f, pose_scores, keypoint_scores, keypoint_coords) cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), img_poses) print('Average FPS:', len(filenames) / (time.time() - start)) diff --git a/image_test_run.sh b/image_test_run.sh index 5bd5689..b38627b 100755 --- a/image_test_run.sh +++ b/image_test_run.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -./docker_run.sh image_test.py --model 101 --image_dir ./images --output_dir ./output +./docker_run.sh python image_test.py --model 101 --image_dir ./images --output_dir ./output diff --git a/inspect_saved_model.sh b/inspect_saved_model.sh new file mode 100755 index 0000000..539e69b --- /dev/null +++ b/inspect_saved_model.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +FOLDER=$1 + +# e.g.: $> ./inspect_saved_model.sh _tf_models/posenet/mobilenet_v1_100/stride16 +./docker_run.sh saved_model_cli show --dir "$FOLDER" --all diff --git 
a/posenet/base_model.py b/posenet/base_model.py index 58dc2d0..d69857b 100644 --- a/posenet/base_model.py +++ b/posenet/base_model.py @@ -4,16 +4,19 @@ class BaseModel(ABC): - def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): + # keys for the output_tensor_names map + HEATMAP_KEY = "heatmap" + OFFSETS_KEY = "offsets" + DISPLACEMENT_FWD_KEY = "displacement_fwd" + DISPLACEMENT_BWD_KEY = "displacement_bwd" + + def __init__(self, model_function, output_tensor_names, output_stride): self.output_stride = output_stride - self.sess = sess - self.input_tensor_name = input_tensor_name - self.output_tensors = [ - tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_names['heatmap']), 'heatmap'), # sigmoid!!! - sess.graph.get_tensor_by_name(output_tensor_names['offsets']), - sess.graph.get_tensor_by_name(output_tensor_names['displacement_fwd']), - sess.graph.get_tensor_by_name(output_tensor_names['displacement_bwd']) - ] + self.output_tensor_names = output_tensor_names + self.model_function = model_function + # self.sess = sess + # self.input_tensor_name = input_tensor_name + # self.output_tensors = output_tensors def valid_resolution(self, width, height): # calculate closest smaller width and height that is divisible by the stride after subtracting 1 (for the bias?) 
@@ -27,11 +30,24 @@ def preprocess_input(self, image): def predict(self, image): input_image, image_scale = self.preprocess_input(image) - heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result = self.sess.run( - self.output_tensors, - feed_dict={self.input_tensor_name: input_image} - ) - return heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale + + input_image = tf.convert_to_tensor(input_image, dtype=tf.float32) + + result = self.model_function(input_image) + + heatmap_result = result[self.output_tensor_names[self.HEATMAP_KEY]] + offsets_result = result[self.output_tensor_names[self.OFFSETS_KEY]] + displacement_fwd_result = result[self.output_tensor_names[self.DISPLACEMENT_FWD_KEY]] + displacement_bwd_result = result[self.output_tensor_names[self.DISPLACEMENT_BWD_KEY]] + + + # self.sess.run( + # self.output_tensors, + # feed_dict={self.input_tensor_name: input_image} + # ) + + return tf.sigmoid(heatmap_result), offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale def close(self): - self.sess.close() + # self.sess.close() + return diff --git a/posenet/converter/config.py b/posenet/converter/config.py index a3b4736..4cac0bb 100644 --- a/posenet/converter/config.py +++ b/posenet/converter/config.py @@ -6,5 +6,5 @@ def load_config(config_name='config.yaml'): cfg_f = open(os.path.join(BASE_DIR, config_name), "r+") - cfg = yaml.load(cfg_f) + cfg = yaml.load(cfg_f, Loader=yaml.FullLoader) return cfg diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py index 6025885..ce19686 100644 --- a/posenet/converter/tfjs2tf.py +++ b/posenet/converter/tfjs2tf.py @@ -4,13 +4,46 @@ import posenet.converter.tfjsdownload as tfjsdownload +def __tensor_info_def(sess, tensor_names): + signatures = {} + for tensor_name in tensor_names: + tensor = sess.graph.get_tensor_by_name(tensor_name) + tensor_info = tf.compat.v1.saved_model.build_tensor_info(tensor) + 
signatures[tensor_name] = tensor_info + return signatures + + def convert(model, neuralnet, model_variant): model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) if not os.path.exists(model_file_path): print('Cannot find tfjs model path %s, downloading tfjs model...' % model_file_path) tfjsdownload.download_tfjs_model(model, neuralnet, model_variant) - tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) + + # 'graph_model_to_saved_model' doesn't store the signature for the model! + # tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) + # so we do it manually here: + + # see: https://www.programcreek.com/python/example/104885/tensorflow.python.saved_model.signature_def_utils.build_signature_def + graph = tfjs.api.load_graph_model(model_cfg['tfjs_dir']) + builder = tf.compat.v1.saved_model.Builder(model_cfg['tf_dir']) + + with tf.compat.v1.Session(graph=graph) as sess: + input_tensor_names = tfjs.util.get_input_tensors(graph) + output_tensor_names = tfjs.util.get_output_tensors(graph) + + signature_inputs = __tensor_info_def(sess, input_tensor_names) + signature_outputs = __tensor_info_def(sess, output_tensor_names) + + method_name = tf.compat.v1.saved_model.signature_constants.PREDICT_METHOD_NAME + signature_def = tf.compat.v1.saved_model.build_signature_def(inputs=signature_inputs, + outputs=signature_outputs, + method_name=method_name) + signature_map = {tf.compat.v1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def} + builder.add_meta_graph_and_variables(sess=sess, + tags=['serve'], + signature_def_map=signature_map) + return builder.save() def list_tensors(model, neuralnet, model_variant): diff --git a/posenet/mobilenet.py b/posenet/mobilenet.py index 7c5e905..dfaf970 100644 --- a/posenet/mobilenet.py +++ b/posenet/mobilenet.py @@ -5,8 +5,8 @@ class 
MobileNet(BaseModel): - def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): - super().__init__(sess, input_tensor_name, output_tensor_names, output_stride) + def __init__(self, model_function, output_tensor_names, output_stride): + super().__init__(model_function, output_tensor_names, output_stride) def preprocess_input(self, image): target_width, target_height = self.valid_resolution(image.shape[1], image.shape[0]) diff --git a/posenet/posenet.py b/posenet/posenet.py index 3d0e9df..0a90617 100644 --- a/posenet/posenet.py +++ b/posenet/posenet.py @@ -13,10 +13,10 @@ def estimate_multiple_poses(self, image, max_pose_detections=10): self.model.predict(image) pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses( - heatmap_result.squeeze(axis=0), - offsets_result.squeeze(axis=0), - displacement_fwd_result.squeeze(axis=0), - displacement_bwd_result.squeeze(axis=0), + heatmap_result.numpy().squeeze(axis=0), + offsets_result.numpy().squeeze(axis=0), + displacement_fwd_result.numpy().squeeze(axis=0), + displacement_bwd_result.numpy().squeeze(axis=0), output_stride=self.model.output_stride, max_pose_detections=max_pose_detections, min_pose_score=self.min_score) diff --git a/posenet/posenet_factory.py b/posenet/posenet_factory.py index 73ba179..248be96 100644 --- a/posenet/posenet_factory.py +++ b/posenet/posenet_factory.py @@ -16,17 +16,38 @@ def load_model(model, neuralnet, model_variant): tfjs2tf.convert(model, neuralnet, model_variant) assert os.path.exists(model_path) - sess = tf.compat.v1.Session() + # sess = tf.compat.v1.Session() + # output_tensors = load_tensors(sess, model_path, input_tensor_name, output_tensor_names) - sess.graph.as_default() - tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) + loaded_model = tf.saved_model.load(model_path) + print('type of model_function: %s ' % type(loaded_model)) + for sig in loaded_model.signatures.keys(): + print('key: %s' % sig) + model_function = 
loaded_model.signatures["serving_default"] + print(model_function.structured_outputs) output_tensor_names = model_cfg['output_tensors'] - input_tensor_name = model_cfg['input_tensors']['image'] if neuralnet == 'resnet50_v1': - net = ResNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) + net = ResNet(model_function, output_tensor_names, model_cfg['output_stride']) # sess, input_tensor_name, output_tensors, else: - net = MobileNet(sess, input_tensor_name, output_tensor_names, model_cfg['output_stride']) + # net = MobileNet(sess, input_tensor_name, output_tensors, model_cfg['output_stride']) + net = MobileNet(model_function, output_tensor_names, model_cfg['output_stride']) return PoseNet(net) + + +def __unused_load_tensors(sess, model_path, input_tensor_name, output_tensor_names): + + sess.graph.as_default() + # tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) + tf.saved_model.load(model_path, ["serve"]) + + output_tensors = [ + tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_names['heatmap']), 'heatmap'), # sigmoid!!! 
+ sess.graph.get_tensor_by_name(output_tensor_names['offsets']), + sess.graph.get_tensor_by_name(output_tensor_names['displacement_fwd']), + sess.graph.get_tensor_by_name(output_tensor_names['displacement_bwd']) + ] + + return output_tensors \ No newline at end of file diff --git a/posenet/resnet.py b/posenet/resnet.py index b381703..80be20a 100644 --- a/posenet/resnet.py +++ b/posenet/resnet.py @@ -5,8 +5,10 @@ class ResNet(BaseModel): - def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): - super().__init__(sess, input_tensor_name, output_tensor_names, output_stride) + # def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): + def __init__(self, model_function, output_tensor_names, output_stride): + super().__init__(model_function, output_tensor_names, output_stride) + # super().__init__(sess, input_tensor_name, output_tensor_names, output_stride) self.image_net_mean = [-123.15, -115.90, -103.06] def preprocess_input(self, image): From 2a62bd55c68d01553970e7c7861155942b1ea68f Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 17 Jan 2020 17:05:27 +0100 Subject: [PATCH 15/28] TF2.0 compatible version with posenet v2 and ResNet50 support ready and working. 
--- Dockerfile | 4 +- NOTICE.txt | 3 ++ README.md | 83 ++++++++++++++++++------------------ benchmark.py | 55 +++++++++--------------- image_demo.py | 74 ++++++++------------------------ image_test.py | 51 ---------------------- image_test_run.sh | 3 -- posenet/__init__.py | 1 - posenet/base_model.py | 9 ---- posenet/converter/tfjs2tf.py | 7 +-- posenet/model.py | 27 ------------ posenet/posenet_factory.py | 36 +++++----------- posenet/resnet.py | 2 - posenet/utils.py | 32 -------------- requirements.txt | 15 +++---- test_tfjs2tf.py | 43 ------------------- webcam_demo.py | 70 ++++++++++++------------------ 17 files changed, 133 insertions(+), 382 deletions(-) delete mode 100644 image_test.py delete mode 100755 image_test_run.sh delete mode 100644 posenet/model.py delete mode 100644 test_tfjs2tf.py diff --git a/Dockerfile b/Dockerfile index 127df8b..4943709 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # default image version, override using --build-arg IMAGE_VERSION=otherversion -ARG IMAGE_VERSION=nightly-py3-jupyter +ARG IMAGE_VERSION=2.1.0-py3-jupyter FROM tensorflow/tensorflow:$IMAGE_VERSION # The default version is the CPU version! # see: https://www.tensorflow.org/install/docker @@ -18,6 +18,8 @@ COPY requirements.txt /work/ WORKDIR /work # run pip install with the '--no-deps' argument, to avoid that tensorflowjs installs an old version of tensorflow! +# It also ensures that we know and controll the transitive dependencies (although the tensorflow docker image comes +# with a lot of packages pre-installed). RUN pip install -r requirements.txt --no-deps RUN git clone https://github.com/patlevin/tfjs-to-tf.git && \ diff --git a/NOTICE.txt b/NOTICE.txt index f102c3b..976960c 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,6 +1,9 @@ PoseNet Python Copyright 2018 Ross Wightman +PoseNet Python numerous refactorings +Copyright 2020 Peter Rigole + tfjs PoseNet weights and original JS code Copyright 2018 Google LLC. All Rights Reserved. 
(https://github.com/tensorflow/tfjs-models | Apache License 2.0) diff --git a/README.md b/README.md index 9dce001..30a1cdd 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,45 @@ ## PoseNet Python -This repository is a clone of rwightman/posenet-python that is under heavy refactoring to get it to work with the latest -tfjs graph serialization and to expand it with the ResNet50 network, all on TF2.0. Heavy cleanup is to be expected when -we get the basics running on all the latest models. Best look at the docker scripts to get things running. - -This repository contains a pure Python implementation (multi-pose only) of the Google TensorFlow.js Posenet model. For a (slightly faster) PyTorch implementation that followed from this, see (https://github.com/rwightman/posenet-pytorch) - -I first adapted the JS code more or less verbatim and found the performance was low so made some vectorized numpy/scipy version of a few key functions (named `_fast`). - -Further optimization is possible -* The base MobileNet models have a throughput of 200-300 fps on a GTX 1080 Ti (or better) -* The multi-pose post processing code brings this rate down significantly. With a fast CPU and a GTX 1080+: - * A literal translation of the JS post processing code dropped performance to approx 30fps - * My 'fast' post processing results in 90-110fps -* A Cython or pure C++ port would be even better... +This repository originates from rwightman/posenet-python and has been heavily refactored to + * make it run the posenet v2 networks + * get it to work with the latest tfjs graph serialization + * extend it with the ResNet50 network + * make the code run on TF2.0 + * get all code running in docker containers for ease of use and installation (no conda necessary) + +This repository contains a pure Python implementation (multi-pose only) of the Google TensorFlow.js Posenet model. 
+For a (slightly faster) PyTorch implementation that followed from this, +see (https://github.com/rwightman/posenet-pytorch) + ### Install -A suitable Python 3.x environment with a recent version of Tensorflow is required. - -Development and testing was done with Conda Python 3.6.8 and Tensorflow 1.12.0 on Linux. +A suitable Python 3.x environment with Tensorflow 2.x. -Windows 10 with the latest (as of 2019-01-19) 64-bit Python 3.7 Anaconda installer was also tested. +If you want to use the webcam demo, a pip version of opencv (`pip install opencv-python`) is required instead of +the conda version. Anaconda's default opencv does not include ffpmeg/VideoCapture support. Also, you may have to +force install version 3.4.x as 4.x has a broken drawKeypoints binding. -If you want to use the webcam demo, a pip version of opencv (`pip install opencv-python`) is required instead of the conda version. Anaconda's default opencv does not include ffpmeg/VideoCapture support. Also, you may have to force install version 3.4.x as 4.x has a broken drawKeypoints binding. - -A conda environment setup as below should suffice: -``` -conda install tensorflow-gpu scipy pyyaml python=3.6 -pip install opencv-python==3.4.5.20 - -``` +Have a look at the docker configuration for a quick setup. If you want conda, have a look at the `requirements.txt` +file to see what you should install. ### Using Docker -A convenient way to run this project is by building and running the docker image, because it has all the requirements built-in. The main -requirement is that you have a Linux machine with a GPU set up with docker, the nvidia host driver and the nvidia-docker toolkit. Once set -up, you can make as many images as you want with different depencencies without touching your host OS (or fiddling with conda). +A convenient way to run this project is by building and running the docker image, because it has all the requirements +built-in. +The GPU version is tested on a Linux machine. 
You need to install the nvidia host driver and the nvidia-docker toolkit. +Once set up, you can make as many images as you want with different dependencies without touching your host OS +(or fiddling with conda). ```bash ./docker_img_build.sh GPU +. ./exportGPU.sh ./get_test_images_run.sh ./image_demo_run.sh ``` -Some pointers to get you going on the Linux machine setup. Most links are based on Ubuntu, but other distributions should work fine as well. +Some pointers to get you going on the Linux machine setup. Most links are based on Ubuntu, but other distributions +should work fine as well. * [Install docker](https://docs.docker.com/install/linux/docker-ce/ubuntu/ ) * [Install the NVIDIA host driver](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu-installation) * remember to reboot here @@ -54,15 +49,16 @@ Some pointers to get you going on the Linux machine setup. Most links are based ### Usage -There are three demo apps in the root that utilize the PoseNet model. They are very basic and could definitely be improved. - -The first time these apps are run (or the library is used) model weights will be downloaded from the TensorFlow.js version and converted on the fly. +There are three demo apps in the root that utilize the PoseNet model. They are very basic and could definitely be +improved. -For all demos, the model can be specified with the '--model` argument by using its ordinal id (0-3) or integer depth multiplier (50, 75, 100, 101). The default is the 101 model. +The first time these apps are run (or the library is used) model weights will be downloaded from the TensorFlow.js +version and converted on the fly. #### image_demo.py -Image demo runs inference on an input folder of images and outputs those images with the keypoints and skeleton overlayed. +Image demo runs inference on an input folder of images and outputs those images with the keypoints and skeleton +overlayed. 
`python image_demo.py --model 101 --image_dir ./images --output_dir ./output` @@ -70,23 +66,26 @@ A folder of suitable test images can be downloaded by first running the `get_tes #### benchmark.py -A minimal performance benchmark based on image_demo. Images in `--image_dir` are pre-loaded and inference is run `--num_images` times with no drawing and no text output. +A minimal performance benchmark based on image_demo. Images in `--image_dir` are pre-loaded and inference is +run `--num_images` times with no drawing and no text output. #### webcam_demo.py -The webcam demo uses OpenCV to capture images from a connected webcam. The result is overlayed with the keypoints and skeletons and rendered to the screen. The default args for the webcam_demo assume device_id=0 for the camera and that 1280x720 resolution is possible. +The webcam demo uses OpenCV to capture images from a connected webcam. The result is overlayed with the keypoints and +skeletons and rendered to the screen. The default args for the webcam_demo assume device_id=0 for the camera and +that 1280x720 resolution is possible. ### Credits -The original model, weights, code, etc. was created by Google and can be found at https://github.com/tensorflow/tfjs-models/tree/master/posenet +The original model, weights, code, etc. was created by Google and can be found at +https://github.com/tensorflow/tfjs-models/tree/master/posenet -This port is initially created by Ross Wightman and is in no way related to Google. +This port is initially created by Ross Wightman and later upgraded by Peter Rigole and is in no way related to Google. 
-The Python conversion code that started me on my way was adapted from the CoreML port at https://github.com/infocom-tpo/PoseNet-CoreML +The Python conversion code that started me on my way was adapted from the CoreML port at +https://github.com/infocom-tpo/PoseNet-CoreML ### TODO -* Migration to Tensorflow 2.x -* Adding ResNet50 (PoseNet 2) * Performance improvements (especially edge loops in 'decode.py') * OpenGL rendering/drawing * Comment interfaces, tensor dimensions, etc diff --git a/benchmark.py b/benchmark.py index 985838c..29c70ec 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,10 +1,9 @@ import tensorflow as tf +import cv2 import time import argparse import os - -import posenet -import posenet.converter.tfjsdownload as tfjsdownload +from posenet.posenet_factory import load_model parser = argparse.ArgumentParser() @@ -16,41 +15,29 @@ def main(): + print('Tensorflow version: %s' % tf.__version__) + assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" + model = 'posenet' # posenet bodypix neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 model_variant = 'stride32' # stride16 stride32 - with tf.compat.v1.Session() as sess: - output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) - num_images = args.num_images - - filenames = [ - f.path for f in os.scandir(args.image_dir) if f.is_file() and f.path.endswith(('.png', '.jpg'))] - if len(filenames) > num_images: - filenames = filenames[:num_images] - - images = {f: posenet.read_imgfile(f, 1.0, output_stride)[0] for f in filenames} - - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) - input_tensor_name = model_cfg['input_tensors']['image'] - - start = time.time() - for i in range(num_images): - heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( - model_outputs, - feed_dict={input_tensor_name: images[filenames[i % len(filenames)]]} - ) - - output = posenet.decode_multiple_poses( 
- heatmaps_result.squeeze(axis=0), - offsets_result.squeeze(axis=0), - displacement_fwd_result.squeeze(axis=0), - displacement_bwd_result.squeeze(axis=0), - output_stride=output_stride, - max_pose_detections=10, - min_pose_score=0.25) - - print('Average FPS:', num_images / (time.time() - start)) + posenet = load_model(model, neuralnet, model_variant) + + num_images = args.num_images + filenames = [ + f.path for f in os.scandir(args.image_dir) if f.is_file() and f.path.endswith(('.png', '.jpg'))] + if len(filenames) > num_images: + filenames = filenames[:num_images] + + images = {f: cv2.imread(f) for f in filenames} + + start = time.time() + for i in range(num_images): + image = images[filenames[i % len(filenames)]] + posenet.estimate_multiple_poses(image) + + print('Average FPS:', num_images / (time.time() - start)) if __name__ == "__main__": diff --git a/image_demo.py b/image_demo.py index 2456a5e..1353959 100644 --- a/image_demo.py +++ b/image_demo.py @@ -3,10 +3,7 @@ import time import argparse import os - -import posenet -import posenet.converter.tfjsdownload as tfjsdownload - +from posenet.posenet_factory import load_model parser = argparse.ArgumentParser() parser.add_argument('--model', type=int, default=101) @@ -22,64 +19,29 @@ def main(): print('Tensorflow version: %s' % tf.__version__) assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" 
- model = 'posenet' # posenet bodypix - neuralnet = 'mobilenet_v1_100' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride16' # stride16 stride32 - - with tf.compat.v1.Session() as sess: - output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) - - if args.output_dir: - if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) - - filenames = [ - f.path for f in os.scandir(args.image_dir) if f.is_file() and f.path.endswith(('.png', '.jpg'))] - - start = time.time() - for f in filenames: - input_image, draw_image, output_scale = posenet.read_imgfile( - f, scale_factor=args.scale_factor, output_stride=output_stride) - - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) - input_tensor_name = model_cfg['input_tensors']['image'] - - # ORDER OF THE FEATURES IS DEPENDENT ON THE config.yaml file output_tensors list!!! - heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( - model_outputs, - feed_dict={input_tensor_name: input_image} - ) + if args.output_dir: + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) - min_score = 0.25 - pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses( - heatmap_result.squeeze(axis=0), - offsets_result.squeeze(axis=0), - displacement_fwd_result.squeeze(axis=0), - displacement_bwd_result.squeeze(axis=0), - output_stride=output_stride, - max_pose_detections=10, - min_pose_score=min_score) + model = 'posenet' # posenet bodypix + neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 + model_variant = 'stride32' # stride16 stride32 - keypoint_coords *= output_scale + posenet = load_model(model, neuralnet, model_variant) - if args.output_dir: - draw_image = posenet.draw_skel_and_kp( - draw_image, pose_scores, keypoint_scores, keypoint_coords, - min_pose_score=min_score, min_part_score=min_score) + filenames = [f.path for f in os.scandir(args.image_dir) if f.is_file() and 
f.path.endswith(('.png', '.jpg'))] - cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), draw_image) + start = time.time() + for f in filenames: + img = cv2.imread(f) + pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(img) + img_poses = posenet.draw_poses(img, pose_scores, keypoint_scores, keypoint_coords) + posenet.print_scores(f, pose_scores, keypoint_scores, keypoint_coords) + cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), img_poses) - if not args.notxt: - print() - print("Results for image: %s" % f) - for pi in range(len(pose_scores)): - if pose_scores[pi] == 0.: - break - print('Pose #%d, score = %f' % (pi, pose_scores[pi])) - for ki, (s, c) in enumerate(zip(keypoint_scores[pi, :], keypoint_coords[pi, :, :])): - print('Keypoint %s, score = %f, coord = %s' % (posenet.PART_NAMES[ki], s, c)) + print('Average FPS:', len(filenames) / (time.time() - start)) - print('Average FPS:', len(filenames) / (time.time() - start)) + posenet.close() if __name__ == "__main__": diff --git a/image_test.py b/image_test.py deleted file mode 100644 index 55700b2..0000000 --- a/image_test.py +++ /dev/null @@ -1,51 +0,0 @@ -import tensorflow as tf -import cv2 -import time -import argparse -import os - -import posenet -from posenet.posenet_factory import load_model -import posenet.converter.tfjsdownload as tfjsdownload - -parser = argparse.ArgumentParser() -parser.add_argument('--model', type=int, default=101) -parser.add_argument('--scale_factor', type=float, default=1.0) -parser.add_argument('--notxt', action='store_true') -parser.add_argument('--image_dir', type=str, default='./images') -parser.add_argument('--output_dir', type=str, default='./output') -args = parser.parse_args() - - -def main(): - - print('Tensorflow version: %s' % tf.__version__) - assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" 
- - if args.output_dir: - if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) - - model = 'posenet' # posenet bodypix - neuralnet = 'mobilenet_v1_100' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride16' # stride16 stride32 - - posenet = load_model(model, neuralnet, model_variant) - - filenames = [f.path for f in os.scandir(args.image_dir) if f.is_file() and f.path.endswith(('.png', '.jpg'))] - - start = time.time() - for f in filenames: - img = cv2.imread(f) - pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(img) - img_poses = posenet.draw_poses(img, pose_scores, keypoint_scores, keypoint_coords) - posenet.print_scores(f, pose_scores, keypoint_scores, keypoint_coords) - cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), img_poses) - - print('Average FPS:', len(filenames) / (time.time() - start)) - - posenet.close() - - -if __name__ == "__main__": - main() diff --git a/image_test_run.sh b/image_test_run.sh deleted file mode 100755 index b38627b..0000000 --- a/image_test_run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -./docker_run.sh python image_test.py --model 101 --image_dir ./images --output_dir ./output diff --git a/posenet/__init__.py b/posenet/__init__.py index 4b132e8..e1c8ad6 100644 --- a/posenet/__init__.py +++ b/posenet/__init__.py @@ -1,4 +1,3 @@ from posenet.constants import * from posenet.decode_multi import decode_multiple_poses -from posenet.model import load_tf_model from posenet.utils import * diff --git a/posenet/base_model.py b/posenet/base_model.py index d69857b..0eb759b 100644 --- a/posenet/base_model.py +++ b/posenet/base_model.py @@ -14,9 +14,6 @@ def __init__(self, model_function, output_tensor_names, output_stride): self.output_stride = output_stride self.output_tensor_names = output_tensor_names self.model_function = model_function - # self.sess = sess - # self.input_tensor_name = input_tensor_name - # self.output_tensors = output_tensors def 
valid_resolution(self, width, height): # calculate closest smaller width and height that is divisible by the stride after subtracting 1 (for the bias?) @@ -40,12 +37,6 @@ def predict(self, image): displacement_fwd_result = result[self.output_tensor_names[self.DISPLACEMENT_FWD_KEY]] displacement_bwd_result = result[self.output_tensor_names[self.DISPLACEMENT_BWD_KEY]] - - # self.sess.run( - # self.output_tensors, - # feed_dict={self.input_tensor_name: input_image} - # ) - return tf.sigmoid(heatmap_result), offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale def close(self): diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py index ce19686..9fd8e28 100644 --- a/posenet/converter/tfjs2tf.py +++ b/posenet/converter/tfjs2tf.py @@ -21,10 +21,11 @@ def convert(model, neuralnet, model_variant): tfjsdownload.download_tfjs_model(model, neuralnet, model_variant) # 'graph_model_to_saved_model' doesn't store the signature for the model! - # tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) - # so we do it manually here: + # tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) + # So we do it manually below. 
+ # This link was a great help to do this: + # https://www.programcreek.com/python/example/104885/tensorflow.python.saved_model.signature_def_utils.build_signature_def - # see: https://www.programcreek.com/python/example/104885/tensorflow.python.saved_model.signature_def_utils.build_signature_def graph = tfjs.api.load_graph_model(model_cfg['tfjs_dir']) builder = tf.compat.v1.saved_model.Builder(model_cfg['tf_dir']) diff --git a/posenet/model.py b/posenet/model.py deleted file mode 100644 index ced1800..0000000 --- a/posenet/model.py +++ /dev/null @@ -1,27 +0,0 @@ -import tensorflow as tf -import os -import posenet.converter.tfjsdownload as tfjsdownload -import posenet.converter.tfjs2tf as tfjs2tf - - -def load_tf_model(sess, model, neuralnet, model_variant): - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) - model_path = model_cfg['tf_dir'] - if not os.path.exists(model_path): - print('Cannot find tf model path %s, converting from tfjs...' % model_path) - tfjs2tf.convert(model, neuralnet, model_variant) - assert os.path.exists(model_path) - - sess.graph.as_default() - tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) - - output_tensor_map = model_cfg['output_tensors'] - - output_tensors = [ - tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_map['heatmap']), 'heatmap'), - sess.graph.get_tensor_by_name(output_tensor_map['offsets']), - sess.graph.get_tensor_by_name(output_tensor_map['displacement_fwd']), - sess.graph.get_tensor_by_name(output_tensor_map['displacement_bwd']) - ] - - return model_cfg['output_stride'], output_tensors diff --git a/posenet/posenet_factory.py b/posenet/posenet_factory.py index 248be96..48716e3 100644 --- a/posenet/posenet_factory.py +++ b/posenet/posenet_factory.py @@ -16,38 +16,22 @@ def load_model(model, neuralnet, model_variant): tfjs2tf.convert(model, neuralnet, model_variant) assert os.path.exists(model_path) - # sess = tf.compat.v1.Session() - # output_tensors = load_tensors(sess, 
model_path, input_tensor_name, output_tensor_names) - loaded_model = tf.saved_model.load(model_path) - print('type of model_function: %s ' % type(loaded_model)) + + signature_key = tf.compat.v1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + print('We use the signature key %s It should be in the keys list:' % signature_key) for sig in loaded_model.signatures.keys(): - print('key: %s' % sig) - model_function = loaded_model.signatures["serving_default"] - print(model_function.structured_outputs) + print('signature key: %s' % sig) + + model_function = loaded_model.signatures[signature_key] + print('model outputs: %s' % model_function.structured_outputs) output_tensor_names = model_cfg['output_tensors'] + output_stride = model_cfg['output_stride'] if neuralnet == 'resnet50_v1': - net = ResNet(model_function, output_tensor_names, model_cfg['output_stride']) # sess, input_tensor_name, output_tensors, + net = ResNet(model_function, output_tensor_names, output_stride) else: - # net = MobileNet(sess, input_tensor_name, output_tensors, model_cfg['output_stride']) - net = MobileNet(model_function, output_tensor_names, model_cfg['output_stride']) + net = MobileNet(model_function, output_tensor_names, output_stride) return PoseNet(net) - - -def __unused_load_tensors(sess, model_path, input_tensor_name, output_tensor_names): - - sess.graph.as_default() - # tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path) - tf.saved_model.load(model_path, ["serve"]) - - output_tensors = [ - tf.sigmoid(sess.graph.get_tensor_by_name(output_tensor_names['heatmap']), 'heatmap'), # sigmoid!!! 
- sess.graph.get_tensor_by_name(output_tensor_names['offsets']), - sess.graph.get_tensor_by_name(output_tensor_names['displacement_fwd']), - sess.graph.get_tensor_by_name(output_tensor_names['displacement_bwd']) - ] - - return output_tensors \ No newline at end of file diff --git a/posenet/resnet.py b/posenet/resnet.py index 80be20a..5e77b23 100644 --- a/posenet/resnet.py +++ b/posenet/resnet.py @@ -5,10 +5,8 @@ class ResNet(BaseModel): - # def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride): def __init__(self, model_function, output_tensor_names, output_stride): super().__init__(model_function, output_tensor_names, output_stride) - # super().__init__(sess, input_tensor_name, output_tensor_names, output_stride) self.image_net_mean = [-123.15, -115.90, -103.06] def preprocess_input(self, image): diff --git a/posenet/utils.py b/posenet/utils.py index db7cd3d..fd35fd0 100644 --- a/posenet/utils.py +++ b/posenet/utils.py @@ -3,38 +3,6 @@ import posenet.constants -def valid_resolution(width, height, output_stride=16): - # calculate closest smaller width and height that is divisible by the stride after subtracting 1 (for the bias?) 
- target_width = (int(width) // output_stride) * output_stride + 1 - target_height = (int(height) // output_stride) * output_stride + 1 - return target_width, target_height - - -def _process_input(source_img, scale_factor=1.0, output_stride=16): - target_width, target_height = valid_resolution( - source_img.shape[1] * scale_factor, source_img.shape[0] * scale_factor, output_stride=output_stride) - # the scale that can get us back to the original width and height: - scale = np.array([source_img.shape[0] / target_height, source_img.shape[1] / target_width]) - - input_img = cv2.resize(source_img, (target_width, target_height), interpolation=cv2.INTER_LINEAR) - input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) # to RGB colors - input_img = input_img * (2.0 / 255.0) - 1.0 # normalize to [-1,1] - input_img = input_img.reshape(1, target_height, target_width, 3) # NHWC - return input_img, source_img, scale - - -def read_cap(cap, scale_factor=1.0, output_stride=16): - res, img = cap.read() - if not res: - raise IOError("webcam failure") - return _process_input(img, scale_factor, output_stride) - - -def read_imgfile(path, scale_factor=1.0, output_stride=16): - img = cv2.imread(path) - return _process_input(img, scale_factor, output_stride) - - def draw_keypoints( img, instance_scores, keypoint_scores, keypoint_coords, min_pose_confidence=0.5, min_part_confidence=0.5): diff --git a/requirements.txt b/requirements.txt index 896393e..89e4e29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,8 @@ -# scipy==1.4.1 -# scipy==1.1.* -pyyaml==5.* -# pyyaml==3.* -opencv-python-headless==3.4.5.20 -# opencv-python==3.4.5.20 - -# tensorflowjs==1.3.2 +numpy==1.18.1 +tfjs_graph_converter==0.2.0 +opencv_python_headless==3.4.5.20 +scipy==1.4.1 tensorflowjs==1.4.0 -# tensorflow-hub is needed by tensorflowjs +tensorflow==2.1.0 +PyYAML==5.3 tensorflow-hub==0.7.0 \ No newline at end of file diff --git a/test_tfjs2tf.py b/test_tfjs2tf.py deleted file mode 
100644 index 6e64179..0000000 --- a/test_tfjs2tf.py +++ /dev/null @@ -1,43 +0,0 @@ -import posenet.converter.tfjs2tf as tfjs2tf -import posenet.converter.tfjsdownload as tfjsdownload - - -def main(): - tfjsdownload.download_tfjs_model('bodypix', 'resnet50_v1', 'stride16') - tfjs2tf.convert('bodypix', 'resnet50_v1', 'stride16') - tfjsdownload.fix_model_file(tfjsdownload.model_config('bodypix', 'resnet50_v1', 'stride16')) - tfjs2tf.list_tensors('posenet', 'resnet50_v1', 'stride16') - tfjs2tf.list_tensors('bodypix', 'mobilenet_v1_100', 'stride16') - tfjs2tf.list_tensors('posenet', 'mobilenet_v1_100', 'stride16') - -# have a look at: https://github.com/tensorflow/tfjs/tree/master/tfjs-converter -# https://github.com/patlevin/tfjs-to-tf - -## BodyPix - -# see: https://stackoverflow.com/questions/58841355/bodypix-real-time-person-segmentation/59509874#59509874 -# -# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/model-stride16.json -# see weightsManifest.paths for the shard names: -# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/group1-shard1of23.bin -# ... 
-# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float/group1-shard23of23.bin - - -# https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/float/100/model-stride16.json -# see weightsManifest.paths -# "group1-shard1of4.bin", -# "group1-shard2of4.bin", -# "group1-shard3of4.bin", -# "group1-shard4of4.bin" - - -## PoseNet - -# https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/model-stride16.json -# https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/float/model-stride16.json - - -# Old model format -# https://storage.googleapis.com/tfjs-models/weights/posenet/mobilenet_v1_101/manifest.json - diff --git a/webcam_demo.py b/webcam_demo.py index 10de46c..b46f9a5 100644 --- a/webcam_demo.py +++ b/webcam_demo.py @@ -3,8 +3,8 @@ import time import argparse -import posenet -import posenet.converter.tfjsdownload as tfjsdownload +from posenet.posenet_factory import load_model +from posenet.utils import draw_skel_and_kp parser = argparse.ArgumentParser() parser.add_argument('--model', type=int, default=101) @@ -18,58 +18,42 @@ def main(): + print('Tensorflow version: %s' % tf.__version__) + assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" 
+ model = 'posenet' # posenet bodypix neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 model_variant = 'stride32' # stride16 stride32 - with tf.compat.v1.Session() as sess: - - output_stride, model_outputs = posenet.load_tf_model(sess, model, neuralnet, model_variant) - - if args.file is not None: - cap = cv2.VideoCapture(args.file) - else: - cap = cv2.VideoCapture(args.cam_id) - cap.set(3, args.cam_width) - cap.set(4, args.cam_height) - - start = time.time() - frame_count = 0 - - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) - input_tensor_name = model_cfg['input_tensors']['image'] + posenet = load_model(model, neuralnet, model_variant) - while True: - input_image, display_image, output_scale = posenet.read_cap( - cap, scale_factor=args.scale_factor, output_stride=output_stride) + if args.file is not None: + cap = cv2.VideoCapture(args.file) + else: + cap = cv2.VideoCapture(args.cam_id) + cap.set(3, args.cam_width) + cap.set(4, args.cam_height) - heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( - model_outputs, - feed_dict={input_tensor_name: input_image} - ) + start = time.time() + frame_count = 0 - pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses( - heatmaps_result.squeeze(axis=0), - offsets_result.squeeze(axis=0), - displacement_fwd_result.squeeze(axis=0), - displacement_bwd_result.squeeze(axis=0), - output_stride=output_stride, - max_pose_detections=10, - min_pose_score=0.15) + while True: + res, img = cap.read() + if not res: + raise IOError("webcam failure") - keypoint_coords *= output_scale + pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(img) - # TODO this isn't particularly fast, use GL for drawing and display someday... 
- overlay_image = posenet.draw_skel_and_kp( - display_image, pose_scores, keypoint_scores, keypoint_coords, - min_pose_score=0.15, min_part_score=0.1) + overlay_image = draw_skel_and_kp( + img, pose_scores, keypoint_scores, keypoint_coords, + min_pose_score=0.15, min_part_score=0.1) - cv2.imshow('posenet', overlay_image) - frame_count += 1 - if cv2.waitKey(1) & 0xFF == ord('q'): - break + cv2.imshow('posenet', overlay_image) + frame_count += 1 + if cv2.waitKey(1) & 0xFF == ord('q'): + break - print('Average FPS: ', frame_count / (time.time() - start)) + print('Average FPS: ', frame_count / (time.time() - start)) if __name__ == "__main__": From 98b79358ad71592f5c8563d5061dc05e3f4acfbb Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 17 Jan 2020 17:10:38 +0100 Subject: [PATCH 16/28] We don't have to close the model any longer, sessions are managed by TF2 --- README.md | 5 +++-- image_demo.py | 2 -- posenet/base_model.py | 4 ---- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 30a1cdd..651af3c 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ ## PoseNet Python -This repository originates from rwightman/posenet-python and has been heavily refactored to +This repository originates from [rwightman/posenet-python](https://github.com/rwightman/posenet-python) and has been +heavily refactored to: * make it run the posenet v2 networks * get it to work with the latest tfjs graph serialization * extend it with the ResNet50 network - * make the code run on TF2.0 + * make the code run on TF 2.x * get all code running in docker containers for ease of use and installation (no conda necessary) This repository contains a pure Python implementation (multi-pose only) of the Google TensorFlow.js Posenet model. 
diff --git a/image_demo.py b/image_demo.py index 1353959..bd1f44b 100644 --- a/image_demo.py +++ b/image_demo.py @@ -41,8 +41,6 @@ def main(): print('Average FPS:', len(filenames) / (time.time() - start)) - posenet.close() - if __name__ == "__main__": main() diff --git a/posenet/base_model.py b/posenet/base_model.py index 0eb759b..020e900 100644 --- a/posenet/base_model.py +++ b/posenet/base_model.py @@ -38,7 +38,3 @@ def predict(self, image): displacement_bwd_result = result[self.output_tensor_names[self.DISPLACEMENT_BWD_KEY]] return tf.sigmoid(heatmap_result), offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale - - def close(self): - # self.sess.close() - return From e8b8d541629345017ab942ca2c9394b1ef69969f Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 17 Jan 2020 17:40:21 +0100 Subject: [PATCH 17/28] Cleanup. --- README.md | 3 ++ posenet/converter/config.yaml | 1 - posenet/converter/tfjs2tf.py | 17 ---------- posenet/converter/tfjsdownload.py | 52 ++----------------------------- 4 files changed, 6 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 651af3c..dbec853 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,9 @@ The GPU version is tested on a Linux machine. You need to install the nvidia hos Once set up, you can make as many images as you want with different dependencies without touching your host OS (or fiddling with conda). +If you just want to test this code, you can run everything on a CPU just as well. You still get 8fps on mobilenet and +4fps on resnet50. Replace `GPU` below with `CPU` to test on a CPU. + ```bash ./docker_img_build.sh GPU . 
./exportGPU.sh diff --git a/posenet/converter/config.yaml b/posenet/converter/config.yaml index c951efc..5f63795 100644 --- a/posenet/converter/config.yaml +++ b/posenet/converter/config.yaml @@ -70,4 +70,3 @@ models: offsets: 'MobilenetV1/offset_2/BiasAdd:0' displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' - diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py index 9fd8e28..90939b4 100644 --- a/posenet/converter/tfjs2tf.py +++ b/posenet/converter/tfjs2tf.py @@ -45,20 +45,3 @@ def convert(model, neuralnet, model_variant): tags=['serve'], signature_def_map=signature_map) return builder.save() - - -def list_tensors(model, neuralnet, model_variant): - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) - graph = tfjs.api.load_graph_model(model_cfg['tfjs_dir']) - with tf.compat.v1.Session(graph=graph) as sess: - # the module provides some helpers for querying model properties - input_tensor_names = tfjs.util.get_input_tensors(graph) - output_tensor_names = tfjs.util.get_output_tensors(graph) - - print('input tensors:') - for it in input_tensor_names: - print(it) - print('--') - print('output tensors:') - for ot in output_tensor_names: - print(ot) diff --git a/posenet/converter/tfjsdownload.py b/posenet/converter/tfjsdownload.py index f0f55ed..1e0049b 100644 --- a/posenet/converter/tfjsdownload.py +++ b/posenet/converter/tfjsdownload.py @@ -30,57 +30,8 @@ def model_config(model, neuralnet, model_variant): } -def _fix_dilations(node, data_format): - attr_key = tfjs_converter_common.TFJS_NODE_ATTR_KEY - dilations_key = common.TFJS_DILATIONS_KEY - - if attr_key in node and dilations_key in node[attr_key]: - dilations = node[attr_key][dilations_key] - values = dilations['list']['i'] - if len(values) == 4 and (int(values[0]) > 1 or int(values[1]) > 1) and int(values[2]) == 1 and int(values[3]) == 1: - if data_format == common.TF_NHWC: - dilation_h = values[0] - 
dilation_w = values[1] - values[0] = '1' - values[1] = dilation_h - values[2] = dilation_w - values[3] = '1' - if data_format == common.TF_NCHW: - dilation_h = values[0] - dilation_w = values[1] - values[0] = '1' - values[1] = '1' - values[2] = dilation_h - values[3] = dilation_w - - -def _fix_model(json_model_def, data_format): - """ - Input shapes are usually NHWC or NCHW and the corresponding dilations allowed are [1,x,y,1] and [1,1,x,y]. - Some networks, like ResNet50_v1 have dilations for their Conv2D operations that are [2,2,1,1] in the model json - file. This representation is wrong and should be [1,2,2,1] for NHWC input and [1,1,2,2] for NCHW input. - - :param data_format: - :param json_model_def: The JSON model definition. - :return: The fixed JSON model definition. - """ - nodes = json_model_def[tfjs_common.ARTIFACT_MODEL_TOPOLOGY_KEY][tfjs_converter_common.TFJS_NODE_KEY] - for node in nodes: - if common.TFJS_OP_KEY in node and node[common.TFJS_OP_KEY] == common.TFJS_CONV2D_KEY: - _fix_dilations(node, data_format) - - return json_model_def - - def fix_model_file(model_cfg): model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) - with open(model_file_path, 'r') as f: - json_model_def = json.load(f) - - json_model_def = _fix_model(json_model_def, model_cfg['data_format']) - - with open(model_file_path, 'w') as f: - json.dump(json_model_def, f) if not model_cfg['filename'] == 'model.json': # The expected filename for the model json file is 'model.json'. @@ -88,6 +39,9 @@ def fix_model_file(model_cfg): normalized_model_json_file = os.path.join(model_cfg['tfjs_dir'], 'model.json') shutil.copyfile(model_file_path, normalized_model_json_file) + with open(model_file_path, 'r') as f: + json_model_def = json.load(f) + return json_model_def From d9aa09036fdfb40e5e9ec5328e7b4c933e8639a1 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 19 Jan 2020 18:13:32 +0100 Subject: [PATCH 18/28] Refactoring the configuration setup. 
--- benchmark.py | 9 +- image_demo.py | 9 +- posenet/__init__.py | 1 + posenet/converter/config.py | 85 ++++++++++++++++-- posenet/converter/config.yaml | 143 +++++++++++++++--------------- posenet/converter/tfjs2tf.py | 6 +- posenet/converter/tfjsdownload.py | 32 +------ posenet/posenet_factory.py | 16 ++-- webcam_demo.py | 9 +- 9 files changed, 185 insertions(+), 125 deletions(-) diff --git a/benchmark.py b/benchmark.py index 29c70ec..1281756 100644 --- a/benchmark.py +++ b/benchmark.py @@ -18,11 +18,12 @@ def main(): print('Tensorflow version: %s' % tf.__version__) assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" - model = 'posenet' # posenet bodypix - neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride32' # stride16 stride32 + model = 'resnet50' # mobilenet resnet50 + stride = 32 # 8, 16, 32 + quant_bytes = 4 # float + multiplier = 1.0 # only for mobilenet - posenet = load_model(model, neuralnet, model_variant) + posenet = load_model(model, stride, quant_bytes, multiplier) num_images = args.num_images filenames = [ diff --git a/image_demo.py b/image_demo.py index bd1f44b..b600526 100644 --- a/image_demo.py +++ b/image_demo.py @@ -23,11 +23,12 @@ def main(): if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) - model = 'posenet' # posenet bodypix - neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride32' # stride16 stride32 + model = 'resnet50' # mobilenet resnet50 + stride = 32 # 8, 16, 32 (max 16 for mobilenet) + quant_bytes = 4 # float + multiplier = 1.0 # only for mobilenet - posenet = load_model(model, neuralnet, model_variant) + posenet = load_model(model, stride, quant_bytes, multiplier) filenames = [f.path for f in os.scandir(args.image_dir) if f.is_file() and f.path.endswith(('.png', '.jpg'))] diff --git a/posenet/__init__.py b/posenet/__init__.py index e1c8ad6..66403a1 100644 --- a/posenet/__init__.py +++ b/posenet/__init__.py @@ -1,3 +1,4 @@ from 
posenet.constants import * from posenet.decode_multi import decode_multiple_poses from posenet.utils import * +from posenet import converter diff --git a/posenet/converter/config.py b/posenet/converter/config.py index 4cac0bb..8e9a819 100644 --- a/posenet/converter/config.py +++ b/posenet/converter/config.py @@ -1,10 +1,85 @@ -import yaml import os BASE_DIR = os.path.dirname(__file__) +TFJS_MODEL_DIR = './_tfjs_models' +TF_MODEL_DIR = './_tf_models' +MOBILENET_BASE_URL = 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/' +RESNET50_BASE_URL = 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/' -def load_config(config_name='config.yaml'): - cfg_f = open(os.path.join(BASE_DIR, config_name), "r+") - cfg = yaml.load(cfg_f, Loader=yaml.FullLoader) - return cfg +POSENET_ARCHITECTURE = 'posenet' + +RESNET50_MODEL = 'resnet50' +MOBILENET_MODEL = 'mobilenet' + + +def bodypix_resnet50_config(stride, quant_bytes=4): + + graph_json = 'model-stride' + str(stride) + '.json' + + # quantBytes = 4 corresponding to the non - quantized full - precision checkpoints. 
+ if quant_bytes == 4: + base_url = RESNET50_BASE_URL + 'float' + model_dir = RESNET50_MODEL + '_float' + else: + base_url = RESNET50_BASE_URL + 'quant' + str(quant_bytes) + '/' + model_dir = RESNET50_MODEL + '_quant' + str(quant_bytes) + + stride_dir = 'stride' + str(stride) + + return { + 'base_url': base_url, + 'filename': graph_json, + 'output_stride': stride, + 'data_format': 'NHWC', + 'input_tensors': { + 'image': 'sub_2:0' + }, + 'output_tensors': { + 'heatmap': 'float_heatmaps:0', + 'offsets': 'float_short_offsets:0', + 'displacement_fwd': 'resnet_v1_50/displacement_fwd_2/BiasAdd:0', + 'displacement_bwd': 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + }, + 'tfjs_dir': os.path.join(TFJS_MODEL_DIR, POSENET_ARCHITECTURE, model_dir, stride_dir), + 'tf_dir': os.path.join(TF_MODEL_DIR, POSENET_ARCHITECTURE, model_dir, stride_dir) + } + + +def bodypix_mobilenet_config(stride, quant_bytes=4, multiplier=1.0): + + graph_json = 'model-stride' + str(stride) + '.json' + + multiplier_map = { + 1.0: "100", + 0.75: "075", + 0.5: "050" + } + + # quantBytes = 4 corresponding to the non - quantized full - precision checkpoints. 
+ if quant_bytes == 4: + base_url = MOBILENET_BASE_URL + 'float/' + multiplier_map[multiplier] + '/' + model_dir = MOBILENET_MODEL + '_float_' + multiplier_map[multiplier] + else: + base_url = MOBILENET_BASE_URL + 'quant' + str(quant_bytes) + '/' + multiplier_map[multiplier] + '/' + model_dir = MOBILENET_MODEL + '_quant' + str(quant_bytes) + '_' + multiplier_map[multiplier] + + stride_dir = 'stride' + str(stride) + + return { + 'base_url': base_url, + 'filename': graph_json, + 'output_stride': stride, + 'data_format': 'NHWC', + 'input_tensors': { + 'image': 'sub_2:0' + }, + 'output_tensors': { + 'heatmap': 'MobilenetV1/heatmap_2/BiasAdd:0', + 'offsets': 'MobilenetV1/offset_2/BiasAdd:0', + 'displacement_fwd': 'MobilenetV1/displacement_fwd_2/BiasAdd:0', + 'displacement_bwd': 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + }, + 'tfjs_dir': os.path.join(TFJS_MODEL_DIR, POSENET_ARCHITECTURE, model_dir, stride_dir), + 'tf_dir': os.path.join(TF_MODEL_DIR, POSENET_ARCHITECTURE, model_dir, stride_dir) + } diff --git a/posenet/converter/config.yaml b/posenet/converter/config.yaml index 5f63795..065df8f 100644 --- a/posenet/converter/config.yaml +++ b/posenet/converter/config.yaml @@ -1,72 +1,73 @@ +# This config file is no longer used. +# It is left in place as a reference for future integration of the bodypix architecture. 
models: - tfjs: - bodypix: - resnet50_v1: - base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float' - model_variant: - stride16: - filename: 'model-stride16.json' - output_stride: 16 - data_format: 'NHWC' - input_tensors: - image: 'sub_2:0' - output_tensors: - heatmap: 'float_heatmaps:0' - offsets: 'float_short_offsets:0' - displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' - displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' - part_heatmap: 'float_part_heatmaps:0' - part_offsets: 'float_part_offsets:0' - long_offsets: 'float_long_offsets:0' - segments: 'float_segments:0' - mobilenet_v1_100: - base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/float/100' - model_variant: - stride16: - filename: 'model-stride16.json' - output_stride: 16 - data_format: 'NHWC' - input_tensors: - image: 'sub_2:0' - output_tensors: - heatmap: 'float_heatmaps:0' - offsets: 'float_short_offsets:0' - displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' - displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' - part_heatmap: 'float_part_heatmaps:0' - part_offsets: 'float_part_offsets:0' - long_offsets: 'float_long_offsets:0' - segments: 'float_segments:0' - posenet: - resnet50_v1: - base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/float' - model_variant: - stride16: - filename: 'model-stride16.json' - output_stride: 16 - data_format: 'NHWC' - stride32: - filename: 'model-stride32.json' - output_stride: 32 - data_format: 'NHWC' - input_tensors: - image: 'sub_2:0' - output_tensors: - heatmap: 'float_heatmaps:0' - offsets: 'float_short_offsets:0' - displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' - displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' - mobilenet_v1_100: - base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100' - model_variant: - stride16: - filename: 'model-stride16.json' - 
output_stride: 16 - data_format: 'NHWC' - input_tensors: - image: 'sub_2:0' - output_tensors: - heatmap: 'MobilenetV1/heatmap_2/BiasAdd:0' - offsets: 'MobilenetV1/offset_2/BiasAdd:0' - displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' - displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + bodypix: + resnet50_v1: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/resnet50/float' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + image: 'sub_2:0' + output_tensors: + heatmap: 'float_heatmaps:0' + offsets: 'float_short_offsets:0' + displacement_fwd: 'resnet_v1_50/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + part_heatmap: 'float_part_heatmaps:0' + part_offsets: 'float_part_offsets:0' + long_offsets: 'float_long_offsets:0' + segments: 'float_segments:0' + mobilenet_v1_100: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/bodypix/mobilenet/float/100' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + image: 'sub_2:0' + output_tensors: + heatmap: 'float_heatmaps:0' + offsets: 'float_short_offsets:0' + displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' + part_heatmap: 'float_part_heatmaps:0' + part_offsets: 'float_part_offsets:0' + long_offsets: 'float_long_offsets:0' + segments: 'float_segments:0' + posenet: + resnet50_v1: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/resnet50/float' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + stride32: + filename: 'model-stride32.json' + output_stride: 32 + data_format: 'NHWC' + input_tensors: + image: 'sub_2:0' + output_tensors: + heatmap: 'float_heatmaps:0' + offsets: 'float_short_offsets:0' + displacement_fwd: 
'resnet_v1_50/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'resnet_v1_50/displacement_bwd_2/BiasAdd:0' + mobilenet_v1_100: + base_url: 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100' + model_variant: + stride16: + filename: 'model-stride16.json' + output_stride: 16 + data_format: 'NHWC' + input_tensors: + image: 'sub_2:0' + output_tensors: + heatmap: 'MobilenetV1/heatmap_2/BiasAdd:0' + offsets: 'MobilenetV1/offset_2/BiasAdd:0' + displacement_fwd: 'MobilenetV1/displacement_fwd_2/BiasAdd:0' + displacement_bwd: 'MobilenetV1/displacement_bwd_2/BiasAdd:0' diff --git a/posenet/converter/tfjs2tf.py b/posenet/converter/tfjs2tf.py index 90939b4..422734a 100644 --- a/posenet/converter/tfjs2tf.py +++ b/posenet/converter/tfjs2tf.py @@ -1,6 +1,7 @@ import os import tensorflow as tf import tfjs_graph_converter as tfjs +import posenet.converter.config as config import posenet.converter.tfjsdownload as tfjsdownload @@ -13,12 +14,11 @@ def __tensor_info_def(sess, tensor_names): return signatures -def convert(model, neuralnet, model_variant): - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) +def convert(model_cfg): model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) if not os.path.exists(model_file_path): print('Cannot find tfjs model path %s, downloading tfjs model...' % model_file_path) - tfjsdownload.download_tfjs_model(model, neuralnet, model_variant) + tfjsdownload.download_tfjs_model(model_cfg) # 'graph_model_to_saved_model' doesn't store the signature for the model! 
# tfjs.api.graph_model_to_saved_model(model_cfg['tfjs_dir'], model_cfg['tf_dir'], ['serve']) diff --git a/posenet/converter/tfjsdownload.py b/posenet/converter/tfjsdownload.py index 1e0049b..34ff3fc 100644 --- a/posenet/converter/tfjsdownload.py +++ b/posenet/converter/tfjsdownload.py @@ -4,30 +4,7 @@ import zlib import os import shutil -import tensorflowjs.converters.common as tfjs_common -import tfjs_graph_converter.common as tfjs_converter_common -import posenet.converter.common as common - -from posenet.converter.config import load_config - -TFJS_MODEL_DIR = './_tfjs_models' -TF_MODEL_DIR = './_tf_models' - - -def model_config(model, neuralnet, model_variant): - config = load_config() - tfjs_models = config['models']['tfjs'] - model_cfg = tfjs_models[model][neuralnet] - return { - 'base_url': model_cfg['base_url'], - 'filename': model_cfg['model_variant'][model_variant]['filename'], - 'output_stride': model_cfg['model_variant'][model_variant]['output_stride'], - 'data_format': model_cfg['model_variant'][model_variant]['data_format'], - 'input_tensors': model_cfg['input_tensors'], - 'output_tensors': model_cfg['output_tensors'], - 'tfjs_dir': os.path.join(TFJS_MODEL_DIR, model, neuralnet, model_variant), - 'tf_dir': os.path.join(TF_MODEL_DIR, model, neuralnet, model_variant) - } +import posenet.converter.config as config def fix_model_file(model_cfg): @@ -60,15 +37,12 @@ def download_single_file(base_url, filename, save_dir): f.write(data) -def download_tfjs_model(model, neuralnet, model_variant): +def download_tfjs_model(model_cfg): """ Download a tfjs model with saved weights. - :param model: The model, e.g. 'bodypix' - :param neuralnet: The neural net used, e.g. 'resnet50' - :param model_variant: The reference to the model file, e.g. 
'stride16' + :param model_cfg: The model configuration """ - model_cfg = model_config(model, neuralnet, model_variant) model_file_path = os.path.join(model_cfg['tfjs_dir'], model_cfg['filename']) if os.path.exists(model_file_path): print('Model file already exists: %s...' % model_file_path) diff --git a/posenet/posenet_factory.py b/posenet/posenet_factory.py index 48716e3..8b5458c 100644 --- a/posenet/posenet_factory.py +++ b/posenet/posenet_factory.py @@ -1,19 +1,25 @@ import tensorflow as tf import os -import posenet.converter.tfjsdownload as tfjsdownload +import posenet.converter.config as config import posenet.converter.tfjs2tf as tfjs2tf from posenet.resnet import ResNet from posenet.mobilenet import MobileNet from posenet.posenet import PoseNet -def load_model(model, neuralnet, model_variant): +def load_model(model, stride, quant_bytes=4, multiplier=1.0): + + if model == config.RESNET50_MODEL: + model_cfg = config.bodypix_resnet50_config(stride, quant_bytes) + print('Loading ResNet50 model') + else: + model_cfg = config.bodypix_mobilenet_config(stride, quant_bytes, multiplier) + print('Loading MobileNet model') - model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant) model_path = model_cfg['tf_dir'] if not os.path.exists(model_path): print('Cannot find tf model path %s, converting from tfjs...' 
% model_path) - tfjs2tf.convert(model, neuralnet, model_variant) + tfjs2tf.convert(model_cfg) assert os.path.exists(model_path) loaded_model = tf.saved_model.load(model_path) @@ -29,7 +35,7 @@ def load_model(model, neuralnet, model_variant): output_tensor_names = model_cfg['output_tensors'] output_stride = model_cfg['output_stride'] - if neuralnet == 'resnet50_v1': + if model == config.RESNET50_MODEL: net = ResNet(model_function, output_tensor_names, output_stride) else: net = MobileNet(model_function, output_tensor_names, output_stride) diff --git a/webcam_demo.py b/webcam_demo.py index b46f9a5..9125913 100644 --- a/webcam_demo.py +++ b/webcam_demo.py @@ -21,11 +21,12 @@ def main(): print('Tensorflow version: %s' % tf.__version__) assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" - model = 'posenet' # posenet bodypix - neuralnet = 'resnet50_v1' # mobilenet_v1_100 resnet50_v1 - model_variant = 'stride32' # stride16 stride32 + model = 'resnet50' # mobilenet resnet50 + stride = 32 # 8, 16, 32 + quant_bytes = 4 # float + multiplier = 1.0 # only for mobilenet - posenet = load_model(model, neuralnet, model_variant) + posenet = load_model(model, stride, quant_bytes, multiplier) if args.file is not None: cap = cv2.VideoCapture(args.file) From b43e7e4c22f28571c96c7d3c2fbd5cefba6aea23 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 19 Jan 2020 18:24:33 +0100 Subject: [PATCH 19/28] Some extra installation tips. --- README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dbec853..dca20ff 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,25 @@ see (https://github.com/rwightman/posenet-pytorch) ### Install -A suitable Python 3.x environment with Tensorflow 2.x. +A suitable Python 3.x environment with Tensorflow 2.x. For a quick setup, use docker. If you want to use the webcam demo, a pip version of opencv (`pip install opencv-python`) is required instead of the conda version. 
Anaconda's default opencv does not include ffpmeg/VideoCapture support. Also, you may have to force install version 3.4.x as 4.x has a broken drawKeypoints binding. Have a look at the docker configuration for a quick setup. If you want conda, have a look at the `requirements.txt` -file to see what you should install. +file to see what you should install. Know that we rely on https://github.com/patlevin/tfjs-to-tf for +converting the tensorflow.js serialization to the tensorflow saved model. So you have to install this package: + +```bash +git clone https://github.com/patlevin/tfjs-to-tf.git +cd tfjs-to-tf +pip install . --no-deps +``` + +Use the `--no-deps` flag to prevent tfjs-to-tf from installing Tensorflow 1.x as this would uninstall your +Tensorflow 2.x! + ### Using Docker From a41c6ff956a381cd37ff46af4934d9edd258b161 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 19 Jan 2020 18:33:11 +0100 Subject: [PATCH 20/28] Wiring in the cli parameters. --- README.md | 2 +- benchmark.py | 13 ++++++++----- image_demo.py | 14 ++++++++------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index dca20ff..df8cee1 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ version and converted on the fly. Image demo runs inference on an input folder of images and outputs those images with the keypoints and skeleton overlayed. -`python image_demo.py --model 101 --image_dir ./images --output_dir ./output` +`python image_demo.py --model resnet50 --stride 32 --image_dir ./images --output_dir ./output` A folder of suitable test images can be downloaded by first running the `get_test_images.py` script. 
diff --git a/benchmark.py b/benchmark.py index 1281756..93cc0e9 100644 --- a/benchmark.py +++ b/benchmark.py @@ -7,7 +7,10 @@ parser = argparse.ArgumentParser() -parser.add_argument('--model', type=int, default=101) +parser.add_argument('--model', type=str, default='resnet50') # mobilenet resnet50 +parser.add_argument('--stride', type=int, default=16) # 8, 16, 32 (max 16 for mobilenet) +parser.add_argument('--quant_bytes', type=int, default=4) # 4 = float +parser.add_argument('--multiplier', type=float, default=1.0) # only for mobilenet parser.add_argument('--image_dir', type=str, default='./images') parser.add_argument('--num_images', type=int, default=1000) args = parser.parse_args() @@ -18,10 +21,10 @@ def main(): print('Tensorflow version: %s' % tf.__version__) assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" - model = 'resnet50' # mobilenet resnet50 - stride = 32 # 8, 16, 32 - quant_bytes = 4 # float - multiplier = 1.0 # only for mobilenet + model = args.model # mobilenet resnet50 + stride = args.stride # 8, 16, 32 (max 16 for mobilenet) + quant_bytes = args.quant_bytes # float + multiplier = args.multiplier # only for mobilenet posenet = load_model(model, stride, quant_bytes, multiplier) diff --git a/image_demo.py b/image_demo.py index b600526..374a2e5 100644 --- a/image_demo.py +++ b/image_demo.py @@ -6,8 +6,10 @@ from posenet.posenet_factory import load_model parser = argparse.ArgumentParser() -parser.add_argument('--model', type=int, default=101) -parser.add_argument('--scale_factor', type=float, default=1.0) +parser.add_argument('--model', type=str, default='resnet50') # mobilenet resnet50 +parser.add_argument('--stride', type=int, default=16) # 8, 16, 32 (max 16 for mobilenet) +parser.add_argument('--quant_bytes', type=int, default=4) # 4 = float +parser.add_argument('--multiplier', type=float, default=1.0) # only for mobilenet parser.add_argument('--notxt', action='store_true') parser.add_argument('--image_dir', type=str, 
default='./images') parser.add_argument('--output_dir', type=str, default='./output') @@ -23,10 +25,10 @@ def main(): if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) - model = 'resnet50' # mobilenet resnet50 - stride = 32 # 8, 16, 32 (max 16 for mobilenet) - quant_bytes = 4 # float - multiplier = 1.0 # only for mobilenet + model = args.model # mobilenet resnet50 + stride = args.stride # 8, 16, 32 (max 16 for mobilenet) + quant_bytes = args.quant_bytes # float + multiplier = args.multiplier # only for mobilenet posenet = load_model(model, stride, quant_bytes, multiplier) From 06b485fb12044a7b4038ab437cba8c979ad4a31f Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 19 Jan 2020 18:55:47 +0100 Subject: [PATCH 21/28] Minor cleanup. --- posenet/posenet.py | 3 --- posenet/resnet.py | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/posenet/posenet.py b/posenet/posenet.py index 0a90617..a2dddbc 100644 --- a/posenet/posenet.py +++ b/posenet/posenet.py @@ -44,6 +44,3 @@ def print_scores(self, image_name, pose_scores, keypoint_scores, keypoint_coords print('Pose #%d, score = %f' % (pi, pose_scores[pi])) for ki, (s, c) in enumerate(zip(keypoint_scores[pi, :], keypoint_coords[pi, :, :])): print('Keypoint %s, score = %f, coord = %s' % (posenet.PART_NAMES[ki], s, c)) - - def close(self): - self.model.close() diff --git a/posenet/resnet.py b/posenet/resnet.py index 5e77b23..4c8951a 100644 --- a/posenet/resnet.py +++ b/posenet/resnet.py @@ -16,6 +16,7 @@ def preprocess_input(self, image): input_img = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR) input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) # to RGB colors + # See: https://github.com/tensorflow/tfjs-models/blob/master/body-pix/src/resnet.ts input_img = input_img + self.image_net_mean input_img = input_img.reshape(1, target_height, target_width, 3) # NHWC return input_img, scale From 0ed68dad8f5f575b9eb8126fc0d7ca64b862a2f4 Mon 
Sep 17 00:00:00 2001 From: Peter Rigole Date: Tue, 21 Jan 2020 21:42:46 +0100 Subject: [PATCH 22/28] The TF 2.1 RC1 docker image lacks some nvidia library, so went back to 2.0.0. --- benchmark_run.sh | 3 +++ docker_img_build.sh | 4 ++-- image_demo_run.sh | 2 +- requirements.txt | 3 +-- 4 files changed, 7 insertions(+), 5 deletions(-) create mode 100755 benchmark_run.sh diff --git a/benchmark_run.sh b/benchmark_run.sh new file mode 100755 index 0000000..d518760 --- /dev/null +++ b/benchmark_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./docker_run.sh python benchmark.py --model mobilenet --stride 16 --image_dir ./images --num_images 1000 diff --git a/docker_img_build.sh b/docker_img_build.sh index b0c5f52..a706c30 100755 --- a/docker_img_build.sh +++ b/docker_img_build.sh @@ -8,8 +8,8 @@ fi if [ "$1" = "GPU" ]; then image="posenet-python-gpu" - version="--build-arg IMAGE_VERSION=2.1.0rc2-gpu-py3-jupyter" - # version="--build-arg IMAGE_VERSION=2.0.0-gpu-py3-jupyter" +# version="--build-arg IMAGE_VERSION=2.1.0rc2-gpu-py3-jupyter" + version="--build-arg IMAGE_VERSION=2.0.0-gpu-py3-jupyter" else image="posenet-python-cpu" version="--build-arg IMAGE_VERSION=2.1.0-py3-jupyter" diff --git a/image_demo_run.sh b/image_demo_run.sh index 50392f1..21d8638 100755 --- a/image_demo_run.sh +++ b/image_demo_run.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -./docker_run.sh python image_demo.py --model 101 --image_dir ./images --output_dir ./output +./docker_run.sh python image_demo.py --model resnet50 --image_dir ./images --output_dir ./output diff --git a/requirements.txt b/requirements.txt index 89e4e29..71c8089 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ numpy==1.18.1 -tfjs_graph_converter==0.2.0 opencv_python_headless==3.4.5.20 scipy==1.4.1 tensorflowjs==1.4.0 -tensorflow==2.1.0 +# tensorflow==2.1.0 # uncomment when installing from scratch (not in docker) PyYAML==5.3 tensorflow-hub==0.7.0 \ No newline at end of file From 
3ee248b030141fa9685731d80fd60e9b3bc5548d Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Tue, 21 Jan 2020 21:49:29 +0100 Subject: [PATCH 23/28] Adding some benchmark results. --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index df8cee1..ae215ae 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,20 @@ A folder of suitable test images can be downloaded by first running the `get_tes A minimal performance benchmark based on image_demo. Images in `--image_dir` are pre-loaded and inference is run `--num_images` times with no drawing and no text output. +Running the benchmark cycling 1000 times through the example images on a Geforce GTX 1080ti gives these average FPS +using TF 2.0.0: + +``` +ResNet50 stride 16: 32.41 FPS +ResNet50 stride 32: 38.70 FPS (strange this is faster than with stride 16) +MobileNet stride 8: 37.90 FPS (surprisingly slow for mobilenet, ran this several times, same result) +MobileNet stride 16: 58.64 FPS +``` + +I can't explain why the larger stride gives a faster result. It was expected that MobileNet would be faster than +ResNet50, but the MobileNet quality is visibly lower on the rendered images (running image_demo.py). + + #### webcam_demo.py The webcam demo uses OpenCV to capture images from a connected webcam. The result is overlayed with the keypoints and From 2fe815e2b9ca9dfd7536e38175e2ef0002693ae5 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 9 Feb 2020 12:44:27 +0100 Subject: [PATCH 24/28] Restructured the binary and docker files. 
--- README.md | 8 ++++---- benchmark_run.sh | 3 --- bin/benchmark_run.sh | 3 +++ docker_run.sh => bin/docker_run.sh | 0 exportCPU.sh => bin/exportCPU.sh | 2 +- exportGPU.sh => bin/exportGPU.sh | 0 bin/get_test_images_run.sh | 3 +++ bin/image_demo_run.sh | 3 +++ inspect_saved_model.sh => bin/inspect_saved_model.sh | 2 +- upgrade-tf-v2.sh => bin/upgrade-tf-v2.sh | 2 ++ Dockerfile => docker/Dockerfile | 2 +- docker_img_build.sh => docker/docker_img_build.sh | 0 get_test_images_run.sh | 3 --- image_demo.py | 2 +- image_demo_run.sh | 3 --- 15 files changed, 19 insertions(+), 17 deletions(-) delete mode 100755 benchmark_run.sh create mode 100755 bin/benchmark_run.sh rename docker_run.sh => bin/docker_run.sh (100%) rename exportCPU.sh => bin/exportCPU.sh (74%) rename exportGPU.sh => bin/exportGPU.sh (100%) create mode 100755 bin/get_test_images_run.sh create mode 100755 bin/image_demo_run.sh rename inspect_saved_model.sh => bin/inspect_saved_model.sh (64%) rename upgrade-tf-v2.sh => bin/upgrade-tf-v2.sh (80%) rename Dockerfile => docker/Dockerfile (97%) rename docker_img_build.sh => docker/docker_img_build.sh (100%) delete mode 100755 get_test_images_run.sh delete mode 100755 image_demo_run.sh diff --git a/README.md b/README.md index ae215ae..2c4ddba 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,10 @@ If you just want to test this code, you can run everything on a CPU just as well 4fps on resnet50. Replace `GPU` below with `CPU` to test on a CPU. ```bash -./docker_img_build.sh GPU -. ./exportGPU.sh -./get_test_images_run.sh -./image_demo_run.sh +./bin/docker_img_build.sh GPU +. ./bin/exportGPU.sh +./bin/get_test_images_run.sh +./bin/image_demo_run.sh ``` Some pointers to get you going on the Linux machine setup. 
Most links are based on Ubuntu, but other distributions diff --git a/benchmark_run.sh b/benchmark_run.sh deleted file mode 100755 index d518760..0000000 --- a/benchmark_run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -./docker_run.sh python benchmark.py --model mobilenet --stride 16 --image_dir ./images --num_images 1000 diff --git a/bin/benchmark_run.sh b/bin/benchmark_run.sh new file mode 100755 index 0000000..e7f084f --- /dev/null +++ b/bin/benchmark_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./bin/docker_run.sh python benchmark.py --model mobilenet --stride 16 --image_dir ./images --num_images 1000 diff --git a/docker_run.sh b/bin/docker_run.sh similarity index 100% rename from docker_run.sh rename to bin/docker_run.sh diff --git a/exportCPU.sh b/bin/exportCPU.sh similarity index 74% rename from exportCPU.sh rename to bin/exportCPU.sh index c9a3869..4a543c5 100755 --- a/exportCPU.sh +++ b/bin/exportCPU.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # source this file to set your environment on a CPU device # $ . 
exportCPU.sh -export POSENET_PYTHON_DEVICE=CPU \ No newline at end of file +export POSENET_PYTHON_DEVICE=CPU diff --git a/exportGPU.sh b/bin/exportGPU.sh similarity index 100% rename from exportGPU.sh rename to bin/exportGPU.sh diff --git a/bin/get_test_images_run.sh b/bin/get_test_images_run.sh new file mode 100755 index 0000000..92c51df --- /dev/null +++ b/bin/get_test_images_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./bin/docker_run.sh python get_test_images.py diff --git a/bin/image_demo_run.sh b/bin/image_demo_run.sh new file mode 100755 index 0000000..d88f9a0 --- /dev/null +++ b/bin/image_demo_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./bin/docker_run.sh python image_demo.py --model resnet50 --stride 16 --image_dir ./images --output_dir ./output diff --git a/inspect_saved_model.sh b/bin/inspect_saved_model.sh similarity index 64% rename from inspect_saved_model.sh rename to bin/inspect_saved_model.sh index 539e69b..b8b97ae 100755 --- a/inspect_saved_model.sh +++ b/bin/inspect_saved_model.sh @@ -3,4 +3,4 @@ FOLDER=$1 # e.g.: $> ./inspect_saved_model.sh _tf_models/posenet/mobilenet_v1_100/stride16 -./docker_run.sh saved_model_cli show --dir "$FOLDER" --all +./bin/docker_run.sh saved_model_cli show --dir "$FOLDER" --all diff --git a/upgrade-tf-v2.sh b/bin/upgrade-tf-v2.sh similarity index 80% rename from upgrade-tf-v2.sh rename to bin/upgrade-tf-v2.sh index e261497..b48a3b4 100755 --- a/upgrade-tf-v2.sh +++ b/bin/upgrade-tf-v2.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +# run this from the top-level folder of the project + WORK=$(dirname $(pwd)) docker run --gpus all -it -v $WORK:/work posenet-python tf_upgrade_v2 \ diff --git a/Dockerfile b/docker/Dockerfile similarity index 97% rename from Dockerfile rename to docker/Dockerfile index 4943709..2101144 100644 --- a/Dockerfile +++ b/docker/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ pip install --upgrade pip && \ rm -rf /var/lib/apt/lists/* -COPY 
requirements.txt /work/ +COPY ../requirements.txt /work/ WORKDIR /work diff --git a/docker_img_build.sh b/docker/docker_img_build.sh similarity index 100% rename from docker_img_build.sh rename to docker/docker_img_build.sh diff --git a/get_test_images_run.sh b/get_test_images_run.sh deleted file mode 100755 index 4242d30..0000000 --- a/get_test_images_run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -./docker_run.sh python get_test_images.py diff --git a/image_demo.py b/image_demo.py index 374a2e5..44e5f79 100644 --- a/image_demo.py +++ b/image_demo.py @@ -26,7 +26,7 @@ def main(): os.makedirs(args.output_dir) model = args.model # mobilenet resnet50 - stride = args.stride # 8, 16, 32 (max 16 for mobilenet) + stride = args.stride # 8, 16, 32 (max 16 for mobilenet, min 16 for resnet50) quant_bytes = args.quant_bytes # float multiplier = args.multiplier # only for mobilenet diff --git a/image_demo_run.sh b/image_demo_run.sh deleted file mode 100755 index 21d8638..0000000 --- a/image_demo_run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -./docker_run.sh python image_demo.py --model resnet50 --image_dir ./images --output_dir ./output From d3344623e1904208d816b97d43cb078748a905ed Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 9 Feb 2020 12:59:44 +0100 Subject: [PATCH 25/28] Reorganizing the docker files. 
--- .gitignore | 3 ++- README.md | 4 +++- docker/Dockerfile | 3 ++- docker/docker_img_build.sh | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 59c548b..4d95ce6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ _models/* _tf_models/* _tfjs_models/* _posenet_weights/* +docker/requirements.txt # Byte-compiled / optimized / DLL files __pycache__/ @@ -199,4 +200,4 @@ fabric.properties .idea/httpRequests # Android studio 3.1+ serialized cache file -.idea/caches/build_file_checksums.ser \ No newline at end of file +.idea/caches/build_file_checksums.ser diff --git a/README.md b/README.md index 2c4ddba..6e0533e 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,9 @@ If you just want to test this code, you can run everything on a CPU just as well 4fps on resnet50. Replace `GPU` below with `CPU` to test on a CPU. ```bash -./bin/docker_img_build.sh GPU +cd docker +./docker_img_build.sh GPU +cd .. . ./bin/exportGPU.sh ./bin/get_test_images_run.sh ./bin/image_demo_run.sh diff --git a/docker/Dockerfile b/docker/Dockerfile index 2101144..844c020 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ pip install --upgrade pip && \ rm -rf /var/lib/apt/lists/* -COPY ../requirements.txt /work/ +COPY requirements.txt /work/ WORKDIR /work @@ -24,6 +24,7 @@ RUN pip install -r requirements.txt --no-deps RUN git clone https://github.com/patlevin/tfjs-to-tf.git && \ cd tfjs-to-tf && \ + git checkout v0.3.0 && \ pip install . --no-deps && \ cd .. && \ rm -r tfjs-to-tf diff --git a/docker/docker_img_build.sh b/docker/docker_img_build.sh index a706c30..afcc0bc 100755 --- a/docker/docker_img_build.sh +++ b/docker/docker_img_build.sh @@ -15,6 +15,8 @@ else version="--build-arg IMAGE_VERSION=2.1.0-py3-jupyter" fi +cp ../requirements.txt . + docker rmi -f "$image" docker build -t "$image" $version . 
From 2d7dc7474b4d3079f2b0e32211ea9d9da3bacbba Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 9 Feb 2020 13:08:51 +0100 Subject: [PATCH 26/28] Updated README. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6e0533e..2a5a197 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ version and converted on the fly. Image demo runs inference on an input folder of images and outputs those images with the keypoints and skeleton overlayed. -`python image_demo.py --model resnet50 --stride 32 --image_dir ./images --output_dir ./output` +`python image_demo.py --model resnet50 --stride 16 --image_dir ./images --output_dir ./output` A folder of suitable test images can be downloaded by first running the `get_test_images.py` script. @@ -91,14 +91,14 @@ using TF 2.0.0: ``` ResNet50 stride 16: 32.41 FPS -ResNet50 stride 32: 38.70 FPS (strange this is faster than with stride 16) -MobileNet stride 8: 37.90 FPS (surprisingly slow for mobilenet, ran this several times, same result) +ResNet50 stride 32: 38.70 FPS +MobileNet stride 8: 37.90 FPS (this is surprisingly slow for mobilenet, ran this several times, same result) MobileNet stride 16: 58.64 FPS ``` -I can't explain why the larger stride gives a faster result. It was expected that MobileNet would be faster than -ResNet50, but the MobileNet quality is visibly lower on the rendered images (running image_demo.py). - +Faster FPS have been reported by Ross Wightman on the original codebase in +[rwightman/posenet-python](https://github.com/rwightman/posenet-python), so if anyone has a pull request that +improves the performance of this codebase, feel free to let me know! #### webcam_demo.py From 7b7ccd4eea98a1158f7c7adaaff0023f6cf11818 Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Fri, 21 Feb 2020 15:47:27 +0100 Subject: [PATCH 27/28] Adding video file processing.
--- .gitignore | 1 + bin/video_demo_run.sh | 3 ++ bin/webcam_demo_run.sh | 3 ++ posenet/base_model.py | 2 +- posenet/resnet.py | 3 +- video_demo.py | 85 ++++++++++++++++++++++++++++++++++++++++++ webcam_demo.py | 13 ++++--- 7 files changed, 103 insertions(+), 7 deletions(-) create mode 100755 bin/video_demo_run.sh create mode 100755 bin/webcam_demo_run.sh create mode 100644 video_demo.py diff --git a/.gitignore b/.gitignore index 4d95ce6..1458270 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ _tf_models/* _tfjs_models/* _posenet_weights/* docker/requirements.txt +*.mp4 # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/bin/video_demo_run.sh b/bin/video_demo_run.sh new file mode 100755 index 0000000..de17ce2 --- /dev/null +++ b/bin/video_demo_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "Pexels Videos 3552510.mp4" --output_file "Pexels Videos 3552510-with_pose.mp4" diff --git a/bin/webcam_demo_run.sh b/bin/webcam_demo_run.sh new file mode 100755 index 0000000..2eca06c --- /dev/null +++ b/bin/webcam_demo_run.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +./bin/docker_run.sh python webcam_demo.py --model resnet50 --stride 16 --image_dir ./images --output_dir ./output diff --git a/posenet/base_model.py b/posenet/base_model.py index 020e900..b0977cc 100644 --- a/posenet/base_model.py +++ b/posenet/base_model.py @@ -35,6 +35,6 @@ def predict(self, image): heatmap_result = result[self.output_tensor_names[self.HEATMAP_KEY]] offsets_result = result[self.output_tensor_names[self.OFFSETS_KEY]] displacement_fwd_result = result[self.output_tensor_names[self.DISPLACEMENT_FWD_KEY]] - displacement_bwd_result = result[self.output_tensor_names[self.DISPLACEMENT_BWD_KEY]] + displacement_bwd_result = result[self.output_tensor_names[self.DISPLACEMENT_BWD_KEY]] return tf.sigmoid(heatmap_result), offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale diff --git 
a/posenet/resnet.py b/posenet/resnet.py index 4c8951a..33e4951 100644 --- a/posenet/resnet.py +++ b/posenet/resnet.py @@ -15,8 +15,9 @@ def preprocess_input(self, image): scale = np.array([image.shape[0] / target_height, image.shape[1] / target_width]) input_img = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR) input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32) # to RGB colors + # todo: test a variant that adds black bars to the image to match it to a valid resolution # See: https://github.com/tensorflow/tfjs-models/blob/master/body-pix/src/resnet.ts input_img = input_img + self.image_net_mean - input_img = input_img.reshape(1, target_height, target_width, 3) # NHWC + input_img = input_img.reshape(1, target_height, target_width, 3) # HWC to NHWC return input_img, scale diff --git a/video_demo.py b/video_demo.py new file mode 100644 index 0000000..5c7cf56 --- /dev/null +++ b/video_demo.py @@ -0,0 +1,85 @@ +import tensorflow as tf +import cv2 +import time +import argparse + +from posenet.posenet_factory import load_model +from posenet.utils import draw_skel_and_kp + +parser = argparse.ArgumentParser() +parser.add_argument('--model', type=str, default='resnet50') # mobilenet resnet50 +parser.add_argument('--stride', type=int, default=16) # 8, 16, 32 (max 16 for mobilenet) +parser.add_argument('--quant_bytes', type=int, default=4) # 4 = float +parser.add_argument('--multiplier', type=float, default=1.0) # only for mobilenet +parser.add_argument('--scale_factor', type=float, default=0.7125) +parser.add_argument('--input_file', type=str, help="Give the video file location") +parser.add_argument('--output_file', type=str, help="Give the video file location") +args = parser.parse_args() + + +def main(): + + print('Tensorflow version: %s' % tf.__version__) + assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" 
+ + model = args.model # mobilenet resnet50 + stride = args.stride # 8, 16, 32 (max 16 for mobilenet, min 16 for resnet50) + quant_bytes = args.quant_bytes # float + multiplier = args.multiplier # only for mobilenet + + posenet = load_model(model, stride, quant_bytes, multiplier) + + # for inspiration, see: https://www.programcreek.com/python/example/72134/cv2.VideoWriter + if args.input_file is not None: + cap = cv2.VideoCapture(args.input_file) + else: + raise IOError("no input video file given; use --input_file") + + fps = cap.get(cv2.CAP_PROP_FPS) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') + video_writer = cv2.VideoWriter(args.output_file, fourcc, fps, (width, height)) + + max_pose_detections = 20 + + # Scaling the input image reduces the quality of the pose detections! + # The speed gain is about the square of the scale factor. + posenet_input_scale = 1.0 + # posenet_input_height = 540 # scale factor for the posenet input + # posenet_input_scale = posenet_input_height / height + # posenet_input_width = int(width * posenet_input_scale) + # print("posenet_input_scale: %3.4f" % (posenet_input_scale)) + + + start = time.time() + frame_count = 0 + + ret, frame = cap.read() + + while ret: + frame_rescaled = frame # no scaling + # frame_rescaled = + # cv2.resize(frame, (posenet_input_width, posenet_input_height), interpolation=cv2.INTER_LINEAR) + pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(frame_rescaled, max_pose_detections) + + keypoint_coords_upscaled = keypoint_coords / posenet_input_scale + overlay_frame = draw_skel_and_kp( + frame, pose_scores, keypoint_scores, keypoint_coords_upscaled, + min_pose_score=0.15, min_part_score=0.1) + + frame_count += 1 + # This is uncompressed video. cv2 has no way to write compressed videos, so we'll have to use ffmpeg to + # compress it afterwards! 
See: + # https://stackoverflow.com/questions/25998799/specify-compression-quality-in-python-for-opencv-video-object + video_writer.write(overlay_frame) + ret, frame = cap.read() + + print('Average FPS: ', frame_count / (time.time() - start)) + + video_writer.release() + cap.release() + +if __name__ == "__main__": + main() diff --git a/webcam_demo.py b/webcam_demo.py index 9125913..5b4feb0 100644 --- a/webcam_demo.py +++ b/webcam_demo.py @@ -7,7 +7,10 @@ from posenet.utils import draw_skel_and_kp parser = argparse.ArgumentParser() -parser.add_argument('--model', type=int, default=101) +parser.add_argument('--model', type=str, default='resnet50') # mobilenet resnet50 +parser.add_argument('--stride', type=int, default=16) # 8, 16, 32 (max 16 for mobilenet) +parser.add_argument('--quant_bytes', type=int, default=4) # 4 = float +parser.add_argument('--multiplier', type=float, default=1.0) # only for mobilenet parser.add_argument('--cam_id', type=int, default=0) parser.add_argument('--cam_width', type=int, default=1280) parser.add_argument('--cam_height', type=int, default=720) @@ -21,10 +24,10 @@ def main(): print('Tensorflow version: %s' % tf.__version__) assert tf.__version__.startswith('2.'), "Tensorflow version 2.x must be used!" - model = 'resnet50' # mobilenet resnet50 - stride = 32 # 8, 16, 32 - quant_bytes = 4 # float - multiplier = 1.0 # only for mobilenet + model = args.model # mobilenet resnet50 + stride = args.stride # 8, 16, 32 (max 16 for mobilenet, min 16 for resnet50) + quant_bytes = args.quant_bytes # float + multiplier = args.multiplier # only for mobilenet posenet = load_model(model, stride, quant_bytes, multiplier) From eef813ad16488812817b344afa1f390f0c22623d Mon Sep 17 00:00:00 2001 From: Peter Rigole Date: Sun, 23 Feb 2020 15:33:47 +0100 Subject: [PATCH 28/28] Tweaking the video out examples. 
--- bin/video_demo_run.sh | 8 +++++++- posenet/utils.py | 4 ++-- video_demo.py | 18 ++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/bin/video_demo_run.sh b/bin/video_demo_run.sh index de17ce2..40f8c0c 100755 --- a/bin/video_demo_run.sh +++ b/bin/video_demo_run.sh @@ -1,3 +1,9 @@ #!/usr/bin/env bash -./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "Pexels Videos 3552510.mp4" --output_file "Pexels Videos 3552510-with_pose.mp4" +#./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "Pexels Videos 3552510.mp4" --output_file "Pexels Videos 3552510-with_pose.mp4" +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "exki.mp4" --output_file "exki_with_pose.mp4" +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "night-bridge.mp4" --output_file "night-bridge_with_pose.mp4" +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "night-colorful.mp4" --output_file "night-colorful_with_pose.mp4" +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "night-street.mp4" --output_file "night-street_with_pose.mp4" +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "pedestrians.mp4" --output_file "pedestrians_with_pose.mp4" +./bin/docker_run.sh python video_demo.py --model resnet50 --stride 16 --input_file "sidewalk.mp4" --output_file "sidewalk_with_pose.mp4" diff --git a/posenet/utils.py b/posenet/utils.py index fd35fd0..a662374 100644 --- a/posenet/utils.py +++ b/posenet/utils.py @@ -64,7 +64,7 @@ def draw_skel_and_kp( cv_keypoints.append(cv2.KeyPoint(kc[1], kc[0], 10. 
* ks)) out_img = cv2.drawKeypoints( - out_img, cv_keypoints, outImage=np.array([]), color=(255, 255, 0), + out_img, cv_keypoints, outImage=np.array([]), color=(255, 255, 255), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS) - out_img = cv2.polylines(out_img, adjacent_keypoints, isClosed=False, color=(255, 255, 0)) + out_img = cv2.polylines(out_img, adjacent_keypoints, isClosed=False, color=(255, 255, 0), thickness=2) return out_img diff --git a/video_demo.py b/video_demo.py index 5c7cf56..978d639 100644 --- a/video_demo.py +++ b/video_demo.py @@ -46,11 +46,10 @@ def main(): # Scaling the input image reduces the quality of the pose detections! # The speed gain is about the square of the scale factor. - posenet_input_scale = 1.0 - # posenet_input_height = 540 # scale factor for the posenet input - # posenet_input_scale = posenet_input_height / height - # posenet_input_width = int(width * posenet_input_scale) - # print("posenet_input_scale: %3.4f" % (posenet_input_scale)) + posenet_input_height = 540 # scale factor for the posenet input + posenet_input_scale = 1.0 # posenet_input_height / height # 1.0 + posenet_input_width = int(width * posenet_input_scale) + print("posenet_input_scale: %3.4f" % (posenet_input_scale)) start = time.time() @@ -59,9 +58,12 @@ def main(): ret, frame = cap.read() while ret: - frame_rescaled = frame # no scaling - # frame_rescaled = - # cv2.resize(frame, (posenet_input_width, posenet_input_height), interpolation=cv2.INTER_LINEAR) + if posenet_input_scale == 1.0: + frame_rescaled = frame # no scaling + else: + frame_rescaled = \ + cv2.resize(frame, (posenet_input_width, posenet_input_height), interpolation=cv2.INTER_LINEAR) + pose_scores, keypoint_scores, keypoint_coords = posenet.estimate_multiple_poses(frame_rescaled, max_pose_detections) keypoint_coords_upscaled = keypoint_coords / posenet_input_scale