diff --git a/research/object_detection/object_detection_tutorial.cpp b/research/object_detection/object_detection_tutorial.cpp new file mode 100644 index 00000000000..ccaaab9db10 --- /dev/null +++ b/research/object_detection/object_detection_tutorial.cpp @@ -0,0 +1,291 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// A minimal but useful C++ example showing how to load an Imagenet-style object +// recognition TensorFlow model, prepare input images for it, run them through +// the graph, and interpret the results. +// +// It's designed to have as few dependencies and be as clear as possible, so +// it's more verbose than it could be in production code. In particular, using +// auto for the types of a lot of the returned values from TensorFlow calls can +// remove a lot of boilerplate, but I find the explicit types useful in sample +// code to make it simple to look up the classes involved. +// +// To use it, compile and then run in a working directory with the +// learning/brain/tutorials/label_image/data/ folder below it, and you should +// see the top five labels for the example Lena image output. You can then +// customize it to use your own models or images by changing the file names at +// the top of the main() function. +// +// The googlenet_graph.pb file included by default is created from Inception. 
+// +// Note that, for GIF inputs, to reuse existing code, only single-frame ones +// are supported. + +#include +#include +#include +#include + +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/image_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/default_device.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/command_line_flags.h" + +// These are all common classes it's handy to reference with no namespace. +using tensorflow::Flag; +using tensorflow::Tensor; +using tensorflow::Status; +using tensorflow::string; +using tensorflow::int32; + +using namespace std; + +// Takes a file name, and loads a list of labels from it, one per line, and +// returns a vector of the strings. It pads with empty strings so the length +// of the result is a multiple of 16, because our model expects that. 
+Status ReadLabelsFile(const string& file_name, std::vector* result, + size_t* found_label_count) { + std::ifstream file(file_name); + if (!file) { + return tensorflow::errors::NotFound("Labels file ", file_name, + " not found."); + } + result->clear(); + string line; + while (std::getline(file, line)) { + result->push_back(line); + } + *found_label_count = result->size(); + const int padding = 16; + while (result->size() % padding) { + result->emplace_back(); + } + return Status::OK(); +} + +static Status ReadEntireFile(tensorflow::Env* env, const string& filename, + Tensor* output) { + tensorflow::uint64 file_size = 0; + TF_RETURN_IF_ERROR(env->GetFileSize(filename, &file_size)); + + string contents; + contents.resize(file_size); + + std::unique_ptr file; + TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file)); + + tensorflow::StringPiece data; + TF_RETURN_IF_ERROR(file->Read(0, file_size, &data, &(contents)[0])); + if (data.size() != file_size) { + return tensorflow::errors::DataLoss("Truncated read of '", filename, + "' expected ", file_size, " got ", + data.size()); + } + output->scalar()() = data.ToString(); + return Status::OK(); +} + +// Given an image file name, read in the data, try to decode it as an image, +// resize it to the requested size, and then scale the values as desired. 
+Status ReadTensorFromImageFile(const string& file_name, const int input_height, + const int input_width, const float input_mean, + const float input_std, + std::vector* out_tensors) { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + string input_name = "file_reader"; + string output_name = "normalized"; + + // read file_name into a tensor named input + Tensor input(tensorflow::DT_STRING, tensorflow::TensorShape()); + TF_RETURN_IF_ERROR( + ReadEntireFile(tensorflow::Env::Default(), file_name, &input)); + + // use a placeholder to read input data + auto file_reader = + Placeholder(root.WithOpName("input"), tensorflow::DataType::DT_STRING); + + std::vector> inputs = { + {"input", input}, + }; + + // Now try to figure out what kind of file it is and decode it. + const int wanted_channels = 3; + tensorflow::Output image_reader; + if (tensorflow::StringPiece(file_name).ends_with(".png")) { + image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, + DecodePng::Channels(wanted_channels)); + } else if (tensorflow::StringPiece(file_name).ends_with(".gif")) { + // gif decoder returns 4-D tensor, remove the first dim + image_reader = + Squeeze(root.WithOpName("squeeze_first_dim"), + DecodeGif(root.WithOpName("gif_reader"), file_reader)); + } else { + // Assume if it's neither a PNG nor a GIF then it must be a JPEG. + image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader, + DecodeJpeg::Channels(wanted_channels)); + } + // Now cast the image data to float so we can do normal math on it. + // auto float_caster = + // Cast(root.WithOpName("float_caster"), image_reader, tensorflow::DT_FLOAT); + + auto uint8_caster = Cast(root.WithOpName("uint8_caster"), image_reader, tensorflow::DT_UINT8); + + // The convention for image ops in TensorFlow is that all images are expected + // to be in batches, so that they're four-dimensional arrays with indices of + // [batch, height, width, channel]. 
Because we only have a single image, we + // have to add a batch dimension of 1 to the start with ExpandDims(). + auto dims_expander = ExpandDims(root.WithOpName("dim"), uint8_caster, 0); + + // Bilinearly resize the image to fit the required dimensions. + // auto resized = ResizeBilinear( + // root, dims_expander, + // Const(root.WithOpName("size"), {input_height, input_width})); + + + // Subtract the mean and divide by the scale. + // auto div = Div(root.WithOpName(output_name), Sub(root, dims_expander, {input_mean}), + // {input_std}); + + + //cast to int + //auto uint8_caster = Cast(root.WithOpName("uint8_caster"), div, tensorflow::DT_UINT8); + + // This runs the GraphDef network definition that we've just constructed, and + // returns the results in the output tensor. + tensorflow::GraphDef graph; + TF_RETURN_IF_ERROR(root.ToGraphDef(&graph)); + + std::unique_ptr session( + tensorflow::NewSession(tensorflow::SessionOptions())); + TF_RETURN_IF_ERROR(session->Create(graph)); + TF_RETURN_IF_ERROR(session->Run({inputs}, {"dim"}, {}, out_tensors)); + return Status::OK(); +} + +// Reads a model graph definition from disk, and creates a session object you +// can use to run it. +Status LoadGraph(const string& graph_file_name, + std::unique_ptr* session) { + tensorflow::GraphDef graph_def; + Status load_graph_status = + ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def); + if (!load_graph_status.ok()) { + return tensorflow::errors::NotFound("Failed to load compute graph at '", + graph_file_name, "'"); + } + session->reset(tensorflow::NewSession(tensorflow::SessionOptions())); + Status session_create_status = (*session)->Create(graph_def); + if (!session_create_status.ok()) { + return session_create_status; + } + return Status::OK(); +} + + + +int main(int argc, char* argv[]) { + // These are the command-line flags the program can understand. + // They define where the graph and input data is located, and what kind of + // input the model expects. 
If you train your own model, or use something + // other than inception_v3, then you'll need to update these. + string image(argv[1]); + string graph ="faster_rcnn_resnet101_coco_11_06_2017/frozen_inference_graph.pb"; + string labels ="labels/mscoco_label_map.pbtxt"; + int32 input_width = 299; + int32 input_height = 299; + float input_mean = 0; + float input_std = 255; + string input_layer = "image_tensor:0"; + vector output_layer ={ "detection_boxes:0", "detection_scores:0", "detection_classes:0", "num_detections:0" }; + + bool self_test = false; + string root_dir = ""; + + // First we load and initialize the model. + std::unique_ptr session; + string graph_path = tensorflow::io::JoinPath(root_dir, graph); + LOG(ERROR) << "graph_path:" << graph_path; + Status load_graph_status = LoadGraph(graph_path, &session); + if (!load_graph_status.ok()) { + LOG(ERROR) << "LoadGraph ERROR!!!!"<< load_graph_status; + return -1; + } + + // Get the image from disk as a float array of numbers, resized and normalized + // to the specifications the main graph expects. + std::vector resized_tensors; + string image_path = tensorflow::io::JoinPath(root_dir, image); + Status read_tensor_status = + ReadTensorFromImageFile(image_path, input_height, input_width, input_mean, + input_std, &resized_tensors); + if (!read_tensor_status.ok()) { + LOG(ERROR) << read_tensor_status; + return -1; + } + const Tensor& resized_tensor = resized_tensors[0]; + + LOG(ERROR) <<"image shape:" << resized_tensor.shape().DebugString()<< ",len:" << resized_tensors.size() << ",tensor type:"<< resized_tensor.dtype(); + // << ",data:" << resized_tensor.flat(); + // Actually run the image through the model. 
+ std::vector outputs; + Status run_status = session->Run({{input_layer, resized_tensor}}, + output_layer, {}, &outputs); + if (!run_status.ok()) { + LOG(ERROR) << "Running model failed: " << run_status; + return -1; + } + + int image_width = resized_tensor.dims(); + int image_height = 0; + //int image_height = resized_tensor.shape()[1]; + + LOG(ERROR) << "size:" << outputs.size() << ",image_width:" << image_width << ",image_height:" << image_height << endl; + + //tensorflow::TTypes::Flat iNum = outputs[0].flat(); + tensorflow::TTypes::Flat scores = outputs[1].flat(); + tensorflow::TTypes::Flat classes = outputs[2].flat(); + tensorflow::TTypes::Flat num_detections = outputs[3].flat(); + auto boxes = outputs[0].flat_outer_dims(); + + LOG(ERROR) << "num_detections:" << num_detections(0) << "," << outputs[0].shape().DebugString(); + + for(size_t i = 0; i < num_detections(0) && i < 20;++i) + { + if(scores(i) > 0.5) + { + LOG(ERROR) << i << ",score:" << scores(i) << ",class:" << classes(i)<< ",box:" << "," << boxes(0,i,0) << "," << boxes(0,i,1) << "," << boxes(0,i,2)<< "," << boxes(0,i,3); + } + } + + return 0; +} + diff --git a/research/object_detection/object_detection_tutorial.py b/research/object_detection/object_detection_tutorial.py new file mode 100644 index 00000000000..777747597d5 --- /dev/null +++ b/research/object_detection/object_detection_tutorial.py @@ -0,0 +1,169 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Constructs model, inputs, and training environment.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import os +import six.moves.urllib as urllib +import sys +import tarfile +import tensorflow as tf +import zipfile + +from collections import defaultdict +from io import StringIO +from matplotlib import pyplot as plt +from PIL import Image +import cv2 + +# This is needed since the notebook is stored in the object_detection folder. +sys.path.append("..") +from object_detection.utils import ops as utils_ops + +if tf.__version__ < '1.4.0': + raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!') + + +# This is needed to display the images. +# %matplotlib inline + +from utils import label_map_util + +from utils import visualization_utils as vis_util + +# What model to download. +MODEL_NAME = 'train' +# MODEL_FILE = MODEL_NAME + '.tar.gz' +# DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/' + +# Path to frozen detection graph. This is the actual model that is used for the object detection. +PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb' + +# List of the strings that is used to add correct label for each box. 
+PATH_TO_LABELS = os.path.join('train', 'pascal_label_map.pbtxt') + +NUM_CLASSES = 20 + +# opener = urllib.request.URLopener() +# opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE) +# tar_file = tarfile.open(MODEL_FILE) +# for file in tar_file.getmembers(): +# file_name = os.path.basename(file.name) +# if 'frozen_inference_graph.pb' in file_name: +# tar_file.extract(file, os.getcwd()) + +detection_graph = tf.Graph() +with detection_graph.as_default(): + od_graph_def = tf.GraphDef() + with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: + serialized_graph = fid.read() + od_graph_def.ParseFromString(serialized_graph) + tf.import_graph_def(od_graph_def, name='') + +label_map = label_map_util.load_labelmap(PATH_TO_LABELS) +categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) +category_index = label_map_util.create_category_index(categories) + +def load_image_into_numpy_array(image): + (im_width, im_height) = image.size + return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8) + +# For the sake of simplicity we will use only 2 images: +# image1.jpg +# image2.jpg +# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS. +PATH_TO_TEST_IMAGES_DIR = 'test_images' +TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 4) ] + +# Size, in inches, of the output images. 
+IMAGE_SIZE = (12, 8) + + +def run_inference_for_single_image(image, graph): + with graph.as_default(): + with tf.Session() as sess: + # Get handles to input and output tensors + ops = tf.get_default_graph().get_operations() + all_tensor_names = {output.name for op in ops for output in op.outputs} + tensor_dict = {} + for key in [ + 'num_detections', 'detection_boxes', 'detection_scores', + 'detection_classes', 'detection_masks' + ]: + tensor_name = key + ':0' + if tensor_name in all_tensor_names: + tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( + tensor_name) + if 'detection_masks' in tensor_dict: + # The following processing is only for single image + detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) + detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) + # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. + real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) + detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) + detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) + detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( + detection_masks, detection_boxes, image.shape[0], image.shape[1]) + detection_masks_reframed = tf.cast( + tf.greater(detection_masks_reframed, 0.5), tf.uint8) + # Follow the convention by adding back the batch dimension + tensor_dict['detection_masks'] = tf.expand_dims( + detection_masks_reframed, 0) + image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') + + # Run inference + output_dict = sess.run(tensor_dict, + feed_dict={image_tensor: np.expand_dims(image, 0)}) + + # all outputs are float32 numpy arrays, so convert types as appropriate + output_dict['num_detections'] = int(output_dict['num_detections'][0]) + output_dict['detection_classes'] = output_dict[ + 'detection_classes'][0].astype(np.uint8) + 
output_dict['detection_boxes'] = output_dict['detection_boxes'][0] + output_dict['detection_scores'] = output_dict['detection_scores'][0] + if 'detection_masks' in output_dict: + output_dict['detection_masks'] = output_dict['detection_masks'][0] + return output_dict + +for image_path in TEST_IMAGE_PATHS: + print ('image_path', image_path) + image = Image.open(image_path) + # the array based representation of the image will be used later in order to prepare the + # result image with boxes and labels on it. + image_np = load_image_into_numpy_array(image) + # Expand dimensions since the model expects images to have shape: [1, None, None, 3] + image_np_expanded = np.expand_dims(image_np, axis=0) + # Actual detection. + output_dict = run_inference_for_single_image(image_np, detection_graph) + print (output_dict) + # Visualization of the results of a detection. + vis_util.visualize_boxes_and_labels_on_image_array( + image_np, + output_dict['detection_boxes'], + output_dict['detection_classes'], + output_dict['detection_scores'], + category_index, + instance_masks=output_dict.get('detection_masks'), + use_normalized_coordinates=True, + line_thickness=8) + plt.figure(figsize=IMAGE_SIZE) + cv2.imwrite('train/test.jpg', image_np) + plt.imshow(image_np) + + diff --git a/research/object_detection/object_detection_tutorial_convert.py b/research/object_detection/object_detection_tutorial_convert.py new file mode 100644 index 00000000000..c30e737aa95 --- /dev/null +++ b/research/object_detection/object_detection_tutorial_convert.py @@ -0,0 +1,168 @@ +# coding: utf-8 + +# # Object Detection Demo 从摄像头读取数据。 +# Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start. 
# # Imports

# In[1]:

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

import cv2  # add 20170825
# Open the default camera (device 0).
cap = cv2.VideoCapture(0)  # add 20170825

# ## Env setup

# In[2]:  # delete 20170825
# This is needed to display the images.  # delete 20170825
# get_ipython().magic('matplotlib inline')  # delete 20170825

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")


# ## Object detection imports
# Here are the imports from the object detection module.

# In[3]:

from utils import label_map_util

from utils import visualization_utils as vis_util


# # Model preparation

# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded
# here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the
# [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md)
# for a list of other models that can be run out-of-the-box with varying
# speeds and accuracies.

# In[4]:

# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for
# the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90


# ## Download Model

# In[5]:

# Only download and unpack the model archive when the frozen graph is not
# already present; the original unconditionally re-downloaded the archive on
# every run.
if not os.path.exists(PATH_TO_CKPT):
  opener = urllib.request.URLopener()
  opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
  tar_file = tarfile.open(MODEL_FILE)
  for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
      tar_file.extract(file, os.getcwd())


# ## Load a (frozen) Tensorflow model into memory.

# In[6]:

detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')


# ## Loading label map
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `airplane`. Here we
# use internal utility functions, but anything that returns a dictionary
# mapping integers to appropriate string labels would be fine.

# In[7]:

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


# ## Helper code

# In[8]:

def load_image_into_numpy_array(image):
  """Converts a PIL image to a (height, width, 3) uint8 numpy array.

  Assumes a 3-channel (RGB) input.  Unused by the webcam loop below but kept
  from the original notebook.
  """
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)


# # Detection

# In[9]:

# Image list kept from the notebook version; the webcam loop below does not
# use it.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
    for i in range(1, 3)
]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)


# In[10]:

with detection_graph.as_default():
  with tf.Session(graph=detection_graph) as sess:
    while True:  # for image_path in TEST_IMAGE_PATHS:  # changed 20170825
      ret, image_np = cap.read()
      if not ret:
        # Frame grab failed (camera unplugged, busy, or absent): stop cleanly
        # instead of crashing on a None frame below.
        break

      # Expand dimensions since the model expects images to have shape:
      # [1, None, None, 3]
      image_np_expanded = np.expand_dims(image_np, axis=0)
      image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
      # Each box represents a part of the image where a particular object was
      # detected.
      boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
      # Each score represents the level of confidence for each of the
      # objects.  Score is shown on the result image, together with the class
      # label.
      scores = detection_graph.get_tensor_by_name('detection_scores:0')
      classes = detection_graph.get_tensor_by_name('detection_classes:0')
      num_detections = detection_graph.get_tensor_by_name('num_detections:0')
      # Actual detection.
      (boxes, scores, classes, num_detections) = sess.run(
          [boxes, scores, classes, num_detections],
          feed_dict={image_tensor: image_np_expanded})
      # Visualization of the results of a detection.
      vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          np.squeeze(boxes),
          np.squeeze(classes).astype(np.int32),
          np.squeeze(scores),
          category_index,
          use_normalized_coordinates=True,
          line_thickness=8)
      cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
      if cv2.waitKey(25) & 0xFF == ord('q'):
        break
    # plt.figure(figsize=IMAGE_SIZE)  # delete 20170825
    # plt.imshow(image_np)  # delete 20170825

# Release the camera and close the preview window on every exit path (the
# original leaked the VideoCapture handle and only destroyed windows when the
# user pressed 'q').
cap.release()
cv2.destroyAllWindows()