Visualize inference results
This article explains how to visualize inference results for the following project types:
- Image classification
- Multilabel image classification
- Object detection
- Instance and semantic segmentation
To set up the basic visualization of inference results, you will need two scripts:
- api_inference.py, which interacts with an API to retrieve inference results.
- utils.py, which contains utility functions to process and visualize the returned data.
Retrieve inference results
The api_inference.py example script reads an input image, performs platform inference over the API, and visualizes the results. An image stored on a local device is provided as the input. The output is a processed image with visualized inference results.
The input parameters are configured in the CONFIG_* dictionaries (one per project type), each with the following keys:
- img_path: Path to the input image for inference.
- api_endpoint: Robovision AI inference API endpoint.
api_inference.py
import requests
import cv2
import json
import numpy as np
import os
from PIL import Image
from utils import ColorGenerator, create_output_image # Import utility functions for color generation and image processing
# One configuration per supported project type. Each one holds the same two
# settings: the local image to send and the endpoint of the deployment.

# Single label classification
CONFIG_SL_CLF = dict(
    img_path="<path/to/image>",
    api_endpoint="<deployment_api_endpoint>",
)

# Multi label classification
CONFIG_ML_CLF = dict(
    img_path="<path/to/image>",
    api_endpoint="<deployment_api_endpoint>",
)

# Object detection
CONFIG_OD = dict(
    img_path="<path/to/image>",
    api_endpoint="<deployment_api_endpoint>",
)

# Segmentation
CONFIG_SE = dict(
    img_path="<path/to/image>",
    api_endpoint="<deployment_api_endpoint>",
)
if __name__ == '__main__':
    # Configuration of the deployment to query. Swap in CONFIG_ML_CLF,
    # CONFIG_OD or CONFIG_SE to run against another project type.
    config = CONFIG_SL_CLF

    # Hands out one color per class label for the visualization
    color_generator = ColorGenerator()

    # 1. Read input image
    img_path = config["img_path"]
    # Keep asking until the provided path exists
    while not os.path.exists(img_path):
        img_path = input("The provided path to the image does not exist. Please provide a valid path: ")

    input_image = cv2.imread(img_path)  # Load the image using OpenCV
    if input_image is None:
        # cv2.imread does not raise on unreadable files, it returns None
        print(f"Could not read an image from: {img_path}")
        raise SystemExit(1)

    # Uncomment to display the input image before processing
    # print(f"Displaying image: {img_path}")
    # cv2.imshow("Input Image", input_image)

    # 2. Perform inference by sending the image to the API
    with open(img_path, 'rb') as f:
        image_bytes = f.read()
    # The session is closed again as soon as the request has completed
    with requests.Session() as session:
        response = session.post(f"{config['api_endpoint']}/predict", files={'image': image_bytes})

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}. Reason: {response.reason}")
        raise SystemExit(1)  # Stop with a non-zero exit status if the request fails
    json_response = response.json()  # Parse the response JSON

    # 3. Visualize inference results
    resulting_image = create_output_image(input_image, json_response, color_generator)

    # Display the processed image with inference results
    cv2.imshow("Inference Result", resulting_image)
    cv2.waitKey(0)  # Wait for a key press before closing the window
    cv2.destroyAllWindows()  # Close the image display window
Visualize inference results
The visualization functionality is centralized in the utils.py file. To generate an output image, use the create_output_image(...) function as the primary entry point.
utils.py
import base64
import cv2
import numpy as np
import random
from config_dashboard import DashboardConfig
import requests
import json
from pycocotools.mask import decode
from json.decoder import JSONDecodeError
class ColorGenerator:
    """
    Remembers which color belongs to which class name.

    Colors are assigned on first use: classes take the entries of
    DashboardConfig.class_colors in order, and once those are used up
    every further class gets a random color.
    """

    def __init__(self):
        # Maps class name -> color assigned to that class
        self.class_color_dict: dict = {}

    def set_color(self, class_name: str):
        """Assign a color to class_name, store it and return it."""
        n_assigned = len(self.class_color_dict)
        if n_assigned >= len(DashboardConfig.class_colors):
            # Config colors are used up: fall back to a random color
            picked = tuple(random.randint(0, 255) for _ in range(3))
        else:
            # First colors are in config file to ensure clear contrast
            picked = DashboardConfig.class_colors[n_assigned]
        self.class_color_dict[class_name] = picked
        return picked

    def get_color(self, class_name: str):
        """Return the color of class_name, assigning one if it has none yet."""
        if class_name in self.class_color_dict:
            return self.class_color_dict[class_name]
        return self.set_color(class_name)
"""
Helper functions
"""
def decode_image(image: str) -> np.ndarray:
    """
    Helper function to decode a base64 string to an image represented by a numpy array

    The decoded bytes are parsed with OpenCV as a 3-channel color image and the
    channel axis is reversed before returning (OpenCV decodes to BGR order).

    Args:
        image: Base64 encoded content of an image file.

    Returns:
        The decoded image with its channel axis reversed.

    Raises:
        ValueError: If the decoded bytes are not a readable image.
    """
    bytes_image = base64.b64decode(image)
    decoded_image = cv2.imdecode(np.frombuffer(bytes_image, dtype=np.uint8), cv2.IMREAD_COLOR)
    if decoded_image is None:
        # cv2.imdecode reports undecodable data by returning None instead of raising
        raise ValueError("The provided string could not be decoded to an image")
    return decoded_image[:, :, ::-1]
def create_output_image(image: np.ndarray, json_response: dict, color_generator: ColorGenerator) -> np.ndarray:
    """
    General function for creating the output image with inference results

    Args:
        image: Image the inference was performed on.
        json_response: Parsed inference response (a single prediction or a list of predictions).
        color_generator: Supplies the color used for every class.

    Returns:
        The input image itself when the response is empty, otherwise the
        annotated image produced by the helper matching the project type.

    Raises:
        SystemExit: If the project type of the response is not supported.
    """
    # An empty response has nothing to inspect or draw, so it must be handled
    # before the project type is derived from the response content.
    if not json_response:
        return image

    project_type = get_project_type(json_response)
    if project_type == "SLCL":
        # Single Label Classification
        return get_SL_CLF_output(image, json_response, color_generator)
    if project_type == "MLCL":
        # Multi Label Classification
        return get_ML_CLF_output(image, json_response, color_generator)
    if project_type == "OD":
        # Object Detection
        return get_OD_output(image, json_response, color_generator)
    if project_type == "SE":
        # Segmentation
        return get_SE_output(image, json_response, color_generator)

    print("Project type not supported. Exiting application")
    raise SystemExit(1)
def create_output_centroid(image: np.ndarray, json_response: dict, color_generator: ColorGenerator) -> np.ndarray:
    """
    General function for creating the output image with inference results and centroids

    Classification responses are drawn by the regular classification helpers;
    object detection and segmentation responses are drawn by the helpers that
    additionally mark centers/centroids.

    Args:
        image: Image the inference was performed on.
        json_response: Parsed inference response (a single prediction or a list of predictions).
        color_generator: Supplies the color used for every class.

    Returns:
        The input image itself when the response is empty, otherwise the
        annotated image produced by the helper matching the project type.

    Raises:
        SystemExit: If the project type of the response is not supported.
    """
    # An empty response has nothing to inspect or draw, so it must be handled
    # before the project type is derived from the response content.
    if not json_response:
        return image

    project_type = get_project_type(json_response)
    if project_type == "SLCL":
        # Single label classification
        return get_SL_CLF_output(image, json_response, color_generator)
    if project_type == "MLCL":
        # Multi label classification
        return get_ML_CLF_output(image, json_response, color_generator)
    if project_type == "OD":
        # Object detection
        return get_OD_output_centroid(image, json_response, color_generator)
    if project_type == "SE":
        # Segmentation
        return get_SE_output_centroid(image, json_response, color_generator)

    print("Project type not supported. Exiting application")
    raise SystemExit(1)
def get_SL_CLF_output(image: np.ndarray, prediction: list, color_generator: ColorGenerator) -> np.ndarray:
    """
    Single label classification: Returns a copy of the image with the class label & confidence

    The label is written in white on a filled box in the color of the class,
    anchored near the top-left corner of the image.

    NOTE(review): `prediction` is annotated as a list but is indexed with
    string keys below, so it is presumably a dict - confirm.
    """
    canvas = image.copy()
    anchor_x, anchor_y = 10, 20  # Start position of the label
    box_vpad = 10  # Box padding above and below the text

    class_id = prediction["value"]["id"]
    score = prediction["confidence"]
    class_color = color_generator.get_color(class_id)

    caption = f"{class_id}: {score}"
    font_face = cv2.FONT_HERSHEY_DUPLEX
    (text_w, text_h), _ = cv2.getTextSize(text=caption, fontFace=font_face, thickness=1, fontScale=0.8)

    # Swap the first and third channel of the class color
    # (presumably RGB -> BGR because OpenCV draws in BGR - TODO confirm)
    box_color = tuple(class_color[i] for i in (2, 1, 0))

    # Filled box behind the text
    top_left = (anchor_x, anchor_y - text_h - box_vpad)
    bottom_right = (anchor_x + text_w + 20, anchor_y + box_vpad)
    canvas = cv2.rectangle(canvas, top_left, bottom_right, box_color, -1)

    # Text on top of the box
    canvas = cv2.putText(
        img=canvas,
        text=caption,
        fontFace=font_face,
        thickness=1,
        fontScale=0.8,
        org=(anchor_x, anchor_y),
        color=(255, 255, 255),
    )
    return canvas
def get_ML_CLF_output(image: np.ndarray, predictions: list, color_generator: ColorGenerator) -> np.ndarray:
    """
    Multi label classification: Returns a copy of the image with the class labels & confidences

    Every prediction is written in white on a filled box in the color of its
    class. The boxes are stacked vertically, starting near the top-left corner.

    Args:
        image: Image to annotate; it is copied, not modified.
        predictions: Predictions, each providing ["value"]["id"] and ["confidence"].
        color_generator: Supplies the color used for every class.

    Returns:
        The annotated copy (an unchanged copy when there are no predictions).
    """
    image_copy = image.copy()
    x, y = 10, 20  # Start position of labels
    vpad_rect = 10  # Box padding above and below the text

    # Multi label classification contains multiple labels & confidence values
    for prediction in predictions:
        label = prediction["value"]["id"]
        confidence = prediction["confidence"]
        label_color = color_generator.get_color(label)
        text_parameters = {
            'text': f"{label}: {confidence}",
            'fontFace': cv2.FONT_HERSHEY_DUPLEX,
            'thickness': 1,
            'fontScale': 0.8,
        }

        # Draw rectangle behind text
        text_dims, _ = cv2.getTextSize(**text_parameters)
        # Swap the first and third channel of the class color
        # (presumably RGB -> BGR because OpenCV draws in BGR - TODO confirm)
        color = (label_color[2], label_color[1], label_color[0])
        width_rect = text_dims[0] + 20
        image_copy = cv2.rectangle(image_copy, (x, y - text_dims[1] - vpad_rect), (x + width_rect, y + vpad_rect), color, -1)

        # Draw text
        image_copy = cv2.putText(
            img=image_copy,
            **text_parameters,
            org=(x, y),
            color=(255, 255, 255),
        )

        # Move down by the full height of the box that was just drawn, so the
        # next box starts where this one ends instead of painting over it.
        y += text_dims[1] + 2 * vpad_rect
    return image_copy
def get_OD_output(image: np.ndarray, predictions: list, color_generator: ColorGenerator) -> np.ndarray:
    """
    Object detection: Returns a copy of the image with bounding boxes, labels & confidences

    Args:
        image: Image to annotate; it is copied, not modified.
        predictions: Predictions, each providing ["value"]["center"]["position"],
            ["value"]["size"], ["label"]["id"] and ["confidence"].
        color_generator: Supplies the color used for every class.

    Returns:
        The annotated copy (an unchanged copy when there are no predictions).
    """
    image_copy = image.copy()
    font_scale = 0.5
    font_thickness = 1  # Used for measuring AND drawing, so the label is really centered

    for prediction in predictions:
        # Iterate over bounding boxes
        x, y = [int(i) for i in prediction["value"]["center"]["position"]]
        # NOTE(review): size is unpacked as (height, width) - confirm against the API
        h, w = [int(i) for i in prediction["value"]["size"]]
        half_w, half_h = int(w / 2), int(h / 2)
        label = prediction["label"]["id"]
        confidence = prediction["confidence"]
        label_color = color_generator.get_color(label)

        # Draw bounding box around the center position
        image_copy = cv2.rectangle(image_copy, (x - half_w, y - half_h), (x + half_w, y + half_h), label_color, 2)

        # Add label & confidence above the bounding box
        label_text = f"{label}: {confidence:.2f}"
        label_size, _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=font_thickness)
        label_position = (x - int(label_size[0] / 2), y - half_h - 10)  # Horizontally centered
        image_copy = cv2.putText(image_copy, label_text, label_position, cv2.FONT_HERSHEY_SIMPLEX, font_scale, label_color, font_thickness, cv2.LINE_AA)
    return image_copy
def get_OD_output_centroid(image: np.ndarray, predictions: list, color_generator: ColorGenerator) -> np.ndarray:
    """
    Object detection: Returns a copy of the image with bounding boxes, labels & confidences,
    and marks the center of each bounding box with a cross and the center coordinates.

    Args:
        image: Image to annotate; it is copied, not modified.
        predictions: Predictions, each providing ["value"]["center"]["position"],
            ["value"]["size"], ["label"]["id"] and ["confidence"].
        color_generator: Supplies the color used for every class.

    Returns:
        The annotated copy (an unchanged copy when there are no predictions).
    """
    image_copy = image.copy()
    font_scale = 0.5
    cross_size = 5  # Half the length of each arm of the center cross, in pixels

    for prediction in predictions:
        # Extract bounding box and label info
        x, y = [int(i) for i in prediction["value"]["center"]["position"]]
        # NOTE(review): size is unpacked as (height, width) - confirm against the API
        h, w = [int(i) for i in prediction["value"]["size"]]
        half_w, half_h = int(w / 2), int(h / 2)
        label = prediction["label"]["id"]
        confidence = prediction["confidence"]
        label_color = color_generator.get_color(label)

        # Draw bounding box around the center position
        image_copy = cv2.rectangle(image_copy, (x - half_w, y - half_h), (x + half_w, y + half_h), label_color, 2)

        # Add label & confidence above the bounding box
        label_text = f"{label}: {confidence:.2f}"
        label_size, _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=1)
        label_position = (x - int(label_size[0] / 2), y - half_h - 10)  # Horizontally centered
        image_copy = cv2.putText(image_copy, label_text, label_position, cv2.FONT_HERSHEY_SIMPLEX, font_scale, label_color, 1, cv2.LINE_AA)

        # Draw cross at the center of the bounding box
        image_copy = cv2.line(image_copy, (x - cross_size, y), (x + cross_size, y), label_color, 1)
        image_copy = cv2.line(image_copy, (x, y - cross_size), (x, y + cross_size), label_color, 1)

        # Add center coordinates next to the cross; the fixed offset keeps the
        # text clear of the cross, so the text does not need to be measured.
        coord_text = f"({x}, {y})"
        coord_position = (x + 10, y + 5)
        image_copy = cv2.putText(image_copy, coord_text, coord_position, cv2.FONT_HERSHEY_SIMPLEX, font_scale, label_color, 1, cv2.LINE_AA)
    return image_copy
def get_SE_output(image: np.ndarray, predictions: list, color_generator: ColorGenerator) -> np.ndarray:
    """
    Segmentation: Returns a copy of the image with masks

    Every mask gets its contour drawn in the color of its class, and the
    pixels covered by a mask are lightly tinted with that color.

    Args:
        image: Image to annotate; it is copied, not modified.
            Assumes the masks have the same height and width as the image - TODO confirm.
        predictions: Predictions, each providing ["label"]["id"] and an RLE
            encoded mask in ["value"].
        color_generator: Supplies the color used for every class.

    Returns:
        The annotated copy (an unchanged copy when there are no predictions).
    """
    image_copy = image.copy()
    if not predictions:
        # Nothing to draw; also avoids indexing the first mask of an empty list
        return image_copy

    lst_masks = []
    for prediction in predictions:
        mask_label = prediction["label"]["id"]
        # Platform inference: Mask is RLE Encoded
        mask_decoded = decode(prediction["value"])
        lst_masks.append({'mask': mask_decoded, 'name': mask_label})

    # Color layer holding the class color of every masked pixel, plus a
    # boolean map of which pixels are covered by at least one mask.
    mask_shape = lst_masks[0]['mask'].shape
    color_layer = np.zeros((mask_shape[0], mask_shape[1], 3), dtype='uint8')
    covered = np.zeros((mask_shape[0], mask_shape[1]), dtype=bool)

    # Iterate over all detected masks.
    for mask_dict in lst_masks:
        mask = mask_dict['mask']
        mask_color = color_generator.get_color(mask_dict['name'])
        region = mask > 0

        # Later masks overwrite earlier ones where they overlap
        color_layer[region] = mask_color
        covered |= region

        # Draw the contours of the mask on the target image.
        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(image_copy, contours, -1, mask_color, 3)

    # Blend the class colors into the covered pixels. Coverage is tracked
    # separately instead of being derived from the color values, because a
    # class color with a zero channel (e.g. pure red) would otherwise be skipped.
    alpha = 0.1
    image_copy[covered] = (image_copy[covered].astype('float') * (1 - alpha) +
                           color_layer[covered].astype('float') * alpha).astype('uint8')
    return image_copy
def get_SE_output_centroid(image: np.ndarray, predictions: list, color_generator: ColorGenerator) -> np.ndarray:
    """
    Segmentation: Returns a copy of the image with masks and centroids

    Every mask gets its contour and a cross at its centroid drawn in the color
    of its class, and the pixels covered by a mask are lightly tinted with
    that color.

    Args:
        image: Image to annotate; it is copied, not modified.
            Assumes the masks have the same height and width as the image - TODO confirm.
        predictions: Predictions, each providing ["label"]["id"] and an RLE
            encoded mask in ["value"].
        color_generator: Supplies the color used for every class.

    Returns:
        The annotated copy (an unchanged copy when there are no predictions).
    """
    image_copy = image.copy()
    if not predictions:
        # Nothing to draw; also avoids indexing the first mask of an empty list
        return image_copy

    lst_masks = []
    for prediction in predictions:
        mask_label = prediction["label"]["id"]
        # Platform inference: Mask is RLE Encoded
        mask_decoded = decode(prediction["value"])
        lst_masks.append({'mask': mask_decoded, 'name': mask_label})

    # Color layer holding the class color of every masked pixel, plus a
    # boolean map of which pixels are covered by at least one mask.
    mask_shape = lst_masks[0]['mask'].shape
    color_layer = np.zeros((mask_shape[0], mask_shape[1], 3), dtype='uint8')
    covered = np.zeros((mask_shape[0], mask_shape[1]), dtype=bool)

    cross_size = 10  # Half the length of each arm of the centroid cross, in pixels
    thickness = 2

    # Iterate over all detected masks.
    for mask_dict in lst_masks:
        mask = mask_dict['mask']
        mask_color = color_generator.get_color(mask_dict['name'])
        region = mask > 0

        # Later masks overwrite earlier ones where they overlap
        color_layer[region] = mask_color
        covered |= region

        # Draw the contours of the mask on the target image.
        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(image_copy, contours, -1, mask_color, 3)

        # Calculate and draw the centroid
        M = cv2.moments(mask)
        if M["m00"] != 0:  # To avoid division by zero
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
            # Draw a cross at the centroid
            cv2.line(image_copy, (cX - cross_size, cY), (cX + cross_size, cY), mask_color, thickness)
            cv2.line(image_copy, (cX, cY - cross_size), (cX, cY + cross_size), mask_color, thickness)

    # Blend the class colors into the covered pixels. Coverage is tracked
    # separately instead of being derived from the color values, because a
    # class color with a zero channel (e.g. pure red) would otherwise be skipped.
    alpha = 0.1
    image_copy[covered] = (image_copy[covered].astype('float') * (1 - alpha) +
                           color_layer[covered].astype('float') * alpha).astype('uint8')
    return image_copy
def draw_class_label(image: np.ndarray, class_label: str, class_score: float = None):
    """
    Classification: Draw the class label on a resized copy of the image

    The copy is scaled to a height of 500 pixels (width scaled by the same
    factor) and the label, followed by the score with two decimals when a
    score is given, is written in white on a filled box in the top-left corner.
    """
    target_height = 500
    src_height, src_width = image.shape[:2]
    ratio = target_height / src_height
    canvas = cv2.resize(image.copy(), (int(ratio * src_width), target_height))

    if class_score is None:
        caption = class_label
    else:
        caption = f"{class_label}: {class_score:.2f}"

    font_face = cv2.FONT_HERSHEY_DUPLEX
    (text_w, text_h), _ = cv2.getTextSize(text=caption, fontFace=font_face, thickness=1, fontScale=0.8)

    # Filled box behind the text, one and a half times as high as the text
    canvas = cv2.rectangle(canvas, (0, 0), (text_w, int(1.5 * text_h)), (156, 32, 240), -1)
    canvas = cv2.putText(
        img=canvas,
        text=caption,
        fontFace=font_face,
        thickness=1,
        fontScale=0.8,
        org=(0, text_h + 3),
        color=(255, 255, 255),
    )
    return canvas
def get_project_type(response) -> str:
    """
    Determines project type based on response keys

    The keys of the "value" entry of the response (of its first element when
    the response is a list) identify the project type. The keys are compared
    as a set, so their order in the response does not matter.

    Args:
        response: Parsed inference response, a single prediction or a list of predictions.

    Returns:
        "SLCL" (single label classification), "MLCL" (multi label
        classification), "OD" (object detection), "SE" (segmentation), or
        "UNK" when the response is empty or its structure is not recognized.
    """
    is_multi = isinstance(response, list)
    if is_multi:
        if not response:
            # An empty list has no prediction to inspect
            return "UNK"
        sample = response[0]
    else:
        sample = response

    value = sample.get("value") if isinstance(sample, dict) else None
    if not isinstance(value, dict):
        return "UNK"

    value_keys = set(value)
    if value_keys == {"id"}:
        # Classification: a list holds multiple labels, a single object one label
        return "MLCL" if is_multi else "SLCL"
    if value_keys == {"center", "size"}:
        # Object Detection
        return "OD"
    if value_keys == {"size", "counts"}:
        # Segmentation
        return "SE"
    return "UNK"