Image-Segmentation-Various

Sleeping

App Files Files Community

Image-Segmentation-Various / understand.py

gatesla

Got it to successfully create individual bounding boxes for a whole mask

8b5e39e over 1 year ago

raw

history blame

11.2 kB

	import matplotlib.pyplot as plt
	import requests, validators
	import torch
	import pathlib
	import numpy as np
	from PIL import Image

	from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
	# from transformers.models.detr.feature_extraction_detr import rgb_to_id
	from transformers.image_transforms import rgb_to_id

	# Sample image and checkpoint names used throughout this exploration script.
	TEST_IMAGE = Image.open(r"images/9999999_00783_d_0000358.jpg")
	MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
	MODEL_NAME_MASKFORMER = "facebook/maskformer-swin-large-coco"
	# Use the GPU when one is available, otherwise fall back to the CPU.
	DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	#######
	# Parameters
	#######
	image = TEST_IMAGE
	model_name = MODEL_NAME_MASKFORMER

	# Starting with MaskFormer

	processor = MaskFormerImageProcessor.from_pretrained(model_name)
	# type: <class 'transformers.models.maskformer.image_processing_maskformer.MaskFormerImageProcessor'>
	# Public names from dir(processor) (dunder and underscore-prefixed names omitted):
	# 'center_crop', 'convert_segmentation_map_to_binary_masks', 'do_normalize', 'do_reduce_labels', 'do_rescale', 'do_resize',
	# 'encode_inputs', 'fetch_images', 'from_dict', 'from_json_file', 'from_pretrained', 'get_image_processor_dict', 'ignore_index',
	# 'image_mean', 'image_std', 'model_input_names', 'normalize', 'pad', 'post_process_instance_segmentation',
	# 'post_process_panoptic_segmentation', 'post_process_segmentation', 'post_process_semantic_segmentation', 'preprocess',
	# 'push_to_hub', 'register_for_auto_class', 'resample', 'rescale', 'rescale_factor', 'resize', 'save_pretrained', 'size',
	# 'size_divisor', 'to_dict', 'to_json_file', 'to_json_string'

	model = MaskFormerForInstanceSegmentation.from_pretrained(model_name)
	# type: <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentation'>
	# DIR for model was too big
	model.to(DEVICE)

	inputs = processor(images=image, return_tensors="pt")
	# type: <class 'transformers.image_processing_utils.BatchFeature'>
	# Public names from dir(inputs) (dunder and underscore-prefixed names omitted):
	# 'clear', 'convert_to_tensors', 'copy', 'data', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem',
	# 'setdefault', 'to', 'update', 'values'
	# Rebind the result so the tensors end up on DEVICE whether .to() moves them
	# in place or returns a new object.
	inputs = inputs.to(DEVICE)

	# Inference only: disable autograd so no graph is recorded for the forward pass.
	with torch.no_grad():
		outputs = model(**inputs)
	# type: <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput'>
	# Each element of this class is a <class 'torch.Tensor'>
	# Public names from dir(outputs) (dunder names omitted):
	# 'attentions', 'auxiliary_logits', 'class_queries_logits', 'clear', 'copy', 'encoder_hidden_states',
	# 'encoder_last_hidden_state', 'fromkeys', 'get', 'hidden_states', 'items', 'keys', 'loss', 'masks_queries_logits',
	# 'move_to_end', 'pixel_decoder_hidden_states', 'pixel_decoder_last_hidden_state', 'pop', 'popitem', 'setdefault',
	# 'to_tuple', 'transformer_decoder_hidden_states', 'transformer_decoder_last_hidden_state', 'update', 'values'

	# PIL reports size as (width, height); reverse it to pass (height, width) as the target size.
	results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
	# <class 'dict'>
	# Keys: dict_keys(['segmentation', 'segments_info'])
	# type(results["segments_info"]) --> list
	# type(results["segmentation"]) --> <class 'torch.Tensor'>


	def show_mask_for_number(map_to_use, label_id):
		"""Plot the pixels of a segmentation map that equal ``label_id``.

		Args:
			map_to_use: Tensor-like segmentation map exposing ``.cpu()`` and
				``.numpy()`` (e.g. ``results["segmentation"]``).
			label_id: Value to select in the map; matching pixels are drawn
				white (255) and everything else black (0).

		Returns:
			None. The mask is shown in a matplotlib window.
		"""
		# Bring the map to host memory first when CUDA is in use.
		host_map = map_to_use.cpu() if torch.cuda.is_available() else map_to_use
		selected = host_map.numpy() == label_id

		# Scale the boolean mask to an 8-bit grayscale image for display.
		picture = Image.fromarray((selected * 255).astype(np.uint8))
		plt.imshow(picture)
		plt.show()


	def get_coordinates_for_bb_simple(map_to_use, label_id):
		"""Return the corners of the tightest box around one segment id.

		Args:
			map_to_use: 2-D torch tensor segmentation map (CPU or CUDA).
			label_id: Value to look for in the map.

		Returns:
			A pair ``((min_0, min_1), (max_0, max_1))`` of inclusive index
			bounds, where the first entry of each corner indexes axis 0 (rows)
			and the second indexes axis 1 (columns) of the map.

		Raises:
			ValueError: If ``label_id`` does not occur anywhere in the map.
		"""
		# .cpu() is a no-op for tensors that already live on the CPU, so no
		# device check is needed.
		mask = map_to_use.cpu().numpy() == label_id

		axis0_idx, axis1_idx = np.where(mask)
		if axis0_idx.size == 0:
			raise ValueError(f"label_id {label_id!r} does not occur in the segmentation map")

		# Vectorised reductions instead of the Python builtins, which iterate
		# element by element over what can be millions of indices.
		return (axis0_idx.min(), axis1_idx.min()), (axis0_idx.max(), axis1_idx.max())

	def make_simple_box(left_top, right_bottom, map_size):
		"""Draw the outline of a box on an empty canvas and display it.

		Args:
			left_top: ``(axis0, axis1)`` index of the first corner, inclusive.
			right_bottom: ``(axis0, axis1)`` index of the opposite corner,
				inclusive.
			map_size: Shape of the canvas to create.

		Returns:
			None. The outline is shown in a matplotlib window.
		"""
		full_mask = np.full(map_size, False)
		left_x, top_y = left_top
		right_x, bottom_y = right_bottom

		# Slice stops are exclusive, so extend each edge by one to include the
		# far corner; otherwise (right_x, bottom_y) is never set and a box that
		# is one pixel wide or tall draws nothing at all.
		full_mask[left_x:right_x + 1, top_y] = True
		full_mask[left_x:right_x + 1, bottom_y] = True
		full_mask[left_x, top_y:bottom_y + 1] = True
		full_mask[right_x, top_y:bottom_y + 1] = True

		# Scale the boolean outline to an 8-bit grayscale image for display.
		visual_mask = Image.fromarray((full_mask * 255).astype(np.uint8))
		plt.imshow(visual_mask)
		plt.show()


	def test(map_to_use, label_id):
		"""Display one segment's mask with its bounding box drawn over it.

		The mask is drawn white (255) and the box outline mid-grey (127) so the
		two can be told apart.

		Args:
			map_to_use: 2-D torch tensor segmentation map (CPU or CUDA).
			label_id: Value to select in the map.

		Returns:
			None. The image is shown in a matplotlib window.
		"""
		# .cpu() is a no-op for tensors that already live on the CPU.
		mask = map_to_use.cpu().numpy() == label_id

		lt, rb = get_coordinates_for_bb_simple(map_to_use, label_id)
		left_x, top_y = lt
		right_x, bottom_y = rb

		# Draw on a float copy: writing 0.5 into the boolean mask would be cast
		# to True and render the box at the same intensity as the mask.
		canvas = mask.astype(np.float32)

		# Slice stops are exclusive; +1 keeps the far corner inside the outline.
		canvas[left_x:right_x + 1, top_y] = 0.5
		canvas[left_x:right_x + 1, bottom_y] = 0.5
		canvas[left_x, top_y:bottom_y + 1] = 0.5
		canvas[right_x, top_y:bottom_y + 1] = 0.5

		visual_mask = Image.fromarray((canvas * 255).astype(np.uint8))
		plt.imshow(visual_mask)
		plt.show()



	# From Tutorial (Box 79)
	# def get_mask(segment_idx):
	# segment = results['segments_info'][segment_idx]
	# print("Visualizing mask for:", id2label[segment['label_id']])
	# mask = (predicted_panoptic_seg == segment['id'])
	# visual_mask = (mask * 255).astype(np.uint8)
	# return Image.fromarray(visual_mask)

	# How to look up the label name for a segment's label_id (REPL transcript below)

	"""
	>>> model.config.id2label
	{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter',
	13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
	27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket',
	39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza',
	54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
	68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush', 80: 'banner', 81: 'blanket',
	82: 'bridge', 83: 'cardboard', 84: 'counter', 85: 'curtain', 86: 'door-stuff', 87: 'floor-wood', 88: 'flower', 89: 'fruit', 90: 'gravel', 91: 'house', 92: 'light', 93: 'mirror-stuff', 94: 'net', 95: 'pillow',
	96: 'platform', 97: 'playingfield', 98: 'railroad', 99: 'river', 100: 'road', 101: 'roof', 102: 'sand', 103: 'sea', 104: 'shelf', 105: 'snow', 106: 'stairs', 107: 'tent', 108: 'towel', 109: 'wall-brick',
	110: 'wall-stone', 111: 'wall-tile', 112: 'wall-wood', 113: 'water-other', 114: 'window-blind', 115: 'window-other', 116: 'tree-merged', 117: 'fence-merged', 118: 'ceiling-merged', 119: 'sky-other-merged',
	120: 'cabinet-merged', 121: 'table-merged', 122: 'floor-other-merged', 123: 'pavement-merged', 124: 'mountain-merged', 125: 'grass-merged', 126: 'dirt-merged', 127: 'paper-merged', 128: 'food-other-merged',
	129: 'building-other-merged', 130: 'rock-merged', 131: 'wall-other-merged', 132: 'rug-merged'}
	>>> model.config.id2label[123]
	'pavement-merged'
	>>> results["segments_info"][1]
	{'id': 2, 'label_id': 123, 'was_fused': False, 'score': 0.995813}
	"""
	# The labels above do not appear to match the COCO-Stuff label list: https://github.com/nightrome/cocostuff/blob/master/labels.md
	# The most helpful reference was this tutorial notebook: https://github.com/NielsRogge/Transformers-Tutorials/blob/master/MaskFormer/Inference/Inference_with_MaskFormer_for_semantic_%2B_panoptic_segmentation.ipynb

	"""
	>>> Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
	<PIL.Image.Image image mode=L size=2000x1500 at 0x7F07773691C0>
	>>> temp = Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
	"""

	"""
	>>> mask = (results["segmentation"].cpu().numpy == 4)
	>>> mask = (results["segmentation"].cpu().numpy() == 4)
	>>> mask
	array([[False, False, False, ..., False, False, False],
	[False, False, False, ..., False, False, False],
	[False, False, False, ..., False, False, False],
	...,
	[False, False, False, ..., False, False, False],
	[False, False, False, ..., False, False, False],
	[False, False, False, ..., False, False, False]])
	>>> visual_mask = (mask * 255).astype(np.uint8)
	>>> visual_mask = Image.fromarray(visual_mask)
	>>> plt.imshow(visual_mask)
	<matplotlib.image.AxesImage object at 0x7f0761e78040>
	>>> plt.show()
	"""

	"""
	>>> mask = (results["segmentation"].cpu().numpy() == 1)
	>>> visual_mask = (mask*255).astype(np.uint8)
	>>> visual_mask = Image.fromarray(visual_mask)
	>>> plt.imshow(visual_mask)
	<matplotlib.image.AxesImage object at 0x7f0760298550>
	>>> plt.show()
	>>> results["segments_info"][0]
	{'id': 1, 'label_id': 25, 'was_fused': False, 'score': 0.998022}
	>>>
	"""

	"""
	>>> np.where(mask==True)
	(array([300, 300, 300, ..., 392, 392, 392]), array([452, 453, 454, ..., 473, 474, 475]))
	>>> max(np.where(mask==True)[0])
	392
	>>> min(np.where(mask==True)[0])
	300
	>>> max(np.where(mask==True)[1])
	538
	>>> min(np.where(mask==True)[1])
	399
	"""