Spaces:
Sleeping
Sleeping
File size: 4,032 Bytes
0e78cbf d766b17 0e78cbf d766b17 0e78cbf d766b17 3945649 d766b17 3945649 d766b17 3945649 d766b17 3945649 d766b17 3945649 d766b17 3945649 d766b17 3945649 d766b17 3945649 d766b17 1102d40 3945649 0e78cbf 46ea120 3945649 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import logging
import cv2
import numpy as np
from typing import List
import torch
import random
from ultralytics import YOLOWorld
class YoloWorld:
    """Wrapper around an Ultralytics ``YOLOWorld`` model for prompt-based detection.

    The wrapper sets the class prompts on the model, runs prediction on an
    image path, and exposes the detections either as plain Python data
    (``run_inference``) or as an annotated image written to disk
    (``run_yolo_infer``).
    """

    def __init__(self, model_name="yolov8x-worldv2.pt", device="cpu"):
        """Load the model weights and move the model to ``device``.

        Args:
            model_name: Weights name/path handed to ``YOLOWorld``.
            device: Device passed to ``model.to``; defaults to ``'cpu'``.
        """
        self.model = YOLOWorld(model_name)
        self.model.to(device=device)

    def _detect(self, image_path, object_prompts):
        """Return ``(label, [x1, y1, x2, y2], confidence)`` for every predicted box.

        Coordinates are truncated to plain Python ``int`` and the confidence
        is a Python ``float`` rounded to two decimals, so the values can be
        printed, serialised, or handed to OpenCV without further conversion.
        """
        self.model.set_classes(object_prompts)
        detections = []
        for result in self.model.predict(image_path):
            for box in result.boxes:
                # Each box carries a single row of coordinates; flatten it.
                coords = [int(v) for v in box.xyxy.cpu().reshape(-1).tolist()]
                # Look the name up on the result that owns this box.
                label = result.names[int(box.cls.cpu())]
                confidence = round(float(box.conf.cpu()), 2)
                detections.append((label, coords, confidence))
        return detections

    def run_inference(self, image_path: str, object_prompts: List):
        """Detect the prompted objects and return their details.

        Args:
            image_path: Path of the image passed to ``model.predict``.
            object_prompts: Class names passed to ``model.set_classes``.

        Returns:
            A list with one dict per detection, shaped as
            ``{label: {'bounding_box': [x1, y1, x2, y2], 'confidence': c}}``.
            Each detection is also printed to stdout.
        """
        object_details = []
        for label, (x1, y1, x2, y2), confidence in self._detect(image_path, object_prompts):
            print("Object Name :{} Bounding Box:{},{} Confidence score {}\n ".format(
                label, (x1, y1), (x2, y2), confidence))
            object_details.append({
                label: {
                    'bounding_box': [x1, y1, x2, y2],
                    'confidence': confidence,
                }
            })
        return object_details

    @staticmethod
    def draw_bboxes(rgb_frame, boxes, labels, line_thickness=3):
        """Draw labelled boxes on the image stored at ``rgb_frame``.

        Args:
            rgb_frame: Path of the image file to read (despite the name, this
                is a path, not an array).
            boxes: Iterable of 4-element boxes ``(x1, y1, x2, y2)``. Tensors,
                NumPy arrays, and plain sequences are all accepted.
            labels: Iterable of labels, paired with ``boxes`` by position.
            line_thickness: Rectangle thickness; a falsy value derives the
                thickness from the image size.

        Returns:
            The annotated image as an RGB array.

        Raises:
            OSError: If the image cannot be read.
        """
        bgr = cv2.imread(rgb_frame)
        if bgr is None:
            # imread signals failure by returning None instead of raising.
            raise OSError("Could not read image: {!r}".format(rgb_frame))
        canvas = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        tl = line_thickness or round(0.002 * (canvas.shape[0] + canvas.shape[1]) / 2) + 1  # line thickness
        tf = max(tl - 1, 1)  # font thickness

        labels = list(labels)
        # One random colour per distinct label, shared by all of its boxes.
        palette = {
            label: [random.randint(28, 255) for _ in range(3)]
            for label in dict.fromkeys(labels)
        }

        for box, label in zip(boxes, labels):
            # Normalise any tensor/array/sequence to Python ints for OpenCV.
            coords = box.tolist() if hasattr(box, "tolist") else box
            x1, y1, x2, y2 = (int(v) for v in coords)
            cv2.rectangle(canvas, (x1, y1), (x2, y2), palette[label],
                          thickness=tl, lineType=cv2.LINE_AA)
            cv2.putText(canvas, str(label), (x1, y1 - 2), 0, tl / 3, palette[label],
                        thickness=tf, lineType=cv2.LINE_AA)
        return canvas

    def run_yolo_infer(self, image_path: str, object_prompts: List,
                       output_path: str = "final_mask.png"):
        """Detect the prompted objects, save an annotated image, and describe it.

        Args:
            image_path: Path of the image to analyse and annotate.
            object_prompts: Class names passed to ``model.set_classes``.
            output_path: Where the annotated image is written.

        Returns:
            A message naming the written file followed by a dict with
            ``boxes`` (int32 tensor), ``labels`` (list), and ``scores`` (tensor).
        """
        detections = self._detect(image_path, object_prompts)
        prediction = dict(
            boxes=torch.tensor([coords for _, coords, _ in detections], dtype=torch.int32),
            labels=[label for label, _, _ in detections],
            scores=torch.tensor([score for _, _, score in detections]),
        )
        annotated = self.draw_bboxes(rgb_frame=image_path,
                                     boxes=prediction['boxes'],
                                     labels=prediction['labels'])
        # draw_bboxes returns RGB; OpenCV writes BGR.
        cv2.imwrite(output_path, cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        return "Predicted image : {} . Details :{}".format(output_path, prediction)
# if __name__ == "__main__":
# yolo = YoloWorld()
# predicted_data = yolo.run_yolo_infer('../image_store/demo2.jpg',['person','hat','building'])
# print(predicted_data)
|