From: marc nicole
Newsgroups: comp.lang.python
Subject: Predicting an object over a pretrained model is not working
Date: Tue, 30 Jul 2024 20:18:42 +0200

Hello all,

I want to predict an object by giving an image as input and have my model
predict its label. I have trained a model with TensorFlow on an annotated
database, where the target object to predict was added to the pretrained
model. The code I am using is the following; I set the target object image
as input and want to get the prediction output:

import numpy as np
# YoloTinyNet is the tiny-YOLO implementation the model was trained with
# (imported from wherever that training code lives).

class MultiObjectDetection():

    def __init__(self, classes_name):
        self._classes_name = classes_name
        self._num_classes = len(classes_name)

        self._common_params = {'image_size': 448,
                               'num_classes': self._num_classes,
                               'batch_size': 1}
        self._net_params = {'cell_size': 7,
                            'boxes_per_cell': 2,
                            'weight_decay': 0.0005}
        self._net = YoloTinyNet(self._common_params, self._net_params, test=True)

    def predict_object(self, image):
        predicts = self._net.inference(image)
        return predicts

    def process_predicts(self, resized_img, predicts, thresh=0.2):
        """
        Process the predicts of object detection with one image input.

        Args:
            resized_img: resized source image.
            predicts: output of the model.
            thresh: threshold of bounding box confidence.
        Return:
            predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
        """
        cls_num = self._num_classes
        bbx_per_cell = self._net_params["boxes_per_cell"]
        cell_size = self._net_params["cell_size"]
        img_size = self._common_params["image_size"]

        p_classes = predicts[0, :, :, 0:cls_num]
        C = predicts[0, :, :, cls_num:cls_num + bbx_per_cell]  # two bounding boxes in one cell.
        coordinate = predicts[0, :, :, cls_num + bbx_per_cell:]  # all bounding box positions.

        p_classes = np.reshape(p_classes, (cell_size, cell_size, 1, cls_num))
        C = np.reshape(C, (cell_size, cell_size, bbx_per_cell, 1))

        # confidence for all classes of all bounding boxes:
        # (cell_size, cell_size, bounding_box_num, class_num) = (7, 7, 2, 1).
        P = C * p_classes
        predicts_dict = {}
        for i in range(cell_size):
            for j in range(cell_size):
                # keep only this cell's confidences and find its best box/class.
                temp_data = np.zeros_like(P, np.float32)
                temp_data[i, j, :, :] = P[i, j, :, :]
                position = np.argmax(temp_data)  # class with maximum confidence for this cell.
                index = np.unravel_index(position, P.shape)

                if P[index] > thresh:
                    class_num = index[-1]
                    # (cell_size, cell_size, bbox_num_per_cell, 4) with [xcenter, ycenter, w, h].
                    coordinate = np.reshape(coordinate,
                                            (cell_size, cell_size, bbx_per_cell, 4))
                    max_coordinate = coordinate[index[0], index[1], index[2], :]

                    xcenter = max_coordinate[0]
                    ycenter = max_coordinate[1]
                    w = max_coordinate[2]
                    h = max_coordinate[3]

                    # cell-relative centre -> absolute pixel coordinates.
                    xcenter = (index[1] + xcenter) * (1.0 * img_size / cell_size)
                    ycenter = (index[0] + ycenter) * (1.0 * img_size / cell_size)
                    w = w * img_size
                    h = h * img_size

                    # clamp the box corners to the image.
                    xmin = 0 if (xcenter - w / 2.0 < 0) else (xcenter - w / 2.0)
                    ymin = 0 if (ycenter - h / 2.0 < 0) else (ycenter - h / 2.0)
                    xmax = resized_img.shape[1] if (xmin + w) > resized_img.shape[1] else (xmin + w)
                    ymax = resized_img.shape[0] if (ymin + h) > resized_img.shape[0] else (ymin + h)

                    class_name = self._classes_name[class_num]
                    predicts_dict.setdefault(class_name, [])
                    predicts_dict[class_name].append(
                        [int(xmin), int(ymin), int(xmax), int(ymax), P[index]])

        return predicts_dict

    def non_max_suppress(self, predicts_dict, threshold=0.5):
        """
        Implement non-maximum suppression on predicted bounding boxes.

        Args:
            predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
            threshold: IoU threshold.
        Return:

========== REMAINDER OF ARTICLE TRUNCATED ==========
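A minimal end-to-end driver for the class above might look like the sketch
below. It is only a sketch under assumptions: it relies on the TF1-style
graph API that YoloTinyNet was written against (under TensorFlow 2 the
tf.compat.v1 equivalents plus tf.compat.v1.disable_eager_execution() would be
needed), and the class list, image path, checkpoint path, and the [-1, 1]
input scaling are placeholders, not values confirmed by the post.

# Usage sketch only -- class list, paths, and input scaling are assumptions.
import cv2
import numpy as np
import tensorflow as tf

classes_name = ["stick"]                      # placeholder class list
detector = MultiObjectDetection(classes_name)

# Build the inference graph on a fixed-size input placeholder (TF1 style).
image_ph = tf.placeholder(tf.float32, (1, 448, 448, 3))
predicts_op = detector.predict_object(image_ph)

# Load and preprocess the query image to the size the net expects.
img = cv2.imread("target_object.jpg")         # placeholder image path
resized_img = cv2.resize(img, (448, 448))
np_img = resized_img.astype(np.float32)
np_img = np_img / 255.0 * 2 - 1               # assumed [-1, 1] scaling; must match training
np_img = np.reshape(np_img, (1, 448, 448, 3))

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "models/train/model.ckpt")   # placeholder checkpoint path
    np_predict = sess.run(predicts_op, feed_dict={image_ph: np_img})

predicts_dict = detector.process_predicts(resized_img, np_predict)
predicts_dict = detector.non_max_suppress(predicts_dict)
print(predicts_dict)                          # e.g. {"stick": [[xmin, ymin, xmax, ymax, score]]}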