  def custom_multi_gpu_test_onnx(
          model,
          data_loader,
          tmpdir=None,
          gpu_collect=False,
          output_file='/×××/BEVformer/mmdetection3d/BEVFormer/J5/bevformer_tiny.onnx'):
      """Export ``model`` to ONNX using the first batch of ``data_loader``.

      Despite its name, this function collects no evaluation results: it
      builds a single input dictionary from the first batch, hands it to
      ``torch.onnx.export`` and returns right after the export.

      Args:
          model (nn.Module): Model to export; it is switched to eval mode.
          data_loader (nn.Dataloader): Pytorch data loader. Each batch must
              provide ``data['img']`` and ``data['img_metas']``.
          tmpdir (str): Unused; kept so existing callers keep working.
          gpu_collect (bool): Unused; kept so existing callers keep working.
          output_file (str): Path of the ONNX file to write. Defaults to the
              location that used to be hard-coded in the body.

      Returns:
          dict: The placeholder ``{0: '1'}`` once the export has finished, or
          ``None`` when ``data_loader`` yields no batch at all.
      """
      model.eval()
      dataset = data_loader.dataset
      rank, _ = get_dist_info()
      if rank == 0:
          # Only the construction side effect is wanted; the bar is never
          # advanced because the function returns after a single batch.
          mmcv.ProgressBar(len(dataset))
      time.sleep(2)  # This line can prevent deadlock problem in some cases.

      for data in data_loader:
          # NOTE(review): the ``.data[0]`` accesses presumably unwrap mmcv
          # DataContainer objects - confirm against the dataset pipeline.
          src_meta = data['img_metas'][0].data[0][0]
          with torch.no_grad():
              # Every entry the model reads must be a tensor, otherwise the
              # tracer cannot treat it as an input of the exported graph.
              meta = {
                  'can_bus': torch.from_numpy(src_meta['can_bus']).float(),
                  'lidar2img': torch.from_numpy(
                      np.array(src_meta['lidar2img'])).float().unsqueeze(0),
                  'scene_token': 'fcbccedd61424f1b85dcbf8f897f9754',
                  'img_shape': torch.Tensor([[480, 800]]),
              }
              inputs = {
                  'img': data['img'][0].data[0].float().unsqueeze(0),
                  # Double nesting: list (outer) of list (inner) of meta dicts.
                  'img_metas': [[meta]],
              }
              torch.onnx.export(
                  model,
                  inputs,
                  output_file,
                  export_params=True,
                  keep_initializers_as_inputs=True,
                  do_constant_folding=False,
                  verbose=False,
                  opset_version=11,
              )
          print(f"ONNX file has been saved in {output_file}")
          # One traced batch is all the export needs.
          return {0: '1'}





解决方法: 点进dist_utils.py里面,修改内容,如下所示

  def _init_dist_pytorch(backend, **kwargs):
      """Initialise ``torch.distributed`` as a single-process group.

      The rendezvous settings are forced to rank 0 on ``localhost:5678``,
      overwriting whatever the environment already holds, and the process
      group is created with a world size of one.

      Args:
          backend (str): Backend name forwarded to ``init_process_group``.
          **kwargs: Extra keyword arguments forwarded to
              ``init_process_group``.
      """
      # TODO: use local_rank instead of rank % num_gpus
      os.environ.update({
          'RANK': '0',
          'MASTER_ADDR': 'localhost',
          'MASTER_PORT': '5678',
      })
      rank = int(os.environ['RANK'])
      device_index = rank % torch.cuda.device_count()
      torch.cuda.set_device(device_index)
      dist.init_process_group(backend=backend, world_size=1, **kwargs)

报错2:AttributeError: 'NoneType' object has no attribute 'size'



  def forward(self, input):
      """Run the test-time forward pass on one packed input dictionary.

      The ``return_loss`` switch of the usual detector ``forward`` is gone:
      this variant always dispatches to ``forward_test`` so that the model
      can be called with a single argument.

      Args:
          input (dict): Must contain ``'img_metas'`` and ``'img'``. Both are
              passed to ``forward_test`` unchanged, in that order.

      Returns:
          Whatever ``self.forward_test`` returns.
      """
      img_metas = input['img_metas']
      img = input['img']
      return self.forward_test(img_metas, img)

 (2)forward_test函数定义去掉**kwargs, self.simple_test()函数输入也去掉**kwargs

报错3:ValueError: only one element tensors can be converted to Python scalars

原因:BEVFormer本来是以numpy形式输入'can_bus'参数的,但是转模型的时候所有变量都应该是tensor的形式。我们在初始化数据输入的时候用torch.randn()生成输入,所以做如下修改:


  def get_bev_features(
          self,
          mlvl_feats,
          bev_queries,
          bev_h,
          bev_w,
          grid_length=(0.512, 0.512),
          bev_pos=None,
          prev_bev=None,
          **kwargs):
      """Obtain BEV features by running the encoder on the image features.

      Args:
          mlvl_feats (list[Tensor]): One 5-D tensor per feature level,
              unpacked below as ``(bs, num_cam, c, h, w)``.
          bev_queries (Tensor): BEV queries; a batch axis is inserted at
              dim 1 and repeated ``bs`` times.
          bev_h (int): Height of the BEV grid.
          bev_w (int): Width of the BEV grid.
          grid_length (sequence of 2 floats): ``(length_y, length_x)`` of one
              BEV cell. A tuple default replaces the former shared list.
          bev_pos (Tensor): Positional encoding; required although it
              defaults to ``None`` (it is flattened unconditionally).
          prev_bev (Tensor, optional): BEV features of the previous frame.
          **kwargs: Must contain ``img_metas``, a list of dicts whose
              ``'can_bus'`` entry is a tensor. Forwarded to the encoder.

      Returns:
          The output of ``self.encoder``.
      """
      bs = mlvl_feats[0].size(0)
      bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1)
      bev_pos = bev_pos.flatten(2).permute(2, 0, 1)
      img_metas = kwargs['img_metas']

      # obtain rotation angle and shift with ego motion
      # The can_bus entries are tensors here, so each scalar is pulled out
      # through numpy before the per-sample values are re-assembled.
      delta_x = torch.from_numpy(np.array(
          [each['can_bus'][0].cpu().numpy() for each in img_metas]))
      delta_y = torch.from_numpy(np.array(
          [each['can_bus'][1].cpu().numpy() for each in img_metas]))
      # Converted the same way as delta_x / delta_y so that tensors living
      # on another device are handled consistently.
      ego_angle = np.array(
          [(each['can_bus'][-2] / np.pi * 180).cpu().numpy()
           for each in img_metas])
      ego_angle = torch.from_numpy(ego_angle.astype(np.float32))

      grid_length_y = grid_length[0]
      grid_length_x = grid_length[1]
      translation_length = torch.sqrt(delta_x ** 2 + delta_y ** 2)
      # atan2(delta_y, delta_x) in degrees, spelled out with atan plus a
      # quadrant correction.
      translation_angle = (
          (
              torch.atan(delta_y / (delta_x + 1e-8))
              + ((1 - torch.sign(delta_x)) / 2) * torch.sign(delta_y) * np.pi
          )
          / np.pi
          * 180
      )
      bev_angle = ego_angle - translation_angle
      shift_y = translation_length * \
          torch.cos(bev_angle / 180 * np.pi) / grid_length_y / bev_h
      shift_x = translation_length * \
          torch.sin(bev_angle / 180 * np.pi) / grid_length_x / bev_w
      # Multiplying by 0/1 turns the shift off without branching.
      shift_y = shift_y * int(self.use_shift)
      shift_x = shift_x * int(self.use_shift)
      shift = torch.stack([shift_x, shift_y]).permute(1, 0)

      if prev_bev is not None:
          if prev_bev.shape[1] == bev_h * bev_w:
              prev_bev = prev_bev.permute(1, 0, 2)
          if self.rotate_prev_bev:
              for i in range(bs):
                  rotation_angle = img_metas[i]['can_bus'][-1]
                  tmp_prev_bev = prev_bev[:, i].reshape(
                      bev_h, bev_w, -1).permute(2, 0, 1)
                  tmp_prev_bev = rotate(tmp_prev_bev, rotation_angle,
                                        center=self.rotate_center)
                  tmp_prev_bev = tmp_prev_bev.permute(1, 2, 0).reshape(
                      bev_h * bev_w, 1, -1)
                  prev_bev[:, i] = tmp_prev_bev[:, 0]

      # add can bus signals
      # Stacked into one ndarray first so new_tensor gets a single buffer
      # instead of a list of arrays.
      can_bus = bev_queries.new_tensor(
          np.array([each['can_bus'].cpu().numpy() for each in img_metas]))
      can_bus = self.can_bus_mlp(can_bus)[None, :, :]
      bev_queries = bev_queries + can_bus * int(self.use_can_bus)

      feat_flatten = []
      spatial_shapes = []
      for lvl, feat in enumerate(mlvl_feats):
          # The 5-way unpack doubles as a rank check on the feature map.
          _, _, _, h, w = feat.shape
          feat = feat.flatten(3).permute(1, 0, 3, 2)
          if self.use_cams_embeds:
              feat = feat + self.cams_embeds[:, None, None, :].to(feat.dtype)
          feat = feat + self.level_embeds[None,
                                          None, lvl:lvl + 1, :].to(feat.dtype)
          spatial_shapes.append((h, w))
          feat_flatten.append(feat)

      feat_flatten = torch.cat(feat_flatten, 2)
      spatial_shapes = torch.as_tensor(
          spatial_shapes, dtype=torch.long, device=bev_pos.device)
      # Offset of every level inside the flattened spatial axis.
      level_start_index = torch.cat((spatial_shapes.new_zeros(
          (1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))

      feat_flatten = feat_flatten.permute(
          0, 2, 1, 3)  # (num_cam, H*W, bs, embed_dims)

      bev_embed = self.encoder(
          bev_queries,
          feat_flatten,
          feat_flatten,
          bev_h=bev_h,
          bev_w=bev_w,
          bev_pos=bev_pos,
          spatial_shapes=spatial_shapes,
          level_start_index=level_start_index,
          prev_bev=prev_bev,
          shift=shift,
          **kwargs
      )
      return bev_embed

报错4:ValueError: only one element tensors can be converted to Python scalars

在encoder.py的point_sampling函数里面也有这个问题, 直接注释掉95~99行,改为

lidar2img = img_metas[0]['lidar2img']

报错5:KeyError: 'box_type_3d'


报错6:RuntimeError: Exporting the operator linspace to ONNX opset version 11 is not supported.

如果必须要用opset 11版本的torch.onnx转模型,这个地方会提示torch.linspace算子不支持,定位到算子在bevformer/modules/encoder.py的 BEVFormerEncoder.get_reference_points函数中 


  def get_reference_points(H, W, Z=8, num_points_in_pillar=4, dim='3d', bs=1, device='cuda', dtype=torch.float):
      """Get the reference points used in SCA and TSA.

      ``torch.linspace`` cannot be exported at ONNX opset 11, so evenly
      spaced coordinates are produced from an integer ``torch.arange``.

      Args:
          H, W: spatial shape of bev.
          Z: height of pillar.
          num_points_in_pillar: number of points sampled uniformly from each
              pillar (only used when ``dim == '3d'``).
          dim (str): ``'3d'`` for spatial cross-attention (SCA) points,
              ``'2d'`` for temporal self-attention (TSA) points.
          bs (int): batch size the points are repeated for.
          device (obj:`device`): The device where
              reference_points should be.
          dtype: data type of the returned tensor.

      Returns:
          Tensor: for ``dim == '3d'`` a tensor of shape
          ``(bs, num_points_in_pillar, H * W, 3)``; for ``dim == '2d'`` a
          tensor of shape ``(bs, H * W, 1, 2)``. Coordinates are normalised
          by W, H and Z. ``None`` for any other ``dim``.
      """

      def _linspace(start, end, steps):
          # Same values as torch.linspace(start, end, steps). Using integer
          # indices guarantees exactly `steps` elements (a float-step arange
          # can gain or lose one through rounding) and multiplying before
          # dividing keeps the end point exact.
          idx = torch.arange(steps, dtype=dtype, device=device)
          if steps <= 1:
              # A single sample sits at `start`; also avoids dividing by 0.
              return idx + start
          return idx * (end - start) / (steps - 1) + start

      # reference points in 3D space, used in spatial cross-attention (SCA)
      if dim == '3d':
          zs = _linspace(0.5, Z - 0.5, num_points_in_pillar).view(
              -1, 1, 1).expand(num_points_in_pillar, H, W) / Z
          xs = _linspace(0.5, W - 0.5, W).view(
              1, 1, W).expand(num_points_in_pillar, H, W) / W
          ys = _linspace(0.5, H - 0.5, H).view(
              1, H, 1).expand(num_points_in_pillar, H, W) / H
          ref_3d = torch.stack((xs, ys, zs), -1)
          ref_3d = ref_3d.permute(0, 3, 1, 2).flatten(2).permute(0, 2, 1)
          ref_3d = ref_3d[None].repeat(bs, 1, 1, 1)
          return ref_3d

      # reference points on 2D bev plane, used in temporal self-attention (TSA).
      elif dim == '2d':
          ref_y, ref_x = torch.meshgrid(
              _linspace(0.5, H - 0.5, H),
              _linspace(0.5, W - 0.5, W)
          )
          ref_y = ref_y.reshape(-1)[None] / H
          ref_x = ref_x.reshape(-1)[None] / W
          ref_2d = torch.stack((ref_x, ref_y), -1)
          ref_2d = ref_2d.repeat(bs, 1, 1).unsqueeze(2)
          return ref_2d

报错7:RuntimeError: Exporting the operator maximum to ONNX opset version 11 is not supported

提示maximum算子不支持,定位到算子位于bevformer/modules/encoder.py的 BEVFormerEncoder.point_sampling函数中,直接将torch.maximum()改为torch.max()效果是一样的。

报错8:RuntimeError: Exporting the operator nan_to_num to ONNX opset version 11 is not supported.


报错9:RuntimeError: Exporting the operator grid_sampler to ONNX opset version 11 is not supported


from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch


from mmcv.ops.point_sample import bilinear_grid_sample


  1. sampling_value_l_ = F.grid_sample(
  2. value_l_,
  3. sampling_grid_l_,
  4. mode='bilinear',
  5. padding_mode='zeros',
  6. align_corners=False)


sampling_value_l_ = bilinear_grid_sample(value_l_,sampling_grid_l_)



报错10:RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces).

直接点进报错信息中的/mmcv/ops/point_sample.py中,找到x = x.view(n,-1),改为:

  1. x = x.contiguous().view(n, -1)
  2. y = y.contiguous().view(n, -1)

报错11:RuntimeError: Exporting the operator atan2 to ONNX opset version 11 is not supported.


  # atan2 cannot be exported at ONNX opset 11, so it is rebuilt from atan:
  # atan(sin / cos) covers cos > 0, and the second term adds +/- pi (sign
  # taken from the sine) when the cosine is negative. The ratio must reach
  # atan unmodified - squashing it (e.g. with a sigmoid) would confine the
  # first term to (0, pi / 4) and yield wrong angles.
  rot = (
      torch.atan(rot_sine / (rot_cosine + 1e-8))
      + ((1 - torch.sign(rot_cosine)) / 2) * torch.sign(rot_sine) * np.pi
  )

报错12:TypeError: _all() takes 2 positional arguments but 4 were given 
(Occurred when translating all).


from torch.onnx import symbolic_opset9

点进symbolic_opset9这个文件里面,定位到2440行,将def _any(g, input)和def _all(g, input)这两个函数改为:

  def _any(g, *args):
      """ONNX symbolic for ``aten::any`` in both of its overloads.

      Args:
          g: Graph object used to emit ONNX nodes.
          *args: Either ``(tensor,)`` for the full reduction or
              ``(tensor, dim, keepdim)`` for the reduction along one axis.

      Returns:
          The node computing ``sum(tensor as int64) > 0``.
      """
      if len(args) == 1:
          # aten::any(Tensor self)
          (tensor,) = args
          axes, keep = None, 0
      else:
          # aten::any(Tensor self, int dim, bool keepdim)
          tensor, dim_arg, keep_arg = args
          axes = [_parse_arg(dim_arg, "i")]
          keep = _parse_arg(keep_arg, "i")
      # Cast to int64, sum, and compare against zero: any non-zero element
      # makes the sum positive.
      as_long = _cast_Long(g, tensor, False)  # type: ignore[name-defined]
      total = sym_help._reducesum_helper(
          g, as_long, axes_i=axes, keepdims_i=keep)
      zero = g.op("Constant", value_t=torch.LongTensor([0]))
      return gt(g, total, zero)
  def _all(g, *args):
      """ONNX symbolic for ``aten::all``, expressed as ``not any(not x)``.

      Args:
          g: Graph object used to emit ONNX nodes.
          *args: Either ``(tensor,)`` for the full reduction or
              ``(tensor, dim, keepdim)`` for the reduction along one axis.

      Returns:
          The node holding the negated result of ``_any`` on the negated
          input.
      """
      negated = g.op("Not", args[0])
      if len(args) == 1:
          # aten::all(Tensor self)
          reduce_args = ()
      else:
          # aten::all(Tensor self, int dim, bool keepdim)
          reduce_args = (args[1], args[2])
      return g.op("Not", _any(g, negated, *reduce_args))
报错13:RuntimeError: Exporting the operator __iand_ to ONNX opset version 11 is not supported.

算子不支持,这个算子找了好久,定位到mmdetection3d/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py的80行,意思是mask &= ......相与操作‘&’有问题,替换为:

mask = (mask.float()*((final_box_preds[..., :3] <= self.post_center_range[3:]).all(1)).float()).bool()



虽然转好了onnx,但是可以看到输出很多警告信息,实际上这个onnx可能还是有点问题的,我们先用onnx simplifier包优化一下:

  import onnx
  import onnxsim

  # Exported model to read and simplified copy to write.
  onnx_path = '/×××/mmdetection3d/BEVFormer/ckpts/bevformer_tiny.onnx'
  sim_onnx_path = '/×××/mmdetection3d/BEVFormer/ckpts/bevformer_tiny_sim.onnx'

  # Load the exported graph, validate it and dump it in readable form.
  model_onnx = onnx.load(onnx_path)
  onnx.checker.check_model(model_onnx)
  graph_text = onnx.helper.printable_graph(model_onnx.graph)
  print(graph_text)

  # Simplify; `check` reports whether the simplifier's own validation
  # (run with check_n=3) succeeded.
  print(f'simplifying with onnx-simplifier {onnxsim.__version__}...')
  simplified = onnxsim.simplify(
      model_onnx, check_n=3, skip_shape_inference=True)
  model_onnx, check = simplified
  assert check, 'assert check failed'
  onnx.save(model_onnx, sim_onnx_path)

 报错1:onnxruntime.capi.onnxruntime_pybind11_state.InvalidGraph: [ONNXRuntimeError] : 10 : INVALID_GRAPH : This is an invalid model. Type Error: Type 'tensor(int64)' of input parameter (8733) of operator (Clip) in node (Clip_7993) is invalid.



import torch.nn.functional


w_q, w_k, w_v = w.chunk(3)


w_q, w_k, w_v = w.split(int(w.shape[0]/3))


b_q, b_k, b_v = b.chunk(3)


b_q, b_k, b_v = b.split(int(b.shape[0]/3))

 另外,在SpatialCrossAttention的forward中有一行 count = torch.clamp(count, min=1.0)




  def inverse_sigmoid(x, eps=1e-5):
      """Inverse function of sigmoid.

      The clamping is written with ``torch.where`` rather than ``clamp`` so
      that no Clip node is emitted on export, and it is done out of place:
      the tensor passed by the caller is left untouched.

      Args:
          x (Tensor): The tensor to do the
              inverse.
          eps (float): EPS avoid numerical
              overflow. Defaults 1e-5.

      Returns:
          Tensor: The x has passed the inverse
              function of sigmoid, has same
              shape with input.
      """
      # Restrict the input to [0, 1] without modifying it in place.
      x = torch.where(x < 0, torch.zeros_like(x), x)
      x = torch.where(x > 1, torch.ones_like(x), x)
      floor = torch.full_like(x, eps)
      # Keep numerator and denominator at least eps so log() stays finite.
      x1 = torch.where(x < eps, floor, x)
      x2 = 1 - x
      x2 = torch.where(x2 < eps, floor, x2)
      return torch.log(x1 / x2)


报错2:onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Non-zero status code returned while running Expand node. Name:'Expand_1855' Status Message: invalid expand shape



queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img]


queries_rebatch[j, i, :len(index_query_per_img)] = query[j, np.array(index_query_per_img)]


reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img]


reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, np.array(index_query_per_img)]


slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)]


index_query_per_img = np.array(index_query_per_img)

报错3:Tensor 18362 changes after optimization. The max diff is 2.288818359375e-05.

 提示优化结果有偏差,初步定位了一下位置,发现在后处理部分,也就是bevformer.py的self.pts_bbox_head.get_bboxes,暂且把这个去掉,让def simple_test_pts(self, x, img_metas, prev_bev=None, rescale=False):只输出outs,如下所示

  def simple_test_pts(self, x, img_metas, prev_bev=None, rescale=False):
      """Run the bbox head and return its raw outputs.

      No box decoding is applied here: whatever ``self.pts_bbox_head``
      produces is returned as is.

      Args:
          x: Features forwarded to the head.
          img_metas: Meta information forwarded to the head.
          prev_bev: Previous BEV features, forwarded as a keyword argument.
          rescale (bool): Unused; kept for signature compatibility.

      Returns:
          The output of ``self.pts_bbox_head``.
      """
      return self.pts_bbox_head(x, img_metas, prev_bev=prev_bev)






