/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/etherdevice.h>
#include <net/devlink.h>

#include <uapi/rdma/mlx4-abi.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level; /* 0 by default */
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */


static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");

static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the num"
					 " of qp per mcg, for example:"
					 " 10 gives 248. Range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

static bool enable_4k_uar;
module_param(enable_4k_uar, bool, 0444);
MODULE_PARM_DESC(enable_4k_uar,
		 "Enable using 4K UAR. Should not be enabled if there are VFs which do not support 4K UARs (default: false)");

#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)

#define RESET_PERSIST_MASK_FLAGS	(MLX4_FLAG_SRIOV)

static char mlx4_version[] =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION "\n";

static const struct mlx4_profile default_profile = {
	.num_qp		= 1 << 18,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 19,
	.num_mtt	= 1 << 20, /* It is really num mtt segments */
};

static const struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(1);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
		 "(0-7) (default: 0)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
				"1 for IB, 2 for Ethernet");

struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

static atomic_t pf_loading = ATOMIC_INIT(0);

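/* devlink get/set handlers for the generic "internal_err_reset" parameter,
 * backed by the module-global mlx4_internal_err_reset flag.
 */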
static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id,
				       struct devlink_param_gset_ctx *ctx)
{
	ctx->val.vbool = !!mlx4_internal_err_reset;
	return 0;
}

static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
				       struct devlink_param_gset_ctx *ctx)
{
	mlx4_internal_err_reset = ctx->val.vbool;
	return 0;
}

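/* devlink "region_snapshot" handlers: toggle crdump snapshot collection
 * on the device's persistent state.
 */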
static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
					    struct devlink_param_gset_ctx *ctx)
{
	struct mlx4_priv *priv = devlink_priv(devlink);
	struct mlx4_dev *dev = &priv->dev;

	ctx->val.vbool = dev->persist->crdump.snapshot_enable;
	return 0;
}

static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
					    struct devlink_param_gset_ctx *ctx)
{
	struct mlx4_priv *priv = devlink_priv(devlink);
	struct mlx4_dev *dev = &priv->dev;

	dev->persist->crdump.snapshot_enable = ctx->val.vbool;
	return 0;
}

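/* devlink validation: max_macs must be a power of 2 in the range [1, 128] */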
static int
mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
			       union devlink_param_value val,
			       struct netlink_ext_ack *extack)
{
	u32 value = val.vu32;

	if (value < 1 || value > 128)
		return -ERANGE;

	if (!is_power_of_2(value)) {
		NL_SET_ERR_MSG_MOD(extack, "max_macs supported must be power of 2");
		return -EINVAL;
	}

	return 0;
}

enum mlx4_devlink_param_id {
	MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
	MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
	MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
};

static const struct devlink_param mlx4_devlink_params[] = {
	DEVLINK_PARAM_GENERIC(INT_ERR_RESET,
			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      mlx4_devlink_ierr_reset_get,
			      mlx4_devlink_ierr_reset_set, NULL),
	DEVLINK_PARAM_GENERIC(MAX_MACS,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      NULL, NULL, mlx4_devlink_max_macs_validate),
	DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      mlx4_devlink_crdump_snapshot_get,
			      mlx4_devlink_crdump_snapshot_set, NULL),
	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
			     "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			     NULL, NULL, NULL),
	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
			     "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			     NULL, NULL, NULL),
};

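/* Seed the devlink driverinit values from the current module parameters */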
static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
{
	union devlink_param_value value;

	value.vbool = !!mlx4_internal_err_reset;
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
					   value);

	value.vu32 = 1UL << log_num_mac;
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
					   value);

	value.vbool = enable_64b_cqe_eqe;
	devlink_param_driverinit_value_set(devlink,
					   MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
					   value);

	value.vbool = enable_4k_uar;
	devlink_param_driverinit_value_set(devlink,
					   MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
					   value);

	value.vbool = false;
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
					   value);
}

static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
					      struct mlx4_dev_cap *dev_cap)
{
	/* The reserved_uars is calculated by system page size unit.
	 * Therefore, adjustment is added when the uar page size is less
	 * than the system page size
	 */
	dev->caps.reserved_uars	=
		max_t(int,
		      mlx4_get_num_reserved_uar(dev),
		      dev_cap->reserved_uars /
			(1 << (PAGE_SHIFT - dev->uar_page_shift)));
}

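/* Validate that a requested port-type configuration is supported by the HCA */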
int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 0; i < dev->caps.num_ports - 1; i++) {
			if (port_type[i] != port_type[i + 1]) {
				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
				return -EOPNOTSUPP;
			}
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
				 i + 1);
			return -EOPNOTSUPP;
		}
	}
	return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}

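/* On FW with MLX4_DEV_CAP_FLAG2_SYS_EQS, per-function EQ and UAR limits come
 * from QUERY_FUNC; the MLX4_QUERY_FUNC_NUM_SYS_EQS bit in the return value
 * marks that those fields were updated.
 */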
enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err = 0;
	struct mlx4_func func;

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
		err = mlx4_QUERY_FUNC(dev, &func, 0);
		if (err) {
			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
			return err;
		}
		dev_cap->max_eqs = func.max_eq;
		dev_cap->reserved_eqs = func.rsvd_eqs;
		dev_cap->reserved_uars = func.rsvd_uars;
		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
	}
	return err;
}

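/* On hosts with cache lines larger than 64B (128B/256B), trade 64B CQEs/EQEs
 * for CQE/EQE stride: the HW then writes 32B of real data inside a bigger
 * stride that matches the cache line.
 */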
static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
	struct mlx4_caps *dev_cap = &dev->caps;

	/* FW not supporting or cancelled by user */
	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
		return;

	/* Must have 64B CQE_EQE enabled by FW to use bigger stride
	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
	 */
	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		return;
	}

	if (cache_line_size() == 128 || cache_line_size() == 256) {
		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
		/* Changing the real data inside CQE size to 32B */
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

		if (mlx4_is_master(dev))
			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
	} else {
		if (cache_line_size() != 32  && cache_line_size() != 64)
			mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
	}
}

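/* Copy the per-port limits reported by QUERY_PORT into the device caps */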
static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
			  struct mlx4_port_cap *port_cap)
{
	dev->caps.vl_cap[port]	    = port_cap->max_vl;
	dev->caps.ib_mtu_cap[port]	    = port_cap->ib_mtu;
	dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
	/* set gid and pkey table operating lengths by default
	 * to non-sriov values
	 */
	dev->caps.gid_table_len[port]  = port_cap->max_gids;
	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
	dev->caps.port_width_cap[port] = port_cap->max_port_width;
	dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
	dev->caps.max_tc_eth	       = port_cap->max_tc_eth;
	dev->caps.def_mac[port]        = port_cap->def_mac;
	dev->caps.supported_type[port] = port_cap->supported_port_types;
	dev->caps.suggested_type[port] = port_cap->suggested_type;
	dev->caps.default_sense[port] = port_cap->default_sense;
	dev->caps.trans_type[port]	    = port_cap->trans_type;
	dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
	dev->caps.wavelength[port]     = port_cap->wavelength;
	dev->caps.trans_code[port]     = port_cap->trans_code;

	return 0;
}

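/* Wrapper around mlx4_QUERY_PORT that logs failures */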
static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int err = 0;

	err = mlx4_QUERY_PORT(dev, port, port_cap);

	if (err)
		mlx4_err(dev, "QUERY_PORT command failed.\n");

	return err;
}

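/* "Ignore FCS" is only usable on a non-virtualized function whose FW also
 * supports keeping the FCS in received frames (FCS_KEEP).
 */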
static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
{
	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
		return;

	if (mlx4_is_mfunc(dev)) {
		mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
		mlx4_dbg(dev,
			 "Keep FCS is not supported - Disabling Ignore FCS");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}
}

#define MLX4_A0_STEERING_TABLE_SIZE	256
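/* Query and cache all PF capabilities (QUERY_DEV_CAP + QUERY_PORT), then
 * apply module parameters and profile limits on top.
 */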
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}
	mlx4_dev_cap_dump(dev, dev_cap);

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev_cap->uar_size,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
	}

	dev->caps.uar_page_size	     = PAGE_SIZE;
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE to enable resizing the CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;

	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.flags2	     = dev_cap->flags2;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
	dev->caps.wol_port[2]          = dev_cap->wol_port[2];
	dev->caps.health_buffer_addrs  = dev_cap->health_buffer_addrs;

	/* Save uar page shift */
	if (!mlx4_is_slave(dev)) {
		/* Virtual PCI function needs to determine UAR page size from
		 * firmware. Only master PCI function can set the uar page size
		 */
		if (enable_4k_uar || !dev->persist->num_vfs)
			dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
		else
			dev->uar_page_shift = PAGE_SHIFT;

		mlx4_set_num_reserved_uars(dev, dev_cap);
	}

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
		struct mlx4_init_hca_param hca_param;

		memset(&hca_param, 0, sizeof(hca_param));
		err = mlx4_QUERY_HCA(dev, &hca_param);
		/* Turn off PHV_EN flag in case phv_check_en is set.
		 * phv_check_en is a HW check that parses the packet and verifies
		 * the phv bit was reported correctly in the wqe. To allow QinQ,
		 * the PHV_EN flag should be set and phv_check_en must be cleared,
		 * otherwise QinQ packets will be dropped by the HW.
		 */
		if (err || hca_param.phv_check_en)
			dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
	}

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs  = log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				else
					dev->caps.port_type[i] = port_type_array[i - 1];
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port
		 * 3. FW declared that it supports link sensing
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
	    (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
	    (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
		mlx4_warn(dev,
			  "Granular QoS per VF not supported with IB/Eth configuration\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
	}

	dev->caps.max_counters = dev_cap->max_counters;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

	dev->caps.rl_caps = dev_cap->rl_caps;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
		dev->caps.dmfs_high_rate_qpn_range;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}

		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
	}

	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		mlx4_enable_cqe_eqe_stride(dev);
		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;

		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
			mlx4_warn(dev, "Old device ETS support detected\n");
			mlx4_warn(dev, "Consider upgrading device FW.\n");
			dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
		}

	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}

	mlx4_enable_ignore_fcs(dev);

	return 0;
}

/* The function checks if there are live VFs and returns their number */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state;
	int i;
	int ret = 0;

	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
		s_state = &priv->mfunc.master.slave_state[i];
		if (s_state->active && s_state->last_cmd !=
		    MLX4_COMM_CMD_RESET) {
			mlx4_warn(dev, "%s: slave: %d is still active\n",
				  __func__, i);
			ret++;
		}
	}
	return ret;
}

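/* Map a proxy/tunnel special QPN to its paravirtualized qkey */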
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

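/* Master-only: mirror a slave's virtual pkey table entry */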
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

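/* Master-only bookkeeping of per-slave node GUIDs */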
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return 0;

	return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

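/* If an Ethernet steering rule matches a multicast/broadcast DMAC and is the
 * last rule in the chain, force its priority to MLX4_DOMAIN_NIC.
 */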
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
				       struct _rule_hw *eth_header)
{
	if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
	    is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
		struct mlx4_net_trans_rule_hw_eth *eth =
			(struct mlx4_net_trans_rule_hw_eth *)eth_header;
		struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
		bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
			next_rule->rsvd == 0;

		if (last_rule)
			ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
	}
}
EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);

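/* A slave inherits its steering mode from the master via INIT_HCA params */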
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

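/* Query the special (proxy/tunnel) QPs and pkey/gid table lengths that the
 * master assigned to each port of this slave.
 */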
static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
{
	kfree(dev->caps.spec_qps);
	dev->caps.spec_qps = NULL;
}

static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
{
	struct mlx4_func_cap *func_cap = NULL;
	struct mlx4_caps *caps = &dev->caps;
	int i, err = 0;

	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
	caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);

	if (!func_cap || !caps->spec_qps) {
		mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
		err = -ENOMEM;
		goto err_mem;
	}

	for (i = 1; i <= caps->num_ports; ++i) {
		err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
		caps->spec_qps[i - 1] = func_cap->spec_qps;
		caps->port_mask[i] = caps->port_type[i];
		caps->phys_port_id[i] = func_cap->phys_port_id;
		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						      &caps->gid_table_len[i],
						      &caps->pkey_table_len[i]);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
	}

err_mem:
	if (err)
		mlx4_slave_destroy_special_qp_cap(dev);
	kfree(func_cap);
	return err;
}

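/* Build dev->caps for a VF: most limits are quotas granted by the master
 * through QUERY_FUNC_CAP, layered on top of QUERY_HCA and QUERY_DEV_CAP.
 */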
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int			   err;
	u32			   page_size;
	struct mlx4_dev_cap	   *dev_cap = NULL;
	struct mlx4_func_cap	   *func_cap = NULL;
	struct mlx4_init_hca_param *hca_param = NULL;

	hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
	dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
	if (!hca_param || !func_cap || !dev_cap) {
		mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
		err = -ENOMEM;
		goto free_mem;
	}

	err = mlx4_QUERY_HCA(dev, hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
		goto free_mem;
	}

	/* fail if the hca has an unknown global capability
	 * at this time global_caps should be always zeroed
	 */
	if (hca_param->global_caps) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		err = -EINVAL;
		goto free_mem;
	}

	dev->caps.hca_core_clock = hca_param->hca_core_clock;

	dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
	err = mlx4_dev_cap(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		goto free_mem;
	}

	err = mlx4_QUERY_FW(dev);
	if (err)
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size: %d\n", page_size);
	if (page_size > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 page_size, PAGE_SIZE);
		err = -ENODEV;
		goto free_mem;
	}

	/* Set uar_page_shift for VF */
	dev->uar_page_shift = hca_param->uar_page_sz + 12;

	/* Make sure the master uar page size is valid */
	if (dev->uar_page_shift > PAGE_SHIFT) {
		mlx4_err(dev,
			 "Invalid configuration: uar page size is larger than system page size\n");
		err = -ENODEV;
		goto free_mem;
	}

	/* Set reserved_uars based on the uar_page_shift */
	mlx4_set_num_reserved_uars(dev, dev_cap);

	/* Although uar page size in FW differs from system page size,
	 * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
	 * still works with assumption that uar page size == system page size
	 */
	dev->caps.uar_page_size = PAGE_SIZE;

	err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
	if (err) {
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
			 err);
		goto free_mem;
	}

	if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
			 func_cap->pf_context_behaviour,
			 PF_CONTEXT_BEHAVIOUR_MASK);
		err = -EINVAL;
		goto free_mem;
	}

	dev->caps.num_ports		= func_cap->num_ports;
	dev->quotas.qp			= func_cap->qp_quota;
	dev->quotas.srq			= func_cap->srq_quota;
	dev->quotas.cq			= func_cap->cq_quota;
	dev->quotas.mpt			= func_cap->mpt_quota;
	dev->quotas.mtt			= func_cap->mtt_quota;
	dev->caps.num_qps		= 1 << hca_param->log_num_qps;
	dev->caps.num_srqs		= 1 << hca_param->log_num_srqs;
	dev->caps.num_cqs		= 1 << hca_param->log_num_cqs;
	dev->caps.num_mpts		= 1 << hca_param->log_mpt_sz;
	dev->caps.num_eqs		= func_cap->max_eq;
	dev->caps.reserved_eqs		= func_cap->reserved_eq;
	dev->caps.reserved_lkey		= func_cap->reserved_lkey;
	dev->caps.num_pds               = MLX4_NUM_PDS;
	dev->caps.num_mgms              = 0;
	dev->caps.num_amgms             = 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev->caps.num_ports, MLX4_MAX_PORTS);
		err = -ENODEV;
		goto free_mem;
	}

	mlx4_replace_zero_macs(dev);

	err = mlx4_slave_special_qp_cap(dev);
	if (err) {
		mlx4_err(dev, "Set special QP caps failed. aborting\n");
		goto free_mem;
	}

	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
				       pci_resource_len(dev->persist->pdev,
							2)) {
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev->caps.uar_page_size * dev->caps.num_uars,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		err = -ENOMEM;
		goto err_mem;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size   = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size   = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size   = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	} else {
		dev->caps.cqe_size   = 32;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
		dev->caps.eqe_size = hca_param->eqe_size;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
		dev->caps.cqe_size = hca_param->cqe_size;
		/* User still needs to know when CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
	mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");

	slave_adjust_steering_mode(dev, dev_cap, hca_param);
	mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
		 hca_param->rss_ip_frags ? "on" : "off");

	if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

	if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

err_mem:
	if (err)
		mlx4_slave_destroy_special_qp_cap(dev);
free_mem:
	kfree(hca_param);
	kfree(func_cap);
	kfree(dev_cap);
	return err;
}

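/* Request the mlx4_en/mlx4_ib drivers that match the configured port types */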
static void mlx4_request_modules(struct mlx4_dev *dev)
{
	int port;
	int has_ib_port = false;
	int has_eth_port = false;
#define EN_DRV_NAME	"mlx4_en"
#define IB_DRV_NAME	"mlx4_ib"

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
			has_ib_port = true;
		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			has_eth_port = true;
	}

	if (has_eth_port)
		request_module_nowait(EN_DRV_NAME);
	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
		request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port <  dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, aborting\n",
					 port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
	}

out:
	return err;
}

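/* sysfs: show the port type, as "auto (<type>)" when port sensing is enabled */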
static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

static int __set_port_type(struct mlx4_port_info *info,
			   enum mlx4_port_type port_type)
{
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	int i;
	int err = 0;

	if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
		mlx4_err(mdev,
			 "Requested port type for port %d is not supported on this HCA\n",
			 info->port);
		return -EOPNOTSUPP;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	info->tmp_type = port_type;

	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type