1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2007-2012 Intel Corporation. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Copyright 2013, Nexenta Systems, Inc. All rights reserved.
29 */
30
31 #include "igb_sw.h"
32
33 static char ident[] = "Intel 1Gb Ethernet";
34 static char igb_version[] = "igb 1.1.18";
35
36 /*
37 * Local function protoypes
38 */
39 static int igb_register_mac(igb_t *);
40 static int igb_identify_hardware(igb_t *);
41 static int igb_regs_map(igb_t *);
42 static void igb_init_properties(igb_t *);
43 static int igb_init_driver_settings(igb_t *);
44 static void igb_init_locks(igb_t *);
45 static void igb_destroy_locks(igb_t *);
46 static int igb_init_mac_address(igb_t *);
47 static int igb_init(igb_t *);
48 static int igb_init_adapter(igb_t *);
49 static void igb_stop_adapter(igb_t *);
50 static int igb_reset(igb_t *);
51 static void igb_tx_clean(igb_t *);
52 static boolean_t igb_tx_drain(igb_t *);
53 static boolean_t igb_rx_drain(igb_t *);
54 static int igb_alloc_rings(igb_t *);
55 static int igb_alloc_rx_data(igb_t *);
56 static void igb_free_rx_data(igb_t *);
57 static void igb_free_rings(igb_t *);
58 static void igb_setup_rings(igb_t *);
59 static void igb_setup_rx(igb_t *);
60 static void igb_setup_tx(igb_t *);
61 static void igb_setup_rx_ring(igb_rx_ring_t *);
62 static void igb_setup_tx_ring(igb_tx_ring_t *);
63 static void igb_setup_rss(igb_t *);
64 static void igb_setup_mac_rss_classify(igb_t *);
65 static void igb_setup_mac_classify(igb_t *);
66 static void igb_init_unicst(igb_t *);
67 static void igb_setup_multicst(igb_t *);
68 static void igb_get_phy_state(igb_t *);
69 static void igb_param_sync(igb_t *);
70 static void igb_get_conf(igb_t *);
71 static int igb_get_prop(igb_t *, char *, int, int, int);
72 static boolean_t igb_is_link_up(igb_t *);
73 static boolean_t igb_link_check(igb_t *);
74 static void igb_local_timer(void *);
75 static void igb_link_timer(void *);
76 static void igb_arm_watchdog_timer(igb_t *);
77 static void igb_start_watchdog_timer(igb_t *);
78 static void igb_restart_watchdog_timer(igb_t *);
79 static void igb_stop_watchdog_timer(igb_t *);
80 static void igb_start_link_timer(igb_t *);
81 static void igb_stop_link_timer(igb_t *);
82 static void igb_disable_adapter_interrupts(igb_t *);
83 static void igb_enable_adapter_interrupts_82575(igb_t *);
84 static void igb_enable_adapter_interrupts_82576(igb_t *);
85 static void igb_enable_adapter_interrupts_82580(igb_t *);
86 static boolean_t is_valid_mac_addr(uint8_t *);
87 static boolean_t igb_stall_check(igb_t *);
88 static boolean_t igb_set_loopback_mode(igb_t *, uint32_t);
89 static void igb_set_external_loopback(igb_t *);
90 static void igb_set_internal_phy_loopback(igb_t *);
91 static void igb_set_internal_serdes_loopback(igb_t *);
92 static boolean_t igb_find_mac_address(igb_t *);
93 static int igb_alloc_intrs(igb_t *);
94 static int igb_alloc_intr_handles(igb_t *, int);
95 static int igb_add_intr_handlers(igb_t *);
96 static void igb_rem_intr_handlers(igb_t *);
97 static void igb_rem_intrs(igb_t *);
98 static int igb_enable_intrs(igb_t *);
99 static int igb_disable_intrs(igb_t *);
100 static void igb_setup_msix_82575(igb_t *);
101 static void igb_setup_msix_82576(igb_t *);
102 static void igb_setup_msix_82580(igb_t *);
103 static uint_t igb_intr_legacy(void *, void *);
104 static uint_t igb_intr_msi(void *, void *);
105 static uint_t igb_intr_rx(void *, void *);
106 static uint_t igb_intr_tx(void *, void *);
107 static uint_t igb_intr_tx_other(void *, void *);
108 static void igb_intr_rx_work(igb_rx_ring_t *);
109 static void igb_intr_tx_work(igb_tx_ring_t *);
110 static void igb_intr_link_work(igb_t *);
111 static void igb_get_driver_control(struct e1000_hw *);
112 static void igb_release_driver_control(struct e1000_hw *);
113
114 static int igb_attach(dev_info_t *, ddi_attach_cmd_t);
115 static int igb_detach(dev_info_t *, ddi_detach_cmd_t);
116 static int igb_resume(dev_info_t *);
117 static int igb_suspend(dev_info_t *);
118 static int igb_quiesce(dev_info_t *);
119 static void igb_unconfigure(dev_info_t *, igb_t *);
120 static int igb_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
121 const void *);
122 static void igb_fm_init(igb_t *);
123 static void igb_fm_fini(igb_t *);
124 static void igb_release_multicast(igb_t *);
125
126 char *igb_priv_props[] = {
127 "_eee_support",
128 "_tx_copy_thresh",
129 "_tx_recycle_thresh",
130 "_tx_overload_thresh",
131 "_tx_resched_thresh",
132 "_rx_copy_thresh",
133 "_rx_limit_per_intr",
134 "_intr_throttling",
135 "_adv_pause_cap",
136 "_adv_asym_pause_cap",
137 NULL
138 };
139
140 static struct cb_ops igb_cb_ops = {
141 nulldev, /* cb_open */
142 nulldev, /* cb_close */
143 nodev, /* cb_strategy */
144 nodev, /* cb_print */
145 nodev, /* cb_dump */
146 nodev, /* cb_read */
147 nodev, /* cb_write */
148 nodev, /* cb_ioctl */
149 nodev, /* cb_devmap */
150 nodev, /* cb_mmap */
151 nodev, /* cb_segmap */
152 nochpoll, /* cb_chpoll */
153 ddi_prop_op, /* cb_prop_op */
154 NULL, /* cb_stream */
155 D_MP | D_HOTPLUG, /* cb_flag */
156 CB_REV, /* cb_rev */
157 nodev, /* cb_aread */
158 nodev /* cb_awrite */
159 };
160
161 static struct dev_ops igb_dev_ops = {
162 DEVO_REV, /* devo_rev */
163 0, /* devo_refcnt */
164 NULL, /* devo_getinfo */
165 nulldev, /* devo_identify */
166 nulldev, /* devo_probe */
167 igb_attach, /* devo_attach */
168 igb_detach, /* devo_detach */
169 nodev, /* devo_reset */
170 &igb_cb_ops, /* devo_cb_ops */
171 NULL, /* devo_bus_ops */
172 ddi_power, /* devo_power */
173 igb_quiesce, /* devo_quiesce */
174 };
175
176 static struct modldrv igb_modldrv = {
177 &mod_driverops, /* Type of module. This one is a driver */
178 ident, /* Discription string */
179 &igb_dev_ops, /* driver ops */
180 };
181
182 static struct modlinkage igb_modlinkage = {
183 MODREV_1, &igb_modldrv, NULL
184 };
185
186 /* Access attributes for register mapping */
187 ddi_device_acc_attr_t igb_regs_acc_attr = {
188 DDI_DEVICE_ATTR_V1,
189 DDI_STRUCTURE_LE_ACC,
190 DDI_STRICTORDER_ACC,
191 DDI_FLAGERR_ACC
192 };
193
194 #define IGB_M_CALLBACK_FLAGS \
195 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
196
197 static mac_callbacks_t igb_m_callbacks = {
198 IGB_M_CALLBACK_FLAGS,
199 igb_m_stat,
200 igb_m_start,
201 igb_m_stop,
202 igb_m_promisc,
203 igb_m_multicst,
204 NULL,
205 NULL,
206 NULL,
207 igb_m_ioctl,
208 igb_m_getcapab,
209 NULL,
210 NULL,
211 igb_m_setprop,
212 igb_m_getprop,
213 igb_m_propinfo
214 };
215
216 /*
217 * Initialize capabilities of each supported adapter type
218 */
219 static adapter_info_t igb_82575_cap = {
220 /* limits */
221 4, /* maximum number of rx queues */
222 1, /* minimum number of rx queues */
223 4, /* default number of rx queues */
224 4, /* maximum number of tx queues */
225 1, /* minimum number of tx queues */
226 4, /* default number of tx queues */
227 65535, /* maximum interrupt throttle rate */
228 0, /* minimum interrupt throttle rate */
229 200, /* default interrupt throttle rate */
230
231 /* function pointers */
232 igb_enable_adapter_interrupts_82575,
233 igb_setup_msix_82575,
234
235 /* capabilities */
236 (IGB_FLAG_HAS_DCA | /* capability flags */
237 IGB_FLAG_VMDQ_POOL),
238
239 0xffc00000 /* mask for RXDCTL register */
240 };
241
242 static adapter_info_t igb_82576_cap = {
243 /* limits */
244 16, /* maximum number of rx queues */
245 1, /* minimum number of rx queues */
246 4, /* default number of rx queues */
247 16, /* maximum number of tx queues */
248 1, /* minimum number of tx queues */
249 4, /* default number of tx queues */
250 65535, /* maximum interrupt throttle rate */
251 0, /* minimum interrupt throttle rate */
252 200, /* default interrupt throttle rate */
253
254 /* function pointers */
255 igb_enable_adapter_interrupts_82576,
256 igb_setup_msix_82576,
257
258 /* capabilities */
259 (IGB_FLAG_HAS_DCA | /* capability flags */
260 IGB_FLAG_VMDQ_POOL |
261 IGB_FLAG_NEED_CTX_IDX),
262
263 0xffe00000 /* mask for RXDCTL register */
264 };
265
266 static adapter_info_t igb_82580_cap = {
267 /* limits */
268 8, /* maximum number of rx queues */
269 1, /* minimum number of rx queues */
270 4, /* default number of rx queues */
271 8, /* maximum number of tx queues */
272 1, /* minimum number of tx queues */
273 4, /* default number of tx queues */
274 65535, /* maximum interrupt throttle rate */
275 0, /* minimum interrupt throttle rate */
276 200, /* default interrupt throttle rate */
277
278 /* function pointers */
279 igb_enable_adapter_interrupts_82580,
280 igb_setup_msix_82580,
281
282 /* capabilities */
283 (IGB_FLAG_HAS_DCA | /* capability flags */
284 IGB_FLAG_VMDQ_POOL |
285 IGB_FLAG_NEED_CTX_IDX),
286
287 0xffe00000 /* mask for RXDCTL register */
288 };
289
290 static adapter_info_t igb_i350_cap = {
291 /* limits */
292 8, /* maximum number of rx queues */
293 1, /* minimum number of rx queues */
294 4, /* default number of rx queues */
295 8, /* maximum number of tx queues */
296 1, /* minimum number of tx queues */
297 4, /* default number of tx queues */
298 65535, /* maximum interrupt throttle rate */
299 0, /* minimum interrupt throttle rate */
300 200, /* default interrupt throttle rate */
301
302 /* function pointers */
303 igb_enable_adapter_interrupts_82580,
304 igb_setup_msix_82580,
305
306 /* capabilities */
307 (IGB_FLAG_HAS_DCA | /* capability flags */
308 IGB_FLAG_VMDQ_POOL |
309 IGB_FLAG_NEED_CTX_IDX),
310
311 0xffe00000 /* mask for RXDCTL register */
312 };
313
314 /*
315 * Module Initialization Functions
316 */
317
318 int
319 _init(void)
320 {
321 int status;
322
323 mac_init_ops(&igb_dev_ops, MODULE_NAME);
324
325 status = mod_install(&igb_modlinkage);
326
327 if (status != DDI_SUCCESS) {
328 mac_fini_ops(&igb_dev_ops);
329 }
330
331 return (status);
332 }
333
334 int
335 _fini(void)
336 {
337 int status;
338
339 status = mod_remove(&igb_modlinkage);
340
341 if (status == DDI_SUCCESS) {
342 mac_fini_ops(&igb_dev_ops);
343 }
344
345 return (status);
346
347 }
348
349 int
350 _info(struct modinfo *modinfop)
351 {
352 int status;
353
354 status = mod_info(&igb_modlinkage, modinfop);
355
356 return (status);
357 }
358
359 /*
360 * igb_attach - driver attach
361 *
362 * This function is the device specific initialization entry
363 * point. This entry point is required and must be written.
364 * The DDI_ATTACH command must be provided in the attach entry
365 * point. When attach() is called with cmd set to DDI_ATTACH,
366 * all normal kernel services (such as kmem_alloc(9F)) are
367 * available for use by the driver.
368 *
369 * The attach() function will be called once for each instance
370 * of the device on the system with cmd set to DDI_ATTACH.
371 * Until attach() succeeds, the only driver entry points which
372 * may be called are open(9E) and getinfo(9E).
373 */
374 static int
375 igb_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
376 {
377 igb_t *igb;
378 struct igb_osdep *osdep;
379 struct e1000_hw *hw;
380 int instance;
381
382 /*
383 * Check the command and perform corresponding operations
384 */
385 switch (cmd) {
386 default:
387 return (DDI_FAILURE);
388
389 case DDI_RESUME:
390 return (igb_resume(devinfo));
391
392 case DDI_ATTACH:
393 break;
394 }
395
396 /* Get the device instance */
397 instance = ddi_get_instance(devinfo);
398
399 /* Allocate memory for the instance data structure */
400 igb = kmem_zalloc(sizeof (igb_t), KM_SLEEP);
401
402 igb->dip = devinfo;
403 igb->instance = instance;
404
405 hw = &igb->hw;
406 osdep = &igb->osdep;
407 hw->back = osdep;
408 osdep->igb = igb;
409
410 /* Attach the instance pointer to the dev_info data structure */
411 ddi_set_driver_private(devinfo, igb);
412
413
414 /* Initialize for fma support */
415 igb->fm_capabilities = igb_get_prop(igb, "fm-capable",
416 0, 0x0f,
417 DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
418 DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
419 igb_fm_init(igb);
420 igb->attach_progress |= ATTACH_PROGRESS_FMINIT;
421
422 /*
423 * Map PCI config space registers
424 */
425 if (pci_config_setup(devinfo, &osdep->cfg_handle) != DDI_SUCCESS) {
426 igb_error(igb, "Failed to map PCI configurations");
427 goto attach_fail;
428 }
429 igb->attach_progress |= ATTACH_PROGRESS_PCI_CONFIG;
430
431 /*
432 * Identify the chipset family
433 */
434 if (igb_identify_hardware(igb) != IGB_SUCCESS) {
435 igb_error(igb, "Failed to identify hardware");
436 goto attach_fail;
437 }
438
439 /*
440 * Map device registers
441 */
442 if (igb_regs_map(igb) != IGB_SUCCESS) {
443 igb_error(igb, "Failed to map device registers");
444 goto attach_fail;
445 }
446 igb->attach_progress |= ATTACH_PROGRESS_REGS_MAP;
447
448 /*
449 * Initialize driver parameters
450 */
451 igb_init_properties(igb);
452 igb->attach_progress |= ATTACH_PROGRESS_PROPS;
453
454 /*
455 * Allocate interrupts
456 */
457 if (igb_alloc_intrs(igb) != IGB_SUCCESS) {
458 igb_error(igb, "Failed to allocate interrupts");
459 goto attach_fail;
460 }
461 igb->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;
462
463 /*
464 * Allocate rx/tx rings based on the ring numbers.
465 * The actual numbers of rx/tx rings are decided by the number of
466 * allocated interrupt vectors, so we should allocate the rings after
467 * interrupts are allocated.
468 */
469 if (igb_alloc_rings(igb) != IGB_SUCCESS) {
470 igb_error(igb, "Failed to allocate rx/tx rings or groups");
471 goto attach_fail;
472 }
473 igb->attach_progress |= ATTACH_PROGRESS_ALLOC_RINGS;
474
475 /*
476 * Add interrupt handlers
477 */
478 if (igb_add_intr_handlers(igb) != IGB_SUCCESS) {
479 igb_error(igb, "Failed to add interrupt handlers");
480 goto attach_fail;
481 }
482 igb->attach_progress |= ATTACH_PROGRESS_ADD_INTR;
483
484 /*
485 * Initialize driver parameters
486 */
487 if (igb_init_driver_settings(igb) != IGB_SUCCESS) {
488 igb_error(igb, "Failed to initialize driver settings");
489 goto attach_fail;
490 }
491
492 if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK) {
493 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
494 goto attach_fail;
495 }
496
497 /*
498 * Initialize mutexes for this device.
499 * Do this before enabling the interrupt handler and
500 * register the softint to avoid the condition where
501 * interrupt handler can try using uninitialized mutex
502 */
503 igb_init_locks(igb);
504 igb->attach_progress |= ATTACH_PROGRESS_LOCKS;
505
506 /*
507 * Initialize the adapter
508 */
509 if (igb_init(igb) != IGB_SUCCESS) {
510 igb_error(igb, "Failed to initialize adapter");
511 goto attach_fail;
512 }
513 igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;
514
515 /*
516 * Initialize statistics
517 */
518 if (igb_init_stats(igb) != IGB_SUCCESS) {
519 igb_error(igb, "Failed to initialize statistics");
520 goto attach_fail;
521 }
522 igb->attach_progress |= ATTACH_PROGRESS_STATS;
523
524 /*
525 * Register the driver to the MAC
526 */
527 if (igb_register_mac(igb) != IGB_SUCCESS) {
528 igb_error(igb, "Failed to register MAC");
529 goto attach_fail;
530 }
531 igb->attach_progress |= ATTACH_PROGRESS_MAC;
532
533 /*
534 * Now that mutex locks are initialized, and the chip is also
535 * initialized, enable interrupts.
536 */
537 if (igb_enable_intrs(igb) != IGB_SUCCESS) {
538 igb_error(igb, "Failed to enable DDI interrupts");
539 goto attach_fail;
540 }
541 igb->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;
542
543 igb_log(igb, "%s", igb_version);
544 atomic_or_32(&igb->igb_state, IGB_INITIALIZED);
545
546 /*
547 * Newer models have Energy Efficient Ethernet, let's disable this by
548 * default.
549 */
550 if (igb->hw.mac.type == e1000_i350)
551 (void) e1000_set_eee_i350(&igb->hw);
552
553 return (DDI_SUCCESS);
554
555 attach_fail:
556 igb_unconfigure(devinfo, igb);
557 return (DDI_FAILURE);
558 }
559
560 /*
561 * igb_detach - driver detach
562 *
563 * The detach() function is the complement of the attach routine.
564 * If cmd is set to DDI_DETACH, detach() is used to remove the
565 * state associated with a given instance of a device node
566 * prior to the removal of that instance from the system.
567 *
568 * The detach() function will be called once for each instance
569 * of the device for which there has been a successful attach()
570 * once there are no longer any opens on the device.
571 *
572 * Interrupts routine are disabled, All memory allocated by this
573 * driver are freed.
574 */
575 static int
576 igb_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
577 {
578 igb_t *igb;
579
580 /*
581 * Check detach command
582 */
583 switch (cmd) {
584 default:
585 return (DDI_FAILURE);
586
587 case DDI_SUSPEND:
588 return (igb_suspend(devinfo));
589
590 case DDI_DETACH:
591 break;
592 }
593
594
595 /*
596 * Get the pointer to the driver private data structure
597 */
598 igb = (igb_t *)ddi_get_driver_private(devinfo);
599 if (igb == NULL)
600 return (DDI_FAILURE);
601
602 /*
603 * Unregister MAC. If failed, we have to fail the detach
604 */
605 if (mac_unregister(igb->mac_hdl) != 0) {
606 igb_error(igb, "Failed to unregister MAC");
607 return (DDI_FAILURE);
608 }
609 igb->attach_progress &= ~ATTACH_PROGRESS_MAC;
610
611 /*
612 * If the device is still running, it needs to be stopped first.
613 * This check is necessary because under some specific circumstances,
614 * the detach routine can be called without stopping the interface
615 * first.
616 */
617 mutex_enter(&igb->gen_lock);
618 if (igb->igb_state & IGB_STARTED) {
619 atomic_and_32(&igb->igb_state, ~IGB_STARTED);
620 igb_stop(igb, B_TRUE);
621 mutex_exit(&igb->gen_lock);
622 /* Disable and stop the watchdog timer */
623 igb_disable_watchdog_timer(igb);
624 } else
625 mutex_exit(&igb->gen_lock);
626
627 /*
628 * Check if there are still rx buffers held by the upper layer.
629 * If so, fail the detach.
630 */
631 if (!igb_rx_drain(igb))
632 return (DDI_FAILURE);
633
634 /*
635 * Do the remaining unconfigure routines
636 */
637 igb_unconfigure(devinfo, igb);
638
639 return (DDI_SUCCESS);
640 }
641
642 /*
643 * quiesce(9E) entry point.
644 *
645 * This function is called when the system is single-threaded at high
646 * PIL with preemption disabled. Therefore, this function must not be
647 * blocked.
648 *
649 * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
650 * DDI_FAILURE indicates an error condition and should almost never happen.
651 */
652 static int
653 igb_quiesce(dev_info_t *devinfo)
654 {
655 igb_t *igb;
656 struct e1000_hw *hw;
657
658 igb = (igb_t *)ddi_get_driver_private(devinfo);
659
660 if (igb == NULL)
661 return (DDI_FAILURE);
662
663 hw = &igb->hw;
664
665 /*
666 * Disable the adapter interrupts
667 */
668 igb_disable_adapter_interrupts(igb);
669
670 /* Tell firmware driver is no longer in control */
671 igb_release_driver_control(hw);
672
673 /*
674 * Reset the chipset
675 */
676 (void) e1000_reset_hw(hw);
677
678 /*
679 * Reset PHY if possible
680 */
681 if (e1000_check_reset_block(hw) == E1000_SUCCESS)
682 (void) e1000_phy_hw_reset(hw);
683
684 return (DDI_SUCCESS);
685 }
686
687 /*
688 * igb_unconfigure - release all resources held by this instance
689 */
690 static void
691 igb_unconfigure(dev_info_t *devinfo, igb_t *igb)
692 {
693 /*
694 * Disable interrupt
695 */
696 if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
697 (void) igb_disable_intrs(igb);
698 }
699
700 /*
701 * Unregister MAC
702 */
703 if (igb->attach_progress & ATTACH_PROGRESS_MAC) {
704 (void) mac_unregister(igb->mac_hdl);
705 }
706
707 /*
708 * Free statistics
709 */
710 if (igb->attach_progress & ATTACH_PROGRESS_STATS) {
711 kstat_delete((kstat_t *)igb->igb_ks);
712 }
713
714 /*
715 * Remove interrupt handlers
716 */
717 if (igb->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
718 igb_rem_intr_handlers(igb);
719 }
720
721 /*
722 * Remove interrupts
723 */
724 if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_INTR) {
725 igb_rem_intrs(igb);
726 }
727
728 /*
729 * Remove driver properties
730 */
731 if (igb->attach_progress & ATTACH_PROGRESS_PROPS) {
732 (void) ddi_prop_remove_all(devinfo);
733 }
734
735 /*
736 * Stop the adapter
737 */
738 if (igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) {
739 mutex_enter(&igb->gen_lock);
740 igb_stop_adapter(igb);
741 mutex_exit(&igb->gen_lock);
742 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
743 ddi_fm_service_impact(igb->dip, DDI_SERVICE_UNAFFECTED);
744 }
745
746 /*
747 * Free multicast table
748 */
749 igb_release_multicast(igb);
750
751 /*
752 * Free register handle
753 */
754 if (igb->attach_progress & ATTACH_PROGRESS_REGS_MAP) {
755 if (igb->osdep.reg_handle != NULL)
756 ddi_regs_map_free(&igb->osdep.reg_handle);
757 }
758
759 /*
760 * Free PCI config handle
761 */
762 if (igb->attach_progress & ATTACH_PROGRESS_PCI_CONFIG) {
763 if (igb->osdep.cfg_handle != NULL)
764 pci_config_teardown(&igb->osdep.cfg_handle);
765 }
766
767 /*
768 * Free locks
769 */
770 if (igb->attach_progress & ATTACH_PROGRESS_LOCKS) {
771 igb_destroy_locks(igb);
772 }
773
774 /*
775 * Free the rx/tx rings
776 */
777 if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_RINGS) {
778 igb_free_rings(igb);
779 }
780
781 /*
782 * Remove FMA
783 */
784 if (igb->attach_progress & ATTACH_PROGRESS_FMINIT) {
785 igb_fm_fini(igb);
786 }
787
788 /*
789 * Free the driver data structure
790 */
791 kmem_free(igb, sizeof (igb_t));
792
793 ddi_set_driver_private(devinfo, NULL);
794 }
795
796 /*
797 * igb_register_mac - Register the driver and its function pointers with
798 * the GLD interface
799 */
800 static int
801 igb_register_mac(igb_t *igb)
802 {
803 struct e1000_hw *hw = &igb->hw;
804 mac_register_t *mac;
805 int status;
806
807 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
808 return (IGB_FAILURE);
809
810 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
811 mac->m_driver = igb;
812 mac->m_dip = igb->dip;
813 mac->m_src_addr = hw->mac.addr;
814 mac->m_callbacks = &igb_m_callbacks;
815 mac->m_min_sdu = 0;
816 mac->m_max_sdu = igb->max_frame_size -
817 sizeof (struct ether_vlan_header) - ETHERFCSL;
818 mac->m_margin = VLAN_TAGSZ;
819 mac->m_priv_props = igb_priv_props;
820 mac->m_v12n = MAC_VIRT_LEVEL1;
821
822 status = mac_register(mac, &igb->mac_hdl);
823
824 mac_free(mac);
825
826 return ((status == 0) ? IGB_SUCCESS : IGB_FAILURE);
827 }
828
829 /*
830 * igb_identify_hardware - Identify the type of the chipset
831 */
832 static int
833 igb_identify_hardware(igb_t *igb)
834 {
835 struct e1000_hw *hw = &igb->hw;
836 struct igb_osdep *osdep = &igb->osdep;
837
838 /*
839 * Get the device id
840 */
841 hw->vendor_id =
842 pci_config_get16(osdep->cfg_handle, PCI_CONF_VENID);
843 hw->device_id =
844 pci_config_get16(osdep->cfg_handle, PCI_CONF_DEVID);
845 hw->revision_id =
846 pci_config_get8(osdep->cfg_handle, PCI_CONF_REVID);
847 hw->subsystem_device_id =
848 pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBSYSID);
849 hw->subsystem_vendor_id =
850 pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBVENID);
851
852 /*
853 * Set the mac type of the adapter based on the device id
854 */
855 if (e1000_set_mac_type(hw) != E1000_SUCCESS) {
856 return (IGB_FAILURE);
857 }
858
859 /*
860 * Install adapter capabilities based on mac type
861 */
862 switch (hw->mac.type) {
863 case e1000_82575:
864 igb->capab = &igb_82575_cap;
865 break;
866 case e1000_82576:
867 igb->capab = &igb_82576_cap;
868 break;
869 case e1000_82580:
870 igb->capab = &igb_82580_cap;
871 break;
872 case e1000_i350:
873 igb->capab = &igb_i350_cap;
874 break;
875 default:
876 return (IGB_FAILURE);
877 }
878
879 return (IGB_SUCCESS);
880 }
881
882 /*
883 * igb_regs_map - Map the device registers
884 */
885 static int
886 igb_regs_map(igb_t *igb)
887 {
888 dev_info_t *devinfo = igb->dip;
889 struct e1000_hw *hw = &igb->hw;
890 struct igb_osdep *osdep = &igb->osdep;
891 off_t mem_size;
892
893 /*
894 * First get the size of device registers to be mapped.
895 */
896 if (ddi_dev_regsize(devinfo, IGB_ADAPTER_REGSET, &mem_size) !=
897 DDI_SUCCESS) {
898 return (IGB_FAILURE);
899 }
900
901 /*
902 * Call ddi_regs_map_setup() to map registers
903 */
904 if ((ddi_regs_map_setup(devinfo, IGB_ADAPTER_REGSET,
905 (caddr_t *)&hw->hw_addr, 0,
906 mem_size, &igb_regs_acc_attr,
907 &osdep->reg_handle)) != DDI_SUCCESS) {
908 return (IGB_FAILURE);
909 }
910
911 return (IGB_SUCCESS);
912 }
913
914 /*
915 * igb_init_properties - Initialize driver properties
916 */
917 static void
918 igb_init_properties(igb_t *igb)
919 {
920 /*
921 * Get conf file properties, including link settings
922 * jumbo frames, ring number, descriptor number, etc.
923 */
924 igb_get_conf(igb);
925 }
926
927 /*
928 * igb_init_driver_settings - Initialize driver settings
929 *
930 * The settings include hardware function pointers, bus information,
931 * rx/tx rings settings, link state, and any other parameters that
932 * need to be setup during driver initialization.
933 */
934 static int
935 igb_init_driver_settings(igb_t *igb)
936 {
937 struct e1000_hw *hw = &igb->hw;
938 igb_rx_ring_t *rx_ring;
939 igb_tx_ring_t *tx_ring;
940 uint32_t rx_size;
941 uint32_t tx_size;
942 int i;
943
944 /*
945 * Initialize chipset specific hardware function pointers
946 */
947 if (e1000_setup_init_funcs(hw, B_TRUE) != E1000_SUCCESS) {
948 return (IGB_FAILURE);
949 }
950
951 /*
952 * Get bus information
953 */
954 if (e1000_get_bus_info(hw) != E1000_SUCCESS) {
955 return (IGB_FAILURE);
956 }
957
958 /*
959 * Get the system page size
960 */
961 igb->page_size = ddi_ptob(igb->dip, (ulong_t)1);
962
963 /*
964 * Set rx buffer size
965 * The IP header alignment room is counted in the calculation.
966 * The rx buffer size is in unit of 1K that is required by the
967 * chipset hardware.
968 */
969 rx_size = igb->max_frame_size + IPHDR_ALIGN_ROOM;
970 igb->rx_buf_size = ((rx_size >> 10) +
971 ((rx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
972
973 /*
974 * Set tx buffer size
975 */
976 tx_size = igb->max_frame_size;
977 igb->tx_buf_size = ((tx_size >> 10) +
978 ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
979
980 /*
981 * Initialize rx/tx rings parameters
982 */
983 for (i = 0; i < igb->num_rx_rings; i++) {
984 rx_ring = &igb->rx_rings[i];
985 rx_ring->index = i;
986 rx_ring->igb = igb;
987 }
988
989 for (i = 0; i < igb->num_tx_rings; i++) {
990 tx_ring = &igb->tx_rings[i];
991 tx_ring->index = i;
992 tx_ring->igb = igb;
993 if (igb->tx_head_wb_enable)
994 tx_ring->tx_recycle = igb_tx_recycle_head_wb;
995 else
996 tx_ring->tx_recycle = igb_tx_recycle_legacy;
997
998 tx_ring->ring_size = igb->tx_ring_size;
999 tx_ring->free_list_size = igb->tx_ring_size +
1000 (igb->tx_ring_size >> 1);
1001 }
1002
1003 /*
1004 * Initialize values of interrupt throttling rates
1005 */
1006 for (i = 1; i < MAX_NUM_EITR; i++)
1007 igb->intr_throttling[i] = igb->intr_throttling[0];
1008
1009 /*
1010 * The initial link state should be "unknown"
1011 */
1012 igb->link_state = LINK_STATE_UNKNOWN;
1013
1014 return (IGB_SUCCESS);
1015 }
1016
1017 /*
1018 * igb_init_locks - Initialize locks
1019 */
1020 static void
1021 igb_init_locks(igb_t *igb)
1022 {
1023 igb_rx_ring_t *rx_ring;
1024 igb_tx_ring_t *tx_ring;
1025 int i;
1026
1027 for (i = 0; i < igb->num_rx_rings; i++) {
1028 rx_ring = &igb->rx_rings[i];
1029 mutex_init(&rx_ring->rx_lock, NULL,
1030 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1031 }
1032
1033 for (i = 0; i < igb->num_tx_rings; i++) {
1034 tx_ring = &igb->tx_rings[i];
1035 mutex_init(&tx_ring->tx_lock, NULL,
1036 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1037 mutex_init(&tx_ring->recycle_lock, NULL,
1038 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1039 mutex_init(&tx_ring->tcb_head_lock, NULL,
1040 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1041 mutex_init(&tx_ring->tcb_tail_lock, NULL,
1042 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1043 }
1044
1045 mutex_init(&igb->gen_lock, NULL,
1046 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1047
1048 mutex_init(&igb->watchdog_lock, NULL,
1049 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1050
1051 mutex_init(&igb->link_lock, NULL,
1052 MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1053 }
1054
1055 /*
1056 * igb_destroy_locks - Destroy locks
1057 */
1058 static void
1059 igb_destroy_locks(igb_t *igb)
1060 {
1061 igb_rx_ring_t *rx_ring;
1062 igb_tx_ring_t *tx_ring;
1063 int i;
1064
1065 for (i = 0; i < igb->num_rx_rings; i++) {
1066 rx_ring = &igb->rx_rings[i];
1067 mutex_destroy(&rx_ring->rx_lock);
1068 }
1069
1070 for (i = 0; i < igb->num_tx_rings; i++) {
1071 tx_ring = &igb->tx_rings[i];
1072 mutex_destroy(&tx_ring->tx_lock);
1073 mutex_destroy(&tx_ring->recycle_lock);
1074 mutex_destroy(&tx_ring->tcb_head_lock);
1075 mutex_destroy(&tx_ring->tcb_tail_lock);
1076 }
1077
1078 mutex_destroy(&igb->gen_lock);
1079 mutex_destroy(&igb->watchdog_lock);
1080 mutex_destroy(&igb->link_lock);
1081 }
1082
1083 static int
1084 igb_resume(dev_info_t *devinfo)
1085 {
1086 igb_t *igb;
1087
1088 igb = (igb_t *)ddi_get_driver_private(devinfo);
1089 if (igb == NULL)
1090 return (DDI_FAILURE);
1091
1092 mutex_enter(&igb->gen_lock);
1093
1094 /*
1095 * Enable interrupts
1096 */
1097 if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
1098 if (igb_enable_intrs(igb) != IGB_SUCCESS) {
1099 igb_error(igb, "Failed to enable DDI interrupts");
1100 mutex_exit(&igb->gen_lock);
1101 return (DDI_FAILURE);
1102 }
1103 }
1104
1105 if (igb->igb_state & IGB_STARTED) {
1106 if (igb_start(igb, B_FALSE) != IGB_SUCCESS) {
1107 mutex_exit(&igb->gen_lock);
1108 return (DDI_FAILURE);
1109 }
1110
1111 /*
1112 * Enable and start the watchdog timer
1113 */
1114 igb_enable_watchdog_timer(igb);
1115 }
1116
1117 atomic_and_32(&igb->igb_state, ~IGB_SUSPENDED);
1118
1119 mutex_exit(&igb->gen_lock);
1120
1121 return (DDI_SUCCESS);
1122 }
1123
1124 static int
1125 igb_suspend(dev_info_t *devinfo)
1126 {
1127 igb_t *igb;
1128
1129 igb = (igb_t *)ddi_get_driver_private(devinfo);
1130 if (igb == NULL)
1131 return (DDI_FAILURE);
1132
1133 mutex_enter(&igb->gen_lock);
1134
1135 atomic_or_32(&igb->igb_state, IGB_SUSPENDED);
1136
1137 /*
1138 * Disable interrupts
1139 */
1140 if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
1141 (void) igb_disable_intrs(igb);
1142 }
1143
1144 if (!(igb->igb_state & IGB_STARTED)) {
1145 mutex_exit(&igb->gen_lock);
1146 return (DDI_SUCCESS);
1147 }
1148
1149 igb_stop(igb, B_FALSE);
1150
1151 mutex_exit(&igb->gen_lock);
1152
1153 /*
1154 * Disable and stop the watchdog timer
1155 */
1156 igb_disable_watchdog_timer(igb);
1157
1158 return (DDI_SUCCESS);
1159 }
1160
1161 static int
1162 igb_init(igb_t *igb)
1163 {
1164 mutex_enter(&igb->gen_lock);
1165
1166 /*
1167 * Initilize the adapter
1168 */
1169 if (igb_init_adapter(igb) != IGB_SUCCESS) {
1170 mutex_exit(&igb->gen_lock);
1171 igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1172 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1173 return (IGB_FAILURE);
1174 }
1175
1176 mutex_exit(&igb->gen_lock);
1177
1178 return (IGB_SUCCESS);
1179 }
1180
1181 /*
1182 * igb_init_mac_address - Initialize the default MAC address
1183 *
1184 * On success, the MAC address is entered in the igb->hw.mac.addr
1185 * and hw->mac.perm_addr fields and the adapter's RAR(0) receive
1186 * address register.
1187 *
1188 * Important side effects:
1189 * 1. adapter is reset - this is required to put it in a known state.
1190 * 2. all of non-volatile memory (NVM) is read & checksummed - NVM is where
1191 * MAC address and all default settings are stored, so a valid checksum
1192 * is required.
1193 */
1194 static int
1195 igb_init_mac_address(igb_t *igb)
1196 {
1197 struct e1000_hw *hw = &igb->hw;
1198
1199 ASSERT(mutex_owned(&igb->gen_lock));
1200
1201 /*
1202 * Reset chipset to put the hardware in a known state
1203 * before we try to get MAC address from NVM.
1204 */
1205 if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1206 igb_error(igb, "Adapter reset failed.");
1207 goto init_mac_fail;
1208 }
1209
1210 /*
1211 * NVM validation
1212 */
1213 if (e1000_validate_nvm_checksum(hw) < 0) {
1214 /*
1215 * Some PCI-E parts fail the first check due to
1216 * the link being in sleep state. Call it again,
1217 * if it fails a second time its a real issue.
1218 */
1219 if (e1000_validate_nvm_checksum(hw) < 0) {
1220 igb_error(igb,
1221 "Invalid NVM checksum. Please contact "
1222 "the vendor to update the NVM.");
1223 goto init_mac_fail;
1224 }
1225 }
1226
1227 /*
1228 * Get the mac address
1229 * This function should handle SPARC case correctly.
1230 */
1231 if (!igb_find_mac_address(igb)) {
1232 igb_error(igb, "Failed to get the mac address");
1233 goto init_mac_fail;
1234 }
1235
1236 /* Validate mac address */
1237 if (!is_valid_mac_addr(hw->mac.addr)) {
1238 igb_error(igb, "Invalid mac address");
1239 goto init_mac_fail;
1240 }
1241
1242 return (IGB_SUCCESS);
1243
1244 init_mac_fail:
1245 return (IGB_FAILURE);
1246 }
1247
1248 /*
1249 * igb_init_adapter - Initialize the adapter
1250 */
1251 static int
1252 igb_init_adapter(igb_t *igb)
1253 {
1254 struct e1000_hw *hw = &igb->hw;
1255 uint32_t pba;
1256 uint32_t high_water;
1257 int i;
1258
1259 ASSERT(mutex_owned(&igb->gen_lock));
1260
1261 /*
1262 * In order to obtain the default MAC address, this will reset the
1263 * adapter and validate the NVM that the address and many other
1264 * default settings come from.
1265 */
1266 if (igb_init_mac_address(igb) != IGB_SUCCESS) {
1267 igb_error(igb, "Failed to initialize MAC address");
1268 goto init_adapter_fail;
1269 }
1270
1271 /*
1272 * Setup flow control
1273 *
1274 * These parameters set thresholds for the adapter's generation(Tx)
1275 * and response(Rx) to Ethernet PAUSE frames. These are just threshold
1276 * settings. Flow control is enabled or disabled in the configuration
1277 * file.
1278 * High-water mark is set down from the top of the rx fifo (not
1279 * sensitive to max_frame_size) and low-water is set just below
1280 * high-water mark.
1281 * The high water mark must be low enough to fit one full frame above
1282 * it in the rx FIFO. Should be the lower of:
1283 * 90% of the Rx FIFO size, or the full Rx FIFO size minus one full
1284 * frame.
1285 */
1286 /*
1287 * The default setting of PBA is correct for 82575 and other supported
1288 * adapters do not have the E1000_PBA register, so PBA value is only
1289 * used for calculation here and is never written to the adapter.
1290 */
1291 if (hw->mac.type == e1000_82575) {
1292 pba = E1000_PBA_34K;
1293 } else {
1294 pba = E1000_PBA_64K;
1295 }
1296
1297 high_water = min(((pba << 10) * 9 / 10),
1298 ((pba << 10) - igb->max_frame_size));
1299
1300 if (hw->mac.type == e1000_82575) {
1301 /* 8-byte granularity */
1302 hw->fc.high_water = high_water & 0xFFF8;
1303 hw->fc.low_water = hw->fc.high_water - 8;
1304 } else {
1305 /* 16-byte granularity */
1306 hw->fc.high_water = high_water & 0xFFF0;
1307 hw->fc.low_water = hw->fc.high_water - 16;
1308 }
1309
1310 hw->fc.pause_time = E1000_FC_PAUSE_TIME;
1311 hw->fc.send_xon = B_TRUE;
1312
1313 (void) e1000_validate_mdi_setting(hw);
1314
1315 /*
1316 * Reset the chipset hardware the second time to put PBA settings
1317 * into effect.
1318 */
1319 if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1320 igb_error(igb, "Second reset failed");
1321 goto init_adapter_fail;
1322 }
1323
1324 /*
1325 * Don't wait for auto-negotiation to complete
1326 */
1327 hw->phy.autoneg_wait_to_complete = B_FALSE;
1328
1329 /*
1330 * Copper options
1331 */
1332 if (hw->phy.media_type == e1000_media_type_copper) {
1333 hw->phy.mdix = 0; /* AUTO_ALL_MODES */
1334 hw->phy.disable_polarity_correction = B_FALSE;
1335 hw->phy.ms_type = e1000_ms_hw_default; /* E1000_MASTER_SLAVE */
1336 }
1337
1338 /*
1339 * Initialize link settings
1340 */
1341 (void) igb_setup_link(igb, B_FALSE);
1342
1343 /*
1344 * Configure/Initialize hardware
1345 */
1346 if (e1000_init_hw(hw) != E1000_SUCCESS) {
1347 igb_error(igb, "Failed to initialize hardware");
1348 goto init_adapter_fail;
1349 }
1350
1351 /*
1352 * Start the link setup timer
1353 */
1354 igb_start_link_timer(igb);
1355
1356 /*
1357 * Disable wakeup control by default
1358 */
1359 E1000_WRITE_REG(hw, E1000_WUC, 0);
1360
1361 /*
1362 * Record phy info in hw struct
1363 */
1364 (void) e1000_get_phy_info(hw);
1365
1366 /*
1367 * Make sure driver has control
1368 */
1369 igb_get_driver_control(hw);
1370
1371 /*
1372 * Restore LED settings to the default from EEPROM
1373 * to meet the standard for Sun platforms.
1374 */
1375 (void) e1000_cleanup_led(hw);
1376
1377 /*
1378 * Setup MSI-X interrupts
1379 */
1380 if (igb->intr_type == DDI_INTR_TYPE_MSIX)
1381 igb->capab->setup_msix(igb);
1382
1383 /*
1384 * Initialize unicast addresses.
1385 */
1386 igb_init_unicst(igb);
1387
1388 /*
1389 * Setup and initialize the mctable structures.
1390 */
1391 igb_setup_multicst(igb);
1392
1393 /*
1394 * Set interrupt throttling rate
1395 */
1396 for (i = 0; i < igb->intr_cnt; i++)
1397 E1000_WRITE_REG(hw, E1000_EITR(i), igb->intr_throttling[i]);
1398
1399 /*
1400 * Save the state of the phy
1401 */
1402 igb_get_phy_state(igb);
1403
1404 igb_param_sync(igb);
1405
1406 return (IGB_SUCCESS);
1407
1408 init_adapter_fail:
1409 /*
1410 * Reset PHY if possible
1411 */
1412 if (e1000_check_reset_block(hw) == E1000_SUCCESS)
1413 (void) e1000_phy_hw_reset(hw);
1414
1415 return (IGB_FAILURE);
1416 }
1417
1418 /*
1419 * igb_stop_adapter - Stop the adapter
1420 */
1421 static void
1422 igb_stop_adapter(igb_t *igb)
1423 {
1424 struct e1000_hw *hw = &igb->hw;
1425
1426 ASSERT(mutex_owned(&igb->gen_lock));
1427
1428 /* Stop the link setup timer */
1429 igb_stop_link_timer(igb);
1430
1431 /* Tell firmware driver is no longer in control */
1432 igb_release_driver_control(hw);
1433
1434 /*
1435 * Reset the chipset
1436 */
1437 if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1438 igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1439 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1440 }
1441
1442 /*
1443 * e1000_phy_hw_reset is not needed here, MAC reset above is sufficient
1444 */
1445 }
1446
1447 /*
1448 * igb_reset - Reset the chipset and restart the driver.
1449 *
1450 * It involves stopping and re-starting the chipset,
1451 * and re-configuring the rx/tx rings.
1452 */
1453 static int
1454 igb_reset(igb_t *igb)
1455 {
1456 int i;
1457
1458 mutex_enter(&igb->gen_lock);
1459
1460 ASSERT(igb->igb_state & IGB_STARTED);
1461 atomic_and_32(&igb->igb_state, ~IGB_STARTED);
1462
1463 /*
1464 * Disable the adapter interrupts to stop any rx/tx activities
1465 * before draining pending data and resetting hardware.
1466 */
1467 igb_disable_adapter_interrupts(igb);
1468
1469 /*
1470 * Drain the pending transmit packets
1471 */
1472 (void) igb_tx_drain(igb);
1473
1474 for (i = 0; i < igb->num_rx_rings; i++)
1475 mutex_enter(&igb->rx_rings[i].rx_lock);
1476 for (i = 0; i < igb->num_tx_rings; i++)
1477 mutex_enter(&igb->tx_rings[i].tx_lock);
1478
1479 /*
1480 * Stop the adapter
1481 */
1482 igb_stop_adapter(igb);
1483
1484 /*
1485 * Clean the pending tx data/resources
1486 */
1487 igb_tx_clean(igb);
1488
1489 /*
1490 * Start the adapter
1491 */
1492 if (igb_init_adapter(igb) != IGB_SUCCESS) {
1493 igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1494 goto reset_failure;
1495 }
1496
1497 /*
1498 * Setup the rx/tx rings
1499 */
1500 igb->tx_ring_init = B_FALSE;
1501 igb_setup_rings(igb);
1502
1503 atomic_and_32(&igb->igb_state, ~(IGB_ERROR | IGB_STALL));
1504
1505 /*
1506 * Enable adapter interrupts
1507 * The interrupts must be enabled after the driver state is START
1508 */
1509 igb->capab->enable_intr(igb);
1510
1511 if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
1512 goto reset_failure;
1513
1514 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1515 goto reset_failure;
1516
1517 for (i = igb->num_tx_rings - 1; i >= 0; i--)
1518 mutex_exit(&igb->tx_rings[i].tx_lock);
1519 for (i = igb->num_rx_rings - 1; i >= 0; i--)
1520 mutex_exit(&igb->rx_rings[i].rx_lock);
1521
1522 atomic_or_32(&igb->igb_state, IGB_STARTED);
1523
1524 mutex_exit(&igb->gen_lock);
1525
1526 return (IGB_SUCCESS);
1527
1528 reset_failure:
1529 for (i = igb->num_tx_rings - 1; i >= 0; i--)
1530 mutex_exit(&igb->tx_rings[i].tx_lock);
1531 for (i = igb->num_rx_rings - 1; i >= 0; i--)
1532 mutex_exit(&igb->rx_rings[i].rx_lock);
1533
1534 mutex_exit(&igb->gen_lock);
1535
1536 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1537
1538 return (IGB_FAILURE);
1539 }
1540
1541 /*
1542 * igb_tx_clean - Clean the pending transmit packets and DMA resources
1543 */
1544 static void
1545 igb_tx_clean(igb_t *igb)
1546 {
1547 igb_tx_ring_t *tx_ring;
1548 tx_control_block_t *tcb;
1549 link_list_t pending_list;
1550 uint32_t desc_num;
1551 int i, j;
1552
1553 LINK_LIST_INIT(&pending_list);
1554
1555 for (i = 0; i < igb->num_tx_rings; i++) {
1556 tx_ring = &igb->tx_rings[i];
1557
1558 mutex_enter(&tx_ring->recycle_lock);
1559
1560 /*
1561 * Clean the pending tx data - the pending packets in the
1562 * work_list that have no chances to be transmitted again.
1563 *
1564 * We must ensure the chipset is stopped or the link is down
1565 * before cleaning the transmit packets.
1566 */
1567 desc_num = 0;
1568 for (j = 0; j < tx_ring->ring_size; j++) {
1569 tcb = tx_ring->work_list[j];
1570 if (tcb != NULL) {
1571 desc_num += tcb->desc_num;
1572
1573 tx_ring->work_list[j] = NULL;
1574
1575 igb_free_tcb(tcb);
1576
1577 LIST_PUSH_TAIL(&pending_list, &tcb->link);
1578 }
1579 }
1580
1581 if (desc_num > 0) {
1582 atomic_add_32(&tx_ring->tbd_free, desc_num);
1583 ASSERT(tx_ring->tbd_free == tx_ring->ring_size);
1584
1585 /*
1586 * Reset the head and tail pointers of the tbd ring;
1587 * Reset the head write-back if it is enabled.
1588 */
1589 tx_ring->tbd_head = 0;
1590 tx_ring->tbd_tail = 0;
1591 if (igb->tx_head_wb_enable)
1592 *tx_ring->tbd_head_wb = 0;
1593
1594 E1000_WRITE_REG(&igb->hw, E1000_TDH(tx_ring->index), 0);
1595 E1000_WRITE_REG(&igb->hw, E1000_TDT(tx_ring->index), 0);
1596 }
1597
1598 mutex_exit(&tx_ring->recycle_lock);
1599
1600 /*
1601 * Add the tx control blocks in the pending list to
1602 * the free list.
1603 */
1604 igb_put_free_list(tx_ring, &pending_list);
1605 }
1606 }
1607
1608 /*
1609 * igb_tx_drain - Drain the tx rings to allow pending packets to be transmitted
1610 */
1611 static boolean_t
1612 igb_tx_drain(igb_t *igb)
1613 {
1614 igb_tx_ring_t *tx_ring;
1615 boolean_t done;
1616 int i, j;
1617
1618 /*
1619 * Wait for a specific time to allow pending tx packets
1620 * to be transmitted.
1621 *
1622 * Check the counter tbd_free to see if transmission is done.
1623 * No lock protection is needed here.
1624 *
1625 * Return B_TRUE if all pending packets have been transmitted;
1626 * Otherwise return B_FALSE;
1627 */
1628 for (i = 0; i < TX_DRAIN_TIME; i++) {
1629
1630 done = B_TRUE;
1631 for (j = 0; j < igb->num_tx_rings; j++) {
1632 tx_ring = &igb->tx_rings[j];
1633 done = done &&
1634 (tx_ring->tbd_free == tx_ring->ring_size);
1635 }
1636
1637 if (done)
1638 break;
1639
1640 msec_delay(1);
1641 }
1642
1643 return (done);
1644 }
1645
1646 /*
1647 * igb_rx_drain - Wait for all rx buffers to be released by upper layer
1648 */
1649 static boolean_t
1650 igb_rx_drain(igb_t *igb)
1651 {
1652 boolean_t done;
1653 int i;
1654
1655 /*
1656 * Polling the rx free list to check if those rx buffers held by
1657 * the upper layer are released.
1658 *
1659 * Check the counter rcb_free to see if all pending buffers are
1660 * released. No lock protection is needed here.
1661 *
1662 * Return B_TRUE if all pending buffers have been released;
1663 * Otherwise return B_FALSE;
1664 */
1665 for (i = 0; i < RX_DRAIN_TIME; i++) {
1666 done = (igb->rcb_pending == 0);
1667
1668 if (done)
1669 break;
1670
1671 msec_delay(1);
1672 }
1673
1674 return (done);
1675 }
1676
1677 /*
1678 * igb_start - Start the driver/chipset
1679 */
1680 int
1681 igb_start(igb_t *igb, boolean_t alloc_buffer)
1682 {
1683 int i;
1684
1685 ASSERT(mutex_owned(&igb->gen_lock));
1686
1687 if (alloc_buffer) {
1688 if (igb_alloc_rx_data(igb) != IGB_SUCCESS) {
1689 igb_error(igb,
1690 "Failed to allocate software receive rings");
1691 return (IGB_FAILURE);
1692 }
1693
1694 /* Allocate buffers for all the rx/tx rings */
1695 if (igb_alloc_dma(igb) != IGB_SUCCESS) {
1696 igb_error(igb, "Failed to allocate DMA resource");
1697 return (IGB_FAILURE);
1698 }
1699
1700 igb->tx_ring_init = B_TRUE;
1701 } else {
1702 igb->tx_ring_init = B_FALSE;
1703 }
1704
1705 for (i = 0; i < igb->num_rx_rings; i++)
1706 mutex_enter(&igb->rx_rings[i].rx_lock);
1707 for (i = 0; i < igb->num_tx_rings; i++)
1708 mutex_enter(&igb->tx_rings[i].tx_lock);
1709
1710 /*
1711 * Start the adapter
1712 */
1713 if ((igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) == 0) {
1714 if (igb_init_adapter(igb) != IGB_SUCCESS) {
1715 igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1716 goto start_failure;
1717 }
1718 igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;
1719 }
1720
1721 /*
1722 * Setup the rx/tx rings
1723 */
1724 igb_setup_rings(igb);
1725
1726 /*
1727 * Enable adapter interrupts
1728 * The interrupts must be enabled after the driver state is START
1729 */
1730 igb->capab->enable_intr(igb);
1731
1732 if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
1733 goto start_failure;
1734
1735 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1736 goto start_failure;
1737
1738 if (igb->hw.mac.type == e1000_i350)
1739 (void) e1000_set_eee_i350(&igb->hw);
1740
1741 for (i = igb->num_tx_rings - 1; i >= 0; i--)
1742 mutex_exit(&igb->tx_rings[i].tx_lock);
1743 for (i = igb->num_rx_rings - 1; i >= 0; i--)
1744 mutex_exit(&igb->rx_rings[i].rx_lock);
1745
1746 return (IGB_SUCCESS);
1747
1748 start_failure:
1749 for (i = igb->num_tx_rings - 1; i >= 0; i--)
1750 mutex_exit(&igb->tx_rings[i].tx_lock);
1751 for (i = igb->num_rx_rings - 1; i >= 0; i--)
1752 mutex_exit(&igb->rx_rings[i].rx_lock);
1753
1754 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1755
1756 return (IGB_FAILURE);
1757 }
1758
1759 /*
1760 * igb_stop - Stop the driver/chipset
1761 */
1762 void
1763 igb_stop(igb_t *igb, boolean_t free_buffer)
1764 {
1765 int i;
1766
1767 ASSERT(mutex_owned(&igb->gen_lock));
1768
1769 igb->attach_progress &= ~ATTACH_PROGRESS_INIT_ADAPTER;
1770
1771 /*
1772 * Disable the adapter interrupts
1773 */
1774 igb_disable_adapter_interrupts(igb);
1775
1776 /*
1777 * Drain the pending tx packets
1778 */
1779 (void) igb_tx_drain(igb);
1780
1781 for (i = 0; i < igb->num_rx_rings; i++)
1782 mutex_enter(&igb->rx_rings[i].rx_lock);
1783 for (i = 0; i < igb->num_tx_rings; i++)
1784 mutex_enter(&igb->tx_rings[i].tx_lock);
1785
1786 /*
1787 * Stop the adapter
1788 */
1789 igb_stop_adapter(igb);
1790
1791 /*
1792 * Clean the pending tx data/resources
1793 */
1794 igb_tx_clean(igb);
1795
1796 for (i = igb->num_tx_rings - 1; i >= 0; i--)
1797 mutex_exit(&igb->tx_rings[i].tx_lock);
1798 for (i = igb->num_rx_rings - 1; i >= 0; i--)
1799 mutex_exit(&igb->rx_rings[i].rx_lock);
1800
1801 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1802 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1803
1804 if (igb->link_state == LINK_STATE_UP) {
1805 igb->link_state = LINK_STATE_UNKNOWN;
1806 mac_link_update(igb->mac_hdl, igb->link_state);
1807 }
1808
1809 if (free_buffer) {
1810 /*
1811 * Release the DMA/memory resources of rx/tx rings
1812 */
1813 igb_free_dma(igb);
1814 igb_free_rx_data(igb);
1815 }
1816 }
1817
1818 /*
1819 * igb_alloc_rings - Allocate memory space for rx/tx rings
1820 */
1821 static int
1822 igb_alloc_rings(igb_t *igb)
1823 {
1824 /*
1825 * Allocate memory space for rx rings
1826 */
1827 igb->rx_rings = kmem_zalloc(
1828 sizeof (igb_rx_ring_t) * igb->num_rx_rings,
1829 KM_NOSLEEP);
1830
1831 if (igb->rx_rings == NULL) {
1832 return (IGB_FAILURE);
1833 }
1834
1835 /*
1836 * Allocate memory space for tx rings
1837 */
1838 igb->tx_rings = kmem_zalloc(
1839 sizeof (igb_tx_ring_t) * igb->num_tx_rings,
1840 KM_NOSLEEP);
1841
1842 if (igb->tx_rings == NULL) {
1843 kmem_free(igb->rx_rings,
1844 sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1845 igb->rx_rings = NULL;
1846 return (IGB_FAILURE);
1847 }
1848
1849 /*
1850 * Allocate memory space for rx ring groups
1851 */
1852 igb->rx_groups = kmem_zalloc(
1853 sizeof (igb_rx_group_t) * igb->num_rx_groups,
1854 KM_NOSLEEP);
1855
1856 if (igb->rx_groups == NULL) {
1857 kmem_free(igb->rx_rings,
1858 sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1859 kmem_free(igb->tx_rings,
1860 sizeof (igb_tx_ring_t) * igb->num_tx_rings);
1861 igb->rx_rings = NULL;
1862 igb->tx_rings = NULL;
1863 return (IGB_FAILURE);
1864 }
1865
1866 return (IGB_SUCCESS);
1867 }
1868
1869 /*
1870 * igb_free_rings - Free the memory space of rx/tx rings.
1871 */
1872 static void
1873 igb_free_rings(igb_t *igb)
1874 {
1875 if (igb->rx_rings != NULL) {
1876 kmem_free(igb->rx_rings,
1877 sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1878 igb->rx_rings = NULL;
1879 }
1880
1881 if (igb->tx_rings != NULL) {
1882 kmem_free(igb->tx_rings,
1883 sizeof (igb_tx_ring_t) * igb->num_tx_rings);
1884 igb->tx_rings = NULL;
1885 }
1886
1887 if (igb->rx_groups != NULL) {
1888 kmem_free(igb->rx_groups,
1889 sizeof (igb_rx_group_t) * igb->num_rx_groups);
1890 igb->rx_groups = NULL;
1891 }
1892 }
1893
1894 static int
1895 igb_alloc_rx_data(igb_t *igb)
1896 {
1897 igb_rx_ring_t *rx_ring;
1898 int i;
1899
1900 for (i = 0; i < igb->num_rx_rings; i++) {
1901 rx_ring = &igb->rx_rings[i];
1902 if (igb_alloc_rx_ring_data(rx_ring) != IGB_SUCCESS)
1903 goto alloc_rx_rings_failure;
1904 }
1905 return (IGB_SUCCESS);
1906
1907 alloc_rx_rings_failure:
1908 igb_free_rx_data(igb);
1909 return (IGB_FAILURE);
1910 }
1911
1912 static void
1913 igb_free_rx_data(igb_t *igb)
1914 {
1915 igb_rx_ring_t *rx_ring;
1916 igb_rx_data_t *rx_data;
1917 int i;
1918
1919 for (i = 0; i < igb->num_rx_rings; i++) {
1920 rx_ring = &igb->rx_rings[i];
1921
1922 mutex_enter(&igb->rx_pending_lock);
1923 rx_data = rx_ring->rx_data;
1924
1925 if (rx_data != NULL) {
1926 rx_data->flag |= IGB_RX_STOPPED;
1927
1928 if (rx_data->rcb_pending == 0) {
1929 igb_free_rx_ring_data(rx_data);
1930 rx_ring->rx_data = NULL;
1931 }
1932 }
1933
1934 mutex_exit(&igb->rx_pending_lock);
1935 }
1936 }
1937
1938 /*
1939 * igb_setup_rings - Setup rx/tx rings
1940 */
1941 static void
1942 igb_setup_rings(igb_t *igb)
1943 {
1944 /*
1945 * Setup the rx/tx rings, including the following:
1946 *
1947 * 1. Setup the descriptor ring and the control block buffers;
1948 * 2. Initialize necessary registers for receive/transmit;
1949 * 3. Initialize software pointers/parameters for receive/transmit;
1950 */
1951 igb_setup_rx(igb);
1952
1953 igb_setup_tx(igb);
1954 }
1955
1956 static void
1957 igb_setup_rx_ring(igb_rx_ring_t *rx_ring)
1958 {
1959 igb_t *igb = rx_ring->igb;
1960 igb_rx_data_t *rx_data = rx_ring->rx_data;
1961 struct e1000_hw *hw = &igb->hw;
1962 rx_control_block_t *rcb;
1963 union e1000_adv_rx_desc *rbd;
1964 uint32_t size;
1965 uint32_t buf_low;
1966 uint32_t buf_high;
1967 uint32_t rxdctl;
1968 int i;
1969
1970 ASSERT(mutex_owned(&rx_ring->rx_lock));
1971 ASSERT(mutex_owned(&igb->gen_lock));
1972
1973 /*
1974 * Initialize descriptor ring with buffer addresses
1975 */
1976 for (i = 0; i < igb->rx_ring_size; i++) {
1977 rcb = rx_data->work_list[i];
1978 rbd = &rx_data->rbd_ring[i];
1979
1980 rbd->read.pkt_addr = rcb->rx_buf.dma_address;
1981 rbd->read.hdr_addr = NULL;
1982 }
1983
1984 /*
1985 * Initialize the base address registers
1986 */
1987 buf_low = (uint32_t)rx_data->rbd_area.dma_address;
1988 buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32);
1989 E1000_WRITE_REG(hw, E1000_RDBAH(rx_ring->index), buf_high);
1990 E1000_WRITE_REG(hw, E1000_RDBAL(rx_ring->index), buf_low);
1991
1992 /*
1993 * Initialize the length register
1994 */
1995 size = rx_data->ring_size * sizeof (union e1000_adv_rx_desc);
1996 E1000_WRITE_REG(hw, E1000_RDLEN(rx_ring->index), size);
1997
1998 /*
1999 * Initialize buffer size & descriptor type
2000 */
2001 E1000_WRITE_REG(hw, E1000_SRRCTL(rx_ring->index),
2002 ((igb->rx_buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) |
2003 E1000_SRRCTL_DESCTYPE_ADV_ONEBUF));
2004
2005 /*
2006 * Setup the Receive Descriptor Control Register (RXDCTL)
2007 */
2008 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rx_ring->index));
2009 rxdctl &= igb->capab->rxdctl_mask;
2010 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2011 rxdctl |= 16; /* pthresh */
2012 rxdctl |= 8 << 8; /* hthresh */
2013 rxdctl |= 1 << 16; /* wthresh */
2014 E1000_WRITE_REG(hw, E1000_RXDCTL(rx_ring->index), rxdctl);
2015
2016 rx_data->rbd_next = 0;
2017 }
2018
2019 static void
2020 igb_setup_rx(igb_t *igb)
2021 {
2022 igb_rx_ring_t *rx_ring;
2023 igb_rx_data_t *rx_data;
2024 igb_rx_group_t *rx_group;
2025 struct e1000_hw *hw = &igb->hw;
2026 uint32_t rctl, rxcsum;
2027 uint32_t ring_per_group;
2028 int i;
2029
2030 /*
2031 * Setup the Receive Control Register (RCTL), and enable the
2032 * receiver. The initial configuration is to: enable the receiver,
2033 * accept broadcasts, discard bad packets, accept long packets,
2034 * disable VLAN filter checking, and set receive buffer size to
2035 * 2k. For 82575, also set the receive descriptor minimum
2036 * threshold size to 1/2 the ring.
2037 */
2038 rctl = E1000_READ_REG(hw, E1000_RCTL);
2039
2040 /*
2041 * Clear the field used for wakeup control. This driver doesn't do
2042 * wakeup but leave this here for completeness.
2043 */
2044 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2045 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2046
2047 rctl |= (E1000_RCTL_EN | /* Enable Receive Unit */
2048 E1000_RCTL_BAM | /* Accept Broadcast Packets */
2049 E1000_RCTL_LPE | /* Large Packet Enable */
2050 /* Multicast filter offset */
2051 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
2052 E1000_RCTL_RDMTS_HALF | /* rx descriptor threshold */
2053 E1000_RCTL_SECRC); /* Strip Ethernet CRC */
2054
2055 for (i = 0; i < igb->num_rx_groups; i++) {
2056 rx_group = &igb->rx_groups[i];
2057 rx_group->index = i;
2058 rx_group->igb = igb;
2059 }
2060
2061 /*
2062 * Set up all rx descriptor rings - must be called before receive unit
2063 * enabled.
2064 */
2065 ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
2066 for (i = 0; i < igb->num_rx_rings; i++) {
2067 rx_ring = &igb->rx_rings[i];
2068 igb_setup_rx_ring(rx_ring);
2069
2070 /*
2071 * Map a ring to a group by assigning a group index
2072 */
2073 rx_ring->group_index = i / ring_per_group;
2074 }
2075
2076 /*
2077 * Setup the Rx Long Packet Max Length register
2078 */
2079 E1000_WRITE_REG(hw, E1000_RLPML, igb->max_frame_size);
2080
2081 /*
2082 * Hardware checksum settings
2083 */
2084 if (igb->rx_hcksum_enable) {
2085 rxcsum =
2086 E1000_RXCSUM_TUOFL | /* TCP/UDP checksum */
2087 E1000_RXCSUM_IPOFL; /* IP checksum */
2088
2089 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2090 }
2091
2092 /*
2093 * Setup classify and RSS for multiple receive queues
2094 */
2095 switch (igb->vmdq_mode) {
2096 case E1000_VMDQ_OFF:
2097 /*
2098 * One ring group, only RSS is needed when more than
2099 * one ring enabled.
2100 */
2101 if (igb->num_rx_rings > 1)
2102 igb_setup_rss(igb);
2103 break;
2104 case E1000_VMDQ_MAC:
2105 /*
2106 * Multiple groups, each group has one ring,
2107 * only the MAC classification is needed.
2108 */
2109 igb_setup_mac_classify(igb);
2110 break;
2111 case E1000_VMDQ_MAC_RSS:
2112 /*
2113 * Multiple groups and multiple rings, both
2114 * MAC classification and RSS are needed.
2115 */
2116 igb_setup_mac_rss_classify(igb);
2117 break;
2118 }
2119
2120 /*
2121 * Enable the receive unit - must be done after all
2122 * the rx setup above.
2123 */
2124 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2125
2126 /*
2127 * Initialize all adapter ring head & tail pointers - must
2128 * be done after receive unit is enabled
2129 */
2130 for (i = 0; i < igb->num_rx_rings; i++) {
2131 rx_ring = &igb->rx_rings[i];
2132 rx_data = rx_ring->rx_data;
2133 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2134 E1000_WRITE_REG(hw, E1000_RDT(i), rx_data->ring_size - 1);
2135 }
2136
2137 /*
2138 * 82575 with manageability enabled needs a special flush to make
2139 * sure the fifos start clean.
2140 */
2141 if ((hw->mac.type == e1000_82575) &&
2142 (E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) {
2143 e1000_rx_fifo_flush_82575(hw);
2144 }
2145 }
2146
2147 static void
2148 igb_setup_tx_ring(igb_tx_ring_t *tx_ring)
2149 {
2150 igb_t *igb = tx_ring->igb;
2151 struct e1000_hw *hw = &igb->hw;
2152 uint32_t size;
2153 uint32_t buf_low;
2154 uint32_t buf_high;
2155 uint32_t reg_val;
2156
2157 ASSERT(mutex_owned(&tx_ring->tx_lock));
2158 ASSERT(mutex_owned(&igb->gen_lock));
2159
2160
2161 /*
2162 * Initialize the length register
2163 */
2164 size = tx_ring->ring_size * sizeof (union e1000_adv_tx_desc);
2165 E1000_WRITE_REG(hw, E1000_TDLEN(tx_ring->index), size);
2166
2167 /*
2168 * Initialize the base address registers
2169 */
2170 buf_low = (uint32_t)tx_ring->tbd_area.dma_address;
2171 buf_high = (uint32_t)(tx_ring->tbd_area.dma_address >> 32);
2172 E1000_WRITE_REG(hw, E1000_TDBAL(tx_ring->index), buf_low);
2173 E1000_WRITE_REG(hw, E1000_TDBAH(tx_ring->index), buf_high);
2174
2175 /*
2176 * Setup head & tail pointers
2177 */
2178 E1000_WRITE_REG(hw, E1000_TDH(tx_ring->index), 0);
2179 E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), 0);
2180
2181 /*
2182 * Setup head write-back
2183 */
2184 if (igb->tx_head_wb_enable) {
2185 /*
2186 * The memory of the head write-back is allocated using
2187 * the extra tbd beyond the tail of the tbd ring.
2188 */
2189 tx_ring->tbd_head_wb = (uint32_t *)
2190 ((uintptr_t)tx_ring->tbd_area.address + size);
2191 *tx_ring->tbd_head_wb = 0;
2192
2193 buf_low = (uint32_t)
2194 (tx_ring->tbd_area.dma_address + size);
2195 buf_high = (uint32_t)
2196 ((tx_ring->tbd_area.dma_address + size) >> 32);
2197
2198 /* Set the head write-back enable bit */
2199 buf_low |= E1000_TX_HEAD_WB_ENABLE;
2200
2201 E1000_WRITE_REG(hw, E1000_TDWBAL(tx_ring->index), buf_low);
2202 E1000_WRITE_REG(hw, E1000_TDWBAH(tx_ring->index), buf_high);
2203
2204 /*
2205 * Turn off relaxed ordering for head write back or it will
2206 * cause problems with the tx recycling
2207 */
2208 reg_val = E1000_READ_REG(hw,
2209 E1000_DCA_TXCTRL(tx_ring->index));
2210 reg_val &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
2211 E1000_WRITE_REG(hw,
2212 E1000_DCA_TXCTRL(tx_ring->index), reg_val);
2213 } else {
2214 tx_ring->tbd_head_wb = NULL;
2215 }
2216
2217 tx_ring->tbd_head = 0;
2218 tx_ring->tbd_tail = 0;
2219 tx_ring->tbd_free = tx_ring->ring_size;
2220
2221 if (igb->tx_ring_init == B_TRUE) {
2222 tx_ring->tcb_head = 0;
2223 tx_ring->tcb_tail = 0;
2224 tx_ring->tcb_free = tx_ring->free_list_size;
2225 }
2226
2227 /*
2228 * Enable TXDCTL per queue
2229 */
2230 reg_val = E1000_READ_REG(hw, E1000_TXDCTL(tx_ring->index));
2231 reg_val |= E1000_TXDCTL_QUEUE_ENABLE;
2232 E1000_WRITE_REG(hw, E1000_TXDCTL(tx_ring->index), reg_val);
2233
2234 /*
2235 * Initialize hardware checksum offload settings
2236 */
2237 bzero(&tx_ring->tx_context, sizeof (tx_context_t));
2238 }
2239
2240 static void
2241 igb_setup_tx(igb_t *igb)
2242 {
2243 igb_tx_ring_t *tx_ring;
2244 struct e1000_hw *hw = &igb->hw;
2245 uint32_t reg_val;
2246 int i;
2247
2248 for (i = 0; i < igb->num_tx_rings; i++) {
2249 tx_ring = &igb->tx_rings[i];
2250 igb_setup_tx_ring(tx_ring);
2251 }
2252
2253 /*
2254 * Setup the Transmit Control Register (TCTL)
2255 */
2256 reg_val = E1000_READ_REG(hw, E1000_TCTL);
2257 reg_val &= ~E1000_TCTL_CT;
2258 reg_val |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2259 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2260
2261 /* Enable transmits */
2262 reg_val |= E1000_TCTL_EN;
2263
2264 E1000_WRITE_REG(hw, E1000_TCTL, reg_val);
2265 }
2266
2267 /*
2268 * igb_setup_rss - Setup receive-side scaling feature
2269 */
2270 static void
2271 igb_setup_rss(igb_t *igb)
2272 {
2273 struct e1000_hw *hw = &igb->hw;
2274 uint32_t i, mrqc, rxcsum;
2275 int shift = 0;
2276 uint32_t random;
2277 union e1000_reta {
2278 uint32_t dword;
2279 uint8_t bytes[4];
2280 } reta;
2281
2282 /* Setup the Redirection Table */
2283 if (hw->mac.type == e1000_82576) {
2284 shift = 3;
2285 } else if (hw->mac.type == e1000_82575) {
2286 shift = 6;
2287 }
2288 for (i = 0; i < (32 * 4); i++) {
2289 reta.bytes[i & 3] = (i % igb->num_rx_rings) << shift;
2290 if ((i & 3) == 3) {
2291 E1000_WRITE_REG(hw,
2292 (E1000_RETA(0) + (i & ~3)), reta.dword);
2293 }
2294 }
2295
2296 /* Fill out hash function seeds */
2297 for (i = 0; i < 10; i++) {
2298 (void) random_get_pseudo_bytes((uint8_t *)&random,
2299 sizeof (uint32_t));
2300 E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
2301 }
2302
2303 /* Setup the Multiple Receive Queue Control register */
2304 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2305 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2306 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2307 E1000_MRQC_RSS_FIELD_IPV6 |
2308 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2309 E1000_MRQC_RSS_FIELD_IPV4_UDP |
2310 E1000_MRQC_RSS_FIELD_IPV6_UDP |
2311 E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2312 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2313
2314 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2315
2316 /*
2317 * Disable Packet Checksum to enable RSS for multiple receive queues.
2318 *
2319 * The Packet Checksum is not ethernet CRC. It is another kind of
2320 * checksum offloading provided by the 82575 chipset besides the IP
2321 * header checksum offloading and the TCP/UDP checksum offloading.
2322 * The Packet Checksum is by default computed over the entire packet
2323 * from the first byte of the DA through the last byte of the CRC,
2324 * including the Ethernet and IP headers.
2325 *
2326 * It is a hardware limitation that Packet Checksum is mutually
2327 * exclusive with RSS.
2328 */
2329 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2330 rxcsum |= E1000_RXCSUM_PCSD;
2331 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2332 }
2333
2334 /*
2335 * igb_setup_mac_rss_classify - Setup MAC classification and rss
2336 */
2337 static void
2338 igb_setup_mac_rss_classify(igb_t *igb)
2339 {
2340 struct e1000_hw *hw = &igb->hw;
2341 uint32_t i, mrqc, vmdctl, rxcsum;
2342 uint32_t ring_per_group;
2343 int shift_group0, shift_group1;
2344 uint32_t random;
2345 union e1000_reta {
2346 uint32_t dword;
2347 uint8_t bytes[4];
2348 } reta;
2349
2350 ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
2351
2352 /* Setup the Redirection Table, it is shared between two groups */
2353 shift_group0 = 2;
2354 shift_group1 = 6;
2355 for (i = 0; i < (32 * 4); i++) {
2356 reta.bytes[i & 3] = ((i % ring_per_group) << shift_group0) |
2357 ((ring_per_group + (i % ring_per_group)) << shift_group1);
2358 if ((i & 3) == 3) {
2359 E1000_WRITE_REG(hw,
2360 (E1000_RETA(0) + (i & ~3)), reta.dword);
2361 }
2362 }
2363
2364 /* Fill out hash function seeds */
2365 for (i = 0; i < 10; i++) {
2366 (void) random_get_pseudo_bytes((uint8_t *)&random,
2367 sizeof (uint32_t));
2368 E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
2369 }
2370
2371 /*
2372 * Setup the Multiple Receive Queue Control register,
2373 * enable VMDq based on packet destination MAC address and RSS.
2374 */
2375 mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_RSS_GROUP;
2376 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2377 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2378 E1000_MRQC_RSS_FIELD_IPV6 |
2379 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2380 E1000_MRQC_RSS_FIELD_IPV4_UDP |
2381 E1000_MRQC_RSS_FIELD_IPV6_UDP |
2382 E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2383 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2384
2385 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2386
2387
2388 /* Define the default group and default queues */
2389 vmdctl = E1000_VMDQ_MAC_GROUP_DEFAULT_QUEUE;
2390 E1000_WRITE_REG(hw, E1000_VT_CTL, vmdctl);
2391
2392 /*
2393 * Disable Packet Checksum to enable RSS for multiple receive queues.
2394 *
2395 * The Packet Checksum is not ethernet CRC. It is another kind of
2396 * checksum offloading provided by the 82575 chipset besides the IP
2397 * header checksum offloading and the TCP/UDP checksum offloading.
2398 * The Packet Checksum is by default computed over the entire packet
2399 * from the first byte of the DA through the last byte of the CRC,
2400 * including the Ethernet and IP headers.
2401 *
2402 * It is a hardware limitation that Packet Checksum is mutually
2403 * exclusive with RSS.
2404 */
2405 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2406 rxcsum |= E1000_RXCSUM_PCSD;
2407 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2408 }
2409
2410 /*
2411 * igb_setup_mac_classify - Setup MAC classification feature
2412 */
2413 static void
2414 igb_setup_mac_classify(igb_t *igb)
2415 {
2416 struct e1000_hw *hw = &igb->hw;
2417 uint32_t mrqc, rxcsum;
2418
2419 /*
2420 * Setup the Multiple Receive Queue Control register,
2421 * enable VMDq based on packet destination MAC address.
2422 */
2423 mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_GROUP;
2424 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2425
2426 /*
2427 * Disable Packet Checksum to enable RSS for multiple receive queues.
2428 *
2429 * The Packet Checksum is not ethernet CRC. It is another kind of
2430 * checksum offloading provided by the 82575 chipset besides the IP
2431 * header checksum offloading and the TCP/UDP checksum offloading.
2432 * The Packet Checksum is by default computed over the entire packet
2433 * from the first byte of the DA through the last byte of the CRC,
2434 * including the Ethernet and IP headers.
2435 *
2436 * It is a hardware limitation that Packet Checksum is mutually
2437 * exclusive with RSS.
2438 */
2439 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2440 rxcsum |= E1000_RXCSUM_PCSD;
2441 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2442
2443 }
2444
2445 /*
2446 * igb_init_unicst - Initialize the unicast addresses
2447 */
2448 static void
2449 igb_init_unicst(igb_t *igb)
2450 {
2451 struct e1000_hw *hw = &igb->hw;
2452 int slot;
2453
2454 /*
2455 * Here we should consider two situations:
2456 *
2457 * 1. Chipset is initialized the first time
2458 * Initialize the multiple unicast addresses, and
2459 * save the default MAC address.
2460 *
2461 * 2. Chipset is reset
2462 * Recover the multiple unicast addresses from the
2463 * software data structure to the RAR registers.
2464 */
2465
2466 /*
2467 * Clear the default MAC address in the RAR0 rgister,
2468 * which is loaded from EEPROM when system boot or chipreset,
2469 * this will cause the conficts with add_mac/rem_mac entry
2470 * points when VMDq is enabled. For this reason, the RAR0
2471 * must be cleared for both cases mentioned above.
2472 */
2473 e1000_rar_clear(hw, 0);
2474
2475 if (!igb->unicst_init) {
2476
2477 /* Initialize the multiple unicast addresses */
2478 igb->unicst_total = MAX_NUM_UNICAST_ADDRESSES;
2479 igb->unicst_avail = igb->unicst_total;
2480
2481 for (slot = 0; slot < igb->unicst_total; slot++)
2482 igb->unicst_addr[slot].mac.set = 0;
2483
2484 igb->unicst_init = B_TRUE;
2485 } else {
2486 /* Re-configure the RAR registers */
2487 for (slot = 0; slot < igb->unicst_total; slot++) {
2488 e1000_rar_set_vmdq(hw, igb->unicst_addr[slot].mac.addr,
2489 slot, igb->vmdq_mode,
2490 igb->unicst_addr[slot].mac.group_index);
2491 }
2492 }
2493 }
2494
2495 /*
2496 * igb_unicst_find - Find the slot for the specified unicast address
2497 */
2498 int
2499 igb_unicst_find(igb_t *igb, const uint8_t *mac_addr)
2500 {
2501 int slot;
2502
2503 ASSERT(mutex_owned(&igb->gen_lock));
2504
2505 for (slot = 0; slot < igb->unicst_total; slot++) {
2506 if (bcmp(igb->unicst_addr[slot].mac.addr,
2507 mac_addr, ETHERADDRL) == 0)
2508 return (slot);
2509 }
2510
2511 return (-1);
2512 }
2513
2514 /*
2515 * igb_unicst_set - Set the unicast address to the specified slot
2516 */
2517 int
2518 igb_unicst_set(igb_t *igb, const uint8_t *mac_addr,
2519 int slot)
2520 {
2521 struct e1000_hw *hw = &igb->hw;
2522
2523 ASSERT(mutex_owned(&igb->gen_lock));
2524
2525 /*
2526 * Save the unicast address in the software data structure
2527 */
2528 bcopy(mac_addr, igb->unicst_addr[slot].mac.addr, ETHERADDRL);
2529
2530 /*
2531 * Set the unicast address to the RAR register
2532 */
2533 e1000_rar_set(hw, (uint8_t *)mac_addr, slot);
2534
2535 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2536 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2537 return (EIO);
2538 }
2539
2540 return (0);
2541 }
2542
2543 /*
2544 * igb_multicst_add - Add a multicst address
2545 */
2546 int
2547 igb_multicst_add(igb_t *igb, const uint8_t *multiaddr)
2548 {
2549 struct ether_addr *new_table;
2550 size_t new_len;
2551 size_t old_len;
2552
2553 ASSERT(mutex_owned(&igb->gen_lock));
2554
2555 if ((multiaddr[0] & 01) == 0) {
2556 igb_error(igb, "Illegal multicast address");
2557 return (EINVAL);
2558 }
2559
2560 if (igb->mcast_count >= igb->mcast_max_num) {
2561 igb_error(igb, "Adapter requested more than %d mcast addresses",
2562 igb->mcast_max_num);
2563 return (ENOENT);
2564 }
2565
2566 if (igb->mcast_count == igb->mcast_alloc_count) {
2567 old_len = igb->mcast_alloc_count *
2568 sizeof (struct ether_addr);
2569 new_len = (igb->mcast_alloc_count + MCAST_ALLOC_COUNT) *
2570 sizeof (struct ether_addr);
2571
2572 new_table = kmem_alloc(new_len, KM_NOSLEEP);
2573 if (new_table == NULL) {
2574 igb_error(igb,
2575 "Not enough memory to alloc mcast table");
2576 return (ENOMEM);
2577 }
2578
2579 if (igb->mcast_table != NULL) {
2580 bcopy(igb->mcast_table, new_table, old_len);
2581 kmem_free(igb->mcast_table, old_len);
2582 }
2583 igb->mcast_alloc_count += MCAST_ALLOC_COUNT;
2584 igb->mcast_table = new_table;
2585 }
2586
2587 bcopy(multiaddr,
2588 &igb->mcast_table[igb->mcast_count], ETHERADDRL);
2589 igb->mcast_count++;
2590
2591 /*
2592 * Update the multicast table in the hardware
2593 */
2594 igb_setup_multicst(igb);
2595
2596 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2597 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2598 return (EIO);
2599 }
2600
2601 return (0);
2602 }
2603
2604 /*
2605 * igb_multicst_remove - Remove a multicst address
2606 */
2607 int
2608 igb_multicst_remove(igb_t *igb, const uint8_t *multiaddr)
2609 {
2610 struct ether_addr *new_table;
2611 size_t new_len;
2612 size_t old_len;
2613 int i;
2614
2615 ASSERT(mutex_owned(&igb->gen_lock));
2616
2617 for (i = 0; i < igb->mcast_count; i++) {
2618 if (bcmp(multiaddr, &igb->mcast_table[i],
2619 ETHERADDRL) == 0) {
2620 for (i++; i < igb->mcast_count; i++) {
2621 igb->mcast_table[i - 1] =
2622 igb->mcast_table[i];
2623 }
2624 igb->mcast_count--;
2625 break;
2626 }
2627 }
2628
2629 if ((igb->mcast_alloc_count - igb->mcast_count) >
2630 MCAST_ALLOC_COUNT) {
2631 old_len = igb->mcast_alloc_count *
2632 sizeof (struct ether_addr);
2633 new_len = (igb->mcast_alloc_count - MCAST_ALLOC_COUNT) *
2634 sizeof (struct ether_addr);
2635
2636 new_table = kmem_alloc(new_len, KM_NOSLEEP);
2637 if (new_table != NULL) {
2638 bcopy(igb->mcast_table, new_table, new_len);
2639 kmem_free(igb->mcast_table, old_len);
2640 igb->mcast_alloc_count -= MCAST_ALLOC_COUNT;
2641 igb->mcast_table = new_table;
2642 }
2643 }
2644
2645 /*
2646 * Update the multicast table in the hardware
2647 */
2648 igb_setup_multicst(igb);
2649
2650 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2651 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2652 return (EIO);
2653 }
2654
2655 return (0);
2656 }
2657
2658 static void
2659 igb_release_multicast(igb_t *igb)
2660 {
2661 if (igb->mcast_table != NULL) {
2662 kmem_free(igb->mcast_table,
2663 igb->mcast_alloc_count * sizeof (struct ether_addr));
2664 igb->mcast_table = NULL;
2665 }
2666 }
2667
2668 /*
2669 * igb_setup_multicast - setup multicast data structures
2670 *
2671 * This routine initializes all of the multicast related structures
2672 * and save them in the hardware registers.
2673 */
2674 static void
2675 igb_setup_multicst(igb_t *igb)
2676 {
2677 uint8_t *mc_addr_list;
2678 uint32_t mc_addr_count;
2679 struct e1000_hw *hw = &igb->hw;
2680
2681 ASSERT(mutex_owned(&igb->gen_lock));
2682 ASSERT(igb->mcast_count <= igb->mcast_max_num);
2683
2684 mc_addr_list = (uint8_t *)igb->mcast_table;
2685 mc_addr_count = igb->mcast_count;
2686
2687 /*
2688 * Update the multicase addresses to the MTA registers
2689 */
2690 e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count);
2691 }
2692
2693 /*
2694 * igb_get_conf - Get driver configurations set in driver.conf
2695 *
2696 * This routine gets user-configured values out of the configuration
2697 * file igb.conf.
2698 *
2699 * For each configurable value, there is a minimum, a maximum, and a
2700 * default.
2701 * If user does not configure a value, use the default.
2702 * If user configures below the minimum, use the minumum.
2703 * If user configures above the maximum, use the maxumum.
2704 */
2705 static void
2706 igb_get_conf(igb_t *igb)
2707 {
2708 struct e1000_hw *hw = &igb->hw;
2709 uint32_t default_mtu;
2710 uint32_t flow_control;
2711 uint32_t ring_per_group;
2712 int i;
2713
2714 /*
2715 * igb driver supports the following user configurations:
2716 *
2717 * Link configurations:
2718 * adv_autoneg_cap
2719 * adv_1000fdx_cap
2720 * adv_100fdx_cap
2721 * adv_100hdx_cap
2722 * adv_10fdx_cap
2723 * adv_10hdx_cap
2724 * Note: 1000hdx is not supported.
2725 *
2726 * Jumbo frame configuration:
2727 * default_mtu
2728 *
2729 * Ethernet flow control configuration:
2730 * flow_control
2731 *
2732 * Multiple rings configurations:
2733 * tx_queue_number
2734 * tx_ring_size
2735 * rx_queue_number
2736 * rx_ring_size
2737 *
2738 * Call igb_get_prop() to get the value for a specific
2739 * configuration parameter.
2740 */
2741
2742 /*
2743 * Link configurations
2744 */
2745 igb->param_adv_autoneg_cap = igb_get_prop(igb,
2746 PROP_ADV_AUTONEG_CAP, 0, 1, 1);
2747 igb->param_adv_1000fdx_cap = igb_get_prop(igb,
2748 PROP_ADV_1000FDX_CAP, 0, 1, 1);
2749 igb->param_adv_100fdx_cap = igb_get_prop(igb,
2750 PROP_ADV_100FDX_CAP, 0, 1, 1);
2751 igb->param_adv_100hdx_cap = igb_get_prop(igb,
2752 PROP_ADV_100HDX_CAP, 0, 1, 1);
2753 igb->param_adv_10fdx_cap = igb_get_prop(igb,
2754 PROP_ADV_10FDX_CAP, 0, 1, 1);
2755 igb->param_adv_10hdx_cap = igb_get_prop(igb,
2756 PROP_ADV_10HDX_CAP, 0, 1, 1);
2757
2758 /*
2759 * Jumbo frame configurations
2760 */
2761 default_mtu = igb_get_prop(igb, PROP_DEFAULT_MTU,
2762 MIN_MTU, MAX_MTU, DEFAULT_MTU);
2763
2764 igb->max_frame_size = default_mtu +
2765 sizeof (struct ether_vlan_header) + ETHERFCSL;
2766
2767 /*
2768 * Ethernet flow control configuration
2769 */
2770 flow_control = igb_get_prop(igb, PROP_FLOW_CONTROL,
2771 e1000_fc_none, 4, e1000_fc_full);
2772 if (flow_control == 4)
2773 flow_control = e1000_fc_default;
2774
2775 hw->fc.requested_mode = flow_control;
2776
2777 /*
2778 * Multiple rings configurations
2779 */
2780 igb->tx_ring_size = igb_get_prop(igb, PROP_TX_RING_SIZE,
2781 MIN_TX_RING_SIZE, MAX_TX_RING_SIZE, DEFAULT_TX_RING_SIZE);
2782 igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE,
2783 MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);
2784
2785 igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 0);
2786 igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM,
2787 MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM);
2788 /*
2789 * Currently we do not support VMDq for 82576 and 82580.
2790 * If it is e1000_82576, set num_rx_groups to 1.
2791 */
2792 if (hw->mac.type >= e1000_82576)
2793 igb->num_rx_groups = 1;
2794
2795 if (igb->mr_enable) {
2796 igb->num_tx_rings = igb->capab->def_tx_que_num;
2797 igb->num_rx_rings = igb->capab->def_rx_que_num;
2798 } else {
2799 igb->num_tx_rings = 1;
2800 igb->num_rx_rings = 1;
2801
2802 if (igb->num_rx_groups > 1) {
2803 igb_error(igb,
2804 "Invalid rx groups number. Please enable multiple "
2805 "rings first");
2806 igb->num_rx_groups = 1;
2807 }
2808 }
2809
2810 /*
2811 * Check the divisibility between rx rings and rx groups.
2812 */
2813 for (i = igb->num_rx_groups; i > 0; i--) {
2814 if ((igb->num_rx_rings % i) == 0)
2815 break;
2816 }
2817 if (i != igb->num_rx_groups) {
2818 igb_error(igb,
2819 "Invalid rx groups number. Downgrade the rx group "
2820 "number to %d.", i);
2821 igb->num_rx_groups = i;
2822 }
2823
2824 /*
2825 * Get the ring number per group.
2826 */
2827 ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
2828
2829 if (igb->num_rx_groups == 1) {
2830 /*
2831 * One rx ring group, the rx ring number is num_rx_rings.
2832 */
2833 igb->vmdq_mode = E1000_VMDQ_OFF;
2834 } else if (ring_per_group == 1) {
2835 /*
2836 * Multiple rx groups, each group has one rx ring.
2837 */
2838 igb->vmdq_mode = E1000_VMDQ_MAC;
2839 } else {
2840 /*
2841 * Multiple groups and multiple rings.
2842 */
2843 igb->vmdq_mode = E1000_VMDQ_MAC_RSS;
2844 }
2845
2846 /*
2847 * Tunable used to force an interrupt type. The only use is
2848 * for testing of the lesser interrupt types.
2849 * 0 = don't force interrupt type
2850 * 1 = force interrupt type MSIX
2851 * 2 = force interrupt type MSI
2852 * 3 = force interrupt type Legacy
2853 */
2854 igb->intr_force = igb_get_prop(igb, PROP_INTR_FORCE,
2855 IGB_INTR_NONE, IGB_INTR_LEGACY, IGB_INTR_NONE);
2856
2857 igb->tx_hcksum_enable = igb_get_prop(igb, PROP_TX_HCKSUM_ENABLE,
2858 0, 1, 1);
2859 igb->rx_hcksum_enable = igb_get_prop(igb, PROP_RX_HCKSUM_ENABLE,
2860 0, 1, 1);
2861 igb->lso_enable = igb_get_prop(igb, PROP_LSO_ENABLE,
2862 0, 1, 1);
2863 igb->tx_head_wb_enable = igb_get_prop(igb, PROP_TX_HEAD_WB_ENABLE,
2864 0, 1, 1);
2865
2866 /*
2867 * igb LSO needs the tx h/w checksum support.
2868 * Here LSO will be disabled if tx h/w checksum has been disabled.
2869 */
2870 if (igb->tx_hcksum_enable == B_FALSE)
2871 igb->lso_enable = B_FALSE;
2872
2873 igb->tx_copy_thresh = igb_get_prop(igb, PROP_TX_COPY_THRESHOLD,
2874 MIN_TX_COPY_THRESHOLD, MAX_TX_COPY_THRESHOLD,
2875 DEFAULT_TX_COPY_THRESHOLD);
2876 igb->tx_recycle_thresh = igb_get_prop(igb, PROP_TX_RECYCLE_THRESHOLD,
2877 MIN_TX_RECYCLE_THRESHOLD, MAX_TX_RECYCLE_THRESHOLD,
2878 DEFAULT_TX_RECYCLE_THRESHOLD);
2879 igb->tx_overload_thresh = igb_get_prop(igb, PROP_TX_OVERLOAD_THRESHOLD,
2880 MIN_TX_OVERLOAD_THRESHOLD, MAX_TX_OVERLOAD_THRESHOLD,
2881 DEFAULT_TX_OVERLOAD_THRESHOLD);
2882 igb->tx_resched_thresh = igb_get_prop(igb, PROP_TX_RESCHED_THRESHOLD,
2883 MIN_TX_RESCHED_THRESHOLD,
2884 MIN(igb->tx_ring_size, MAX_TX_RESCHED_THRESHOLD),
2885 igb->tx_ring_size > DEFAULT_TX_RESCHED_THRESHOLD ?
2886 DEFAULT_TX_RESCHED_THRESHOLD : DEFAULT_TX_RESCHED_THRESHOLD_LOW);
2887
2888 igb->rx_copy_thresh = igb_get_prop(igb, PROP_RX_COPY_THRESHOLD,
2889 MIN_RX_COPY_THRESHOLD, MAX_RX_COPY_THRESHOLD,
2890 DEFAULT_RX_COPY_THRESHOLD);
2891 igb->rx_limit_per_intr = igb_get_prop(igb, PROP_RX_LIMIT_PER_INTR,
2892 MIN_RX_LIMIT_PER_INTR, MAX_RX_LIMIT_PER_INTR,
2893 DEFAULT_RX_LIMIT_PER_INTR);
2894
2895 igb->intr_throttling[0] = igb_get_prop(igb, PROP_INTR_THROTTLING,
2896 igb->capab->min_intr_throttle,
2897 igb->capab->max_intr_throttle,
2898 igb->capab->def_intr_throttle);
2899
2900 /*
2901 * Max number of multicast addresses
2902 */
2903 igb->mcast_max_num =
2904 igb_get_prop(igb, PROP_MCAST_MAX_NUM,
2905 MIN_MCAST_NUM, MAX_MCAST_NUM, DEFAULT_MCAST_NUM);
2906 }
2907
2908 /*
2909 * igb_get_prop - Get a property value out of the configuration file igb.conf
2910 *
2911 * Caller provides the name of the property, a default value, a minimum
2912 * value, and a maximum value.
2913 *
2914 * Return configured value of the property, with default, minimum and
2915 * maximum properly applied.
2916 */
2917 static int
2918 igb_get_prop(igb_t *igb,
2919 char *propname, /* name of the property */
2920 int minval, /* minimum acceptable value */
2921 int maxval, /* maximim acceptable value */
2922 int defval) /* default value */
2923 {
2924 int value;
2925
2926 /*
2927 * Call ddi_prop_get_int() to read the conf settings
2928 */
2929 value = ddi_prop_get_int(DDI_DEV_T_ANY, igb->dip,
2930 DDI_PROP_DONTPASS, propname, defval);
2931
2932 if (value > maxval)
2933 value = maxval;
2934
2935 if (value < minval)
2936 value = minval;
2937
2938 return (value);
2939 }
2940
2941 /*
2942 * igb_setup_link - Using the link properties to setup the link
2943 */
2944 int
2945 igb_setup_link(igb_t *igb, boolean_t setup_hw)
2946 {
2947 struct e1000_mac_info *mac;
2948 struct e1000_phy_info *phy;
2949 boolean_t invalid;
2950
2951 mac = &igb->hw.mac;
2952 phy = &igb->hw.phy;
2953 invalid = B_FALSE;
2954
2955 if (igb->param_adv_autoneg_cap == 1) {
2956 mac->autoneg = B_TRUE;
2957 phy->autoneg_advertised = 0;
2958
2959 /*
2960 * 1000hdx is not supported for autonegotiation
2961 */
2962 if (igb->param_adv_1000fdx_cap == 1)
2963 phy->autoneg_advertised |= ADVERTISE_1000_FULL;
2964
2965 if (igb->param_adv_100fdx_cap == 1)
2966 phy->autoneg_advertised |= ADVERTISE_100_FULL;
2967
2968 if (igb->param_adv_100hdx_cap == 1)
2969 phy->autoneg_advertised |= ADVERTISE_100_HALF;
2970
2971 if (igb->param_adv_10fdx_cap == 1)
2972 phy->autoneg_advertised |= ADVERTISE_10_FULL;
2973
2974 if (igb->param_adv_10hdx_cap == 1)
2975 phy->autoneg_advertised |= ADVERTISE_10_HALF;
2976
2977 if (phy->autoneg_advertised == 0)
2978 invalid = B_TRUE;
2979 } else {
2980 mac->autoneg = B_FALSE;
2981
2982 /*
2983 * 1000fdx and 1000hdx are not supported for forced link
2984 */
2985 if (igb->param_adv_100fdx_cap == 1)
2986 mac->forced_speed_duplex = ADVERTISE_100_FULL;
2987 else if (igb->param_adv_100hdx_cap == 1)
2988 mac->forced_speed_duplex = ADVERTISE_100_HALF;
2989 else if (igb->param_adv_10fdx_cap == 1)
2990 mac->forced_speed_duplex = ADVERTISE_10_FULL;
2991 else if (igb->param_adv_10hdx_cap == 1)
2992 mac->forced_speed_duplex = ADVERTISE_10_HALF;
2993 else
2994 invalid = B_TRUE;
2995 }
2996
2997 if (invalid) {
2998 igb_notice(igb, "Invalid link settings. Setup link to "
2999 "autonegotiation with full link capabilities.");
3000 mac->autoneg = B_TRUE;
3001 phy->autoneg_advertised = ADVERTISE_1000_FULL |
3002 ADVERTISE_100_FULL | ADVERTISE_100_HALF |
3003 ADVERTISE_10_FULL | ADVERTISE_10_HALF;
3004 }
3005
3006 if (setup_hw) {
3007 if (e1000_setup_link(&igb->hw) != E1000_SUCCESS)
3008 return (IGB_FAILURE);
3009 }
3010
3011 return (IGB_SUCCESS);
3012 }
3013
3014
3015 /*
3016 * igb_is_link_up - Check if the link is up
3017 */
3018 static boolean_t
3019 igb_is_link_up(igb_t *igb)
3020 {
3021 struct e1000_hw *hw = &igb->hw;
3022 boolean_t link_up = B_FALSE;
3023
3024 ASSERT(mutex_owned(&igb->gen_lock));
3025
3026 /*
3027 * get_link_status is set in the interrupt handler on link-status-change
3028 * or rx sequence error interrupt. get_link_status will stay
3029 * false until the e1000_check_for_link establishes link only
3030 * for copper adapters.
3031 */
3032 switch (hw->phy.media_type) {
3033 case e1000_media_type_copper:
3034 if (hw->mac.get_link_status) {
3035 (void) e1000_check_for_link(hw);
3036 link_up = !hw->mac.get_link_status;
3037 } else {
3038 link_up = B_TRUE;
3039 }
3040 break;
3041 case e1000_media_type_fiber:
3042 (void) e1000_check_for_link(hw);
3043 link_up = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU);
3044 break;
3045 case e1000_media_type_internal_serdes:
3046 (void) e1000_check_for_link(hw);
3047 link_up = hw->mac.serdes_has_link;
3048 break;
3049 }
3050
3051 return (link_up);
3052 }
3053
3054 /*
3055 * igb_link_check - Link status processing
3056 */
3057 static boolean_t
3058 igb_link_check(igb_t *igb)
3059 {
3060 struct e1000_hw *hw = &igb->hw;
3061 uint16_t speed = 0, duplex = 0;
3062 boolean_t link_changed = B_FALSE;
3063
3064 ASSERT(mutex_owned(&igb->gen_lock));
3065
3066 if (igb_is_link_up(igb)) {
3067 /*
3068 * The Link is up, check whether it was marked as down earlier
3069 */
3070 if (igb->link_state != LINK_STATE_UP) {
3071 (void) e1000_get_speed_and_duplex(hw, &speed, &duplex);
3072 igb->link_speed = speed;
3073 igb->link_duplex = duplex;
3074 igb->link_state = LINK_STATE_UP;
3075 link_changed = B_TRUE;
3076 if (!igb->link_complete)
3077 igb_stop_link_timer(igb);
3078 }
3079 } else if (igb->link_complete) {
3080 if (igb->link_state != LINK_STATE_DOWN) {
3081 igb->link_speed = 0;
3082 igb->link_duplex = 0;
3083 igb->link_state = LINK_STATE_DOWN;
3084 link_changed = B_TRUE;
3085 }
3086 }
3087
3088 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
3089 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
3090 return (B_FALSE);
3091 }
3092
3093 return (link_changed);
3094 }
3095
3096 /*
3097 * igb_local_timer - driver watchdog function
3098 *
3099 * This function will handle the hardware stall check, link status
3100 * check and other routines.
3101 */
3102 static void
3103 igb_local_timer(void *arg)
3104 {
3105 igb_t *igb = (igb_t *)arg;
3106 boolean_t link_changed = B_FALSE;
3107
3108 if (igb->igb_state & IGB_ERROR) {
3109 igb->reset_count++;
3110 if (igb_reset(igb) == IGB_SUCCESS)
3111 ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);
3112
3113 igb_restart_watchdog_timer(igb);
3114 return;
3115 }
3116
3117 if (igb_stall_check(igb) || (igb->igb_state & IGB_STALL)) {
3118 igb_fm_ereport(igb, DDI_FM_DEVICE_STALL);
3119 ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
3120 igb->reset_count++;
3121 if (igb_reset(igb) == IGB_SUCCESS)
3122 ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);
3123
3124 igb_restart_watchdog_timer(igb);
3125 return;
3126 }
3127
3128 mutex_enter(&igb->gen_lock);
3129 if (!(igb->igb_state & IGB_SUSPENDED) && (igb->igb_state & IGB_STARTED))
3130 link_changed = igb_link_check(igb);
3131 mutex_exit(&igb->gen_lock);
3132
3133 if (link_changed)
3134 mac_link_update(igb->mac_hdl, igb->link_state);
3135
3136 igb_restart_watchdog_timer(igb);
3137 }
3138
3139 /*
3140 * igb_link_timer - link setup timer function
3141 *
3142 * It is called when the timer for link setup is expired, which indicates
3143 * the completion of the link setup. The link state will not be updated
3144 * until the link setup is completed. And the link state will not be sent
3145 * to the upper layer through mac_link_update() in this function. It will
3146 * be updated in the local timer routine or the interrupts service routine
3147 * after the interface is started (plumbed).
3148 */
3149 static void
3150 igb_link_timer(void *arg)
3151 {
3152 igb_t *igb = (igb_t *)arg;
3153
3154 mutex_enter(&igb->link_lock);
3155 igb->link_complete = B_TRUE;
3156 igb->link_tid = 0;
3157 mutex_exit(&igb->link_lock);
3158 }
3159 /*
3160 * igb_stall_check - check for transmit stall
3161 *
3162 * This function checks if the adapter is stalled (in transmit).
3163 *
3164 * It is called each time the watchdog timeout is invoked.
3165 * If the transmit descriptor reclaim continuously fails,
3166 * the watchdog value will increment by 1. If the watchdog
3167 * value exceeds the threshold, the igb is assumed to
3168 * have stalled and need to be reset.
3169 */
3170 static boolean_t
3171 igb_stall_check(igb_t *igb)
3172 {
3173 igb_tx_ring_t *tx_ring;
3174 struct e1000_hw *hw = &igb->hw;
3175 boolean_t result;
3176 int i;
3177
3178 if (igb->link_state != LINK_STATE_UP)
3179 return (B_FALSE);
3180
3181 /*
3182 * If any tx ring is stalled, we'll reset the chipset
3183 */
3184 result = B_FALSE;
3185 for (i = 0; i < igb->num_tx_rings; i++) {
3186 tx_ring = &igb->tx_rings[i];
3187
3188 if (tx_ring->recycle_fail > 0)
3189 tx_ring->stall_watchdog++;
3190 else
3191 tx_ring->stall_watchdog = 0;
3192
3193 if (tx_ring->stall_watchdog >= STALL_WATCHDOG_TIMEOUT) {
3194 result = B_TRUE;
3195 if (hw->mac.type == e1000_82580) {
3196 hw->dev_spec._82575.global_device_reset
3197 = B_TRUE;
3198 }
3199 break;
3200 }
3201 }
3202
3203 if (result) {
3204 tx_ring->stall_watchdog = 0;
3205 tx_ring->recycle_fail = 0;
3206 }
3207
3208 return (result);
3209 }
3210
3211
3212 /*
3213 * is_valid_mac_addr - Check if the mac address is valid
3214 */
3215 static boolean_t
3216 is_valid_mac_addr(uint8_t *mac_addr)
3217 {
3218 const uint8_t addr_test1[6] = { 0, 0, 0, 0, 0, 0 };
3219 const uint8_t addr_test2[6] =
3220 { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
3221
3222 if (!(bcmp(addr_test1, mac_addr, ETHERADDRL)) ||
3223 !(bcmp(addr_test2, mac_addr, ETHERADDRL)))
3224 return (B_FALSE);
3225
3226 return (B_TRUE);
3227 }
3228
3229 static boolean_t
3230 igb_find_mac_address(igb_t *igb)
3231 {
3232 struct e1000_hw *hw = &igb->hw;
3233 #ifdef __sparc
3234 uchar_t *bytes;
3235 struct ether_addr sysaddr;
3236 uint_t nelts;
3237 int err;
3238 boolean_t found = B_FALSE;
3239
3240 /*
3241 * The "vendor's factory-set address" may already have
3242 * been extracted from the chip, but if the property
3243 * "local-mac-address" is set we use that instead.
3244 *
3245 * We check whether it looks like an array of 6
3246 * bytes (which it should, if OBP set it). If we can't
3247 * make sense of it this way, we'll ignore it.
3248 */
3249 err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
3250 DDI_PROP_DONTPASS, "local-mac-address", &bytes, &nelts);
3251 if (err == DDI_PROP_SUCCESS) {
3252 if (nelts == ETHERADDRL) {
3253 while (nelts--)
3254 hw->mac.addr[nelts] = bytes[nelts];
3255 found = B_TRUE;
3256 }
3257 ddi_prop_free(bytes);
3258 }
3259
3260 /*
3261 * Look up the OBP property "local-mac-address?". If the user has set
3262 * 'local-mac-address? = false', use "the system address" instead.
3263 */
3264 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip, 0,
3265 "local-mac-address?", &bytes, &nelts) == DDI_PROP_SUCCESS) {
3266 if (strncmp("false", (caddr_t)bytes, (size_t)nelts) == 0) {
3267 if (localetheraddr(NULL, &sysaddr) != 0) {
3268 bcopy(&sysaddr, hw->mac.addr, ETHERADDRL);
3269 found = B_TRUE;
3270 }
3271 }
3272 ddi_prop_free(bytes);
3273 }
3274
3275 /*
3276 * Finally(!), if there's a valid "mac-address" property (created
3277 * if we netbooted from this interface), we must use this instead
3278 * of any of the above to ensure that the NFS/install server doesn't
3279 * get confused by the address changing as Solaris takes over!
3280 */
3281 err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
3282 DDI_PROP_DONTPASS, "mac-address", &bytes, &nelts);
3283 if (err == DDI_PROP_SUCCESS) {
3284 if (nelts == ETHERADDRL) {
3285 while (nelts--)
3286 hw->mac.addr[nelts] = bytes[nelts];
3287 found = B_TRUE;
3288 }
3289 ddi_prop_free(bytes);
3290 }
3291
3292 if (found) {
3293 bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
3294 return (B_TRUE);
3295 }
3296 #endif
3297
3298 /*
3299 * Read the device MAC address from the EEPROM
3300 */
3301 if (e1000_read_mac_addr(hw) != E1000_SUCCESS)
3302 return (B_FALSE);
3303
3304 return (B_TRUE);
3305 }
3306
3307 #pragma inline(igb_arm_watchdog_timer)
3308
3309 static void
3310 igb_arm_watchdog_timer(igb_t *igb)
3311 {
3312 /*
3313 * Fire a watchdog timer
3314 */
3315 igb->watchdog_tid =
3316 timeout(igb_local_timer,
3317 (void *)igb, 1 * drv_usectohz(1000000));
3318
3319 }
3320
3321 /*
3322 * igb_enable_watchdog_timer - Enable and start the driver watchdog timer
3323 */
3324 void
3325 igb_enable_watchdog_timer(igb_t *igb)
3326 {
3327 mutex_enter(&igb->watchdog_lock);
3328
3329 if (!igb->watchdog_enable) {
3330 igb->watchdog_enable = B_TRUE;
3331 igb->watchdog_start = B_TRUE;
3332 igb_arm_watchdog_timer(igb);
3333 }
3334
3335 mutex_exit(&igb->watchdog_lock);
3336
3337 }
3338
3339 /*
3340 * igb_disable_watchdog_timer - Disable and stop the driver watchdog timer
3341 */
3342 void
3343 igb_disable_watchdog_timer(igb_t *igb)
3344 {
3345 timeout_id_t tid;
3346
3347 mutex_enter(&igb->watchdog_lock);
3348
3349 igb->watchdog_enable = B_FALSE;
3350 igb->watchdog_start = B_FALSE;
3351 tid = igb->watchdog_tid;
3352 igb->watchdog_tid = 0;
3353
3354 mutex_exit(&igb->watchdog_lock);
3355
3356 if (tid != 0)
3357 (void) untimeout(tid);
3358
3359 }
3360
3361 /*
3362 * igb_start_watchdog_timer - Start the driver watchdog timer
3363 */
3364 static void
3365 igb_start_watchdog_timer(igb_t *igb)
3366 {
3367 mutex_enter(&igb->watchdog_lock);
3368
3369 if (igb->watchdog_enable) {
3370 if (!igb->watchdog_start) {
3371 igb->watchdog_start = B_TRUE;
3372 igb_arm_watchdog_timer(igb);
3373 }
3374 }
3375
3376 mutex_exit(&igb->watchdog_lock);
3377 }
3378
3379 /*
3380 * igb_restart_watchdog_timer - Restart the driver watchdog timer
3381 */
3382 static void
3383 igb_restart_watchdog_timer(igb_t *igb)
3384 {
3385 mutex_enter(&igb->watchdog_lock);
3386
3387 if (igb->watchdog_start)
3388 igb_arm_watchdog_timer(igb);
3389
3390 mutex_exit(&igb->watchdog_lock);
3391 }
3392
3393 /*
3394 * igb_stop_watchdog_timer - Stop the driver watchdog timer
3395 */
3396 static void
3397 igb_stop_watchdog_timer(igb_t *igb)
3398 {
3399 timeout_id_t tid;
3400
3401 mutex_enter(&igb->watchdog_lock);
3402
3403 igb->watchdog_start = B_FALSE;
3404 tid = igb->watchdog_tid;
3405 igb->watchdog_tid = 0;
3406
3407 mutex_exit(&igb->watchdog_lock);
3408
3409 if (tid != 0)
3410 (void) untimeout(tid);
3411 }
3412
3413 /*
3414 * igb_start_link_timer - Start the link setup timer
3415 */
3416 static void
3417 igb_start_link_timer(struct igb *igb)
3418 {
3419 struct e1000_hw *hw = &igb->hw;
3420 clock_t link_timeout;
3421
3422 if (hw->mac.autoneg)
3423 link_timeout = PHY_AUTO_NEG_LIMIT *
3424 drv_usectohz(100000);
3425 else
3426 link_timeout = PHY_FORCE_LIMIT * drv_usectohz(100000);
3427
3428 mutex_enter(&igb->link_lock);
3429 if (hw->phy.autoneg_wait_to_complete) {
3430 igb->link_complete = B_TRUE;
3431 } else {
3432 igb->link_complete = B_FALSE;
3433 igb->link_tid = timeout(igb_link_timer, (void *)igb,
3434 link_timeout);
3435 }
3436 mutex_exit(&igb->link_lock);
3437 }
3438
3439 /*
3440 * igb_stop_link_timer - Stop the link setup timer
3441 */
3442 static void
3443 igb_stop_link_timer(struct igb *igb)
3444 {
3445 timeout_id_t tid;
3446
3447 mutex_enter(&igb->link_lock);
3448 igb->link_complete = B_TRUE;
3449 tid = igb->link_tid;
3450 igb->link_tid = 0;
3451 mutex_exit(&igb->link_lock);
3452
3453 if (tid != 0)
3454 (void) untimeout(tid);
3455 }
3456
3457 /*
3458 * igb_disable_adapter_interrupts - Clear/disable all hardware interrupts
3459 */
3460 static void
3461 igb_disable_adapter_interrupts(igb_t *igb)
3462 {
3463 struct e1000_hw *hw = &igb->hw;
3464
3465 /*
3466 * Set the IMC register to mask all the interrupts,
3467 * including the tx interrupts.
3468 */
3469 E1000_WRITE_REG(hw, E1000_IMC, ~0);
3470 E1000_WRITE_REG(hw, E1000_IAM, 0);
3471
3472 /*
3473 * Additional disabling for MSI-X
3474 */
3475 if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3476 E1000_WRITE_REG(hw, E1000_EIMC, ~0);
3477 E1000_WRITE_REG(hw, E1000_EIAC, 0);
3478 E1000_WRITE_REG(hw, E1000_EIAM, 0);
3479 }
3480
3481 E1000_WRITE_FLUSH(hw);
3482 }
3483
3484 /*
3485 * igb_enable_adapter_interrupts_82580 - Enable NIC interrupts for 82580
3486 */
3487 static void
3488 igb_enable_adapter_interrupts_82580(igb_t *igb)
3489 {
3490 struct e1000_hw *hw = &igb->hw;
3491
3492 /* Clear any pending interrupts */
3493 (void) E1000_READ_REG(hw, E1000_ICR);
3494 igb->ims_mask |= E1000_IMS_DRSTA;
3495
3496 if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3497
3498 /* Interrupt enabling for MSI-X */
3499 E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3500 E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3501 igb->ims_mask = (E1000_IMS_LSC | E1000_IMS_DRSTA);
3502 E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
3503 } else { /* Interrupt enabling for MSI and legacy */
3504 E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
3505 igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
3506 igb->ims_mask |= E1000_IMS_DRSTA;
3507 E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
3508 }
3509
3510 /* Disable auto-mask for ICR interrupt bits */
3511 E1000_WRITE_REG(hw, E1000_IAM, 0);
3512
3513 E1000_WRITE_FLUSH(hw);
3514 }
3515
3516 /*
3517 * igb_enable_adapter_interrupts_82576 - Enable NIC interrupts for 82576
3518 */
3519 static void
3520 igb_enable_adapter_interrupts_82576(igb_t *igb)
3521 {
3522 struct e1000_hw *hw = &igb->hw;
3523
3524 /* Clear any pending interrupts */
3525 (void) E1000_READ_REG(hw, E1000_ICR);
3526
3527 if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3528
3529 /* Interrupt enabling for MSI-X */
3530 E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3531 E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3532 igb->ims_mask = E1000_IMS_LSC;
3533 E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
3534 } else {
3535 /* Interrupt enabling for MSI and legacy */
3536 E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
3537 igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
3538 E1000_WRITE_REG(hw, E1000_IMS,
3539 (IMS_ENABLE_MASK | E1000_IMS_TXQE));
3540 }
3541
3542 /* Disable auto-mask for ICR interrupt bits */
3543 E1000_WRITE_REG(hw, E1000_IAM, 0);
3544
3545 E1000_WRITE_FLUSH(hw);
3546 }
3547
3548 /*
3549 * igb_enable_adapter_interrupts_82575 - Enable NIC interrupts for 82575
3550 */
3551 static void
3552 igb_enable_adapter_interrupts_82575(igb_t *igb)
3553 {
3554 struct e1000_hw *hw = &igb->hw;
3555 uint32_t reg;
3556
3557 /* Clear any pending interrupts */
3558 (void) E1000_READ_REG(hw, E1000_ICR);
3559
3560 if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3561 /* Interrupt enabling for MSI-X */
3562 E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3563 E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3564 igb->ims_mask = E1000_IMS_LSC;
3565 E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
3566
3567 /* Enable MSI-X PBA support */
3568 reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
3569 reg |= E1000_CTRL_EXT_PBA_CLR;
3570
3571 /* Non-selective interrupt clear-on-read */
3572 reg |= E1000_CTRL_EXT_IRCA; /* Called NSICR in the EAS */
3573
3574 E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
3575 } else {
3576 /* Interrupt enabling for MSI and legacy */
3577 igb->ims_mask = IMS_ENABLE_MASK;
3578 E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
3579 }
3580
3581 E1000_WRITE_FLUSH(hw);
3582 }
3583
3584 /*
3585 * Loopback Support
3586 */
3587 static lb_property_t lb_normal =
3588 { normal, "normal", IGB_LB_NONE };
3589 static lb_property_t lb_external =
3590 { external, "External", IGB_LB_EXTERNAL };
3591 static lb_property_t lb_phy =
3592 { internal, "PHY", IGB_LB_INTERNAL_PHY };
3593 static lb_property_t lb_serdes =
3594 { internal, "SerDes", IGB_LB_INTERNAL_SERDES };
3595
3596 enum ioc_reply
3597 igb_loopback_ioctl(igb_t *igb, struct iocblk *iocp, mblk_t *mp)
3598 {
3599 lb_info_sz_t *lbsp;
3600 lb_property_t *lbpp;
3601 struct e1000_hw *hw;
3602 uint32_t *lbmp;
3603 uint32_t size;
3604 uint32_t value;
3605
3606 hw = &igb->hw;
3607
3608 if (mp->b_cont == NULL)
3609 return (IOC_INVAL);
3610
3611 switch (iocp->ioc_cmd) {
3612 default:
3613 return (IOC_INVAL);
3614
3615 case LB_GET_INFO_SIZE:
3616 size = sizeof (lb_info_sz_t);
3617 if (iocp->ioc_count != size)
3618 return (IOC_INVAL);
3619
3620 value = sizeof (lb_normal);
3621 if (hw->phy.media_type == e1000_media_type_copper)
3622 value += sizeof (lb_phy);
3623 else
3624 value += sizeof (lb_serdes);
3625 value += sizeof (lb_external);
3626
3627 lbsp = (lb_info_sz_t *)(uintptr_t)mp->b_cont->b_rptr;
3628 *lbsp = value;
3629 break;
3630
3631 case LB_GET_INFO:
3632 value = sizeof (lb_normal);
3633 if (hw->phy.media_type == e1000_media_type_copper)
3634 value += sizeof (lb_phy);
3635 else
3636 value += sizeof (lb_serdes);
3637 value += sizeof (lb_external);
3638
3639 size = value;
3640 if (iocp->ioc_count != size)
3641 return (IOC_INVAL);
3642
3643 value = 0;
3644 lbpp = (lb_property_t *)(uintptr_t)mp->b_cont->b_rptr;
3645
3646 lbpp[value++] = lb_normal;
3647 if (hw->phy.media_type == e1000_media_type_copper)
3648 lbpp[value++] = lb_phy;
3649 else
3650 lbpp[value++] = lb_serdes;
3651 lbpp[value++] = lb_external;
3652 break;
3653
3654 case LB_GET_MODE:
3655 size = sizeof (uint32_t);
3656 if (iocp->ioc_count != size)
3657 return (IOC_INVAL);
3658
3659 lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
3660 *lbmp = igb->loopback_mode;
3661 break;
3662
3663 case LB_SET_MODE:
3664 size = 0;
3665 if (iocp->ioc_count != sizeof (uint32_t))
3666 return (IOC_INVAL);
3667
3668 lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
3669 if (!igb_set_loopback_mode(igb, *lbmp))
3670 return (IOC_INVAL);
3671 break;
3672 }
3673
3674 iocp->ioc_count = size;
3675 iocp->ioc_error = 0;
3676
3677 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
3678 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
3679 return (IOC_INVAL);
3680 }
3681
3682 return (IOC_REPLY);
3683 }
3684
3685 /*
3686 * igb_set_loopback_mode - Setup loopback based on the loopback mode
3687 */
3688 static boolean_t
3689 igb_set_loopback_mode(igb_t *igb, uint32_t mode)
3690 {
3691 struct e1000_hw *hw;
3692 int i;
3693
3694 if (mode == igb->loopback_mode)
3695 return (B_TRUE);
3696
3697 hw = &igb->hw;
3698
3699 igb->loopback_mode = mode;
3700
3701 if (mode == IGB_LB_NONE) {
3702 /* Reset the chip */
3703 hw->phy.autoneg_wait_to_complete = B_TRUE;
3704 (void) igb_reset(igb);
3705 hw->phy.autoneg_wait_to_complete = B_FALSE;
3706 return (B_TRUE);
3707 }
3708
3709 mutex_enter(&igb->gen_lock);
3710
3711 switch (mode) {
3712 default:
3713 mutex_exit(&igb->gen_lock);
3714 return (B_FALSE);
3715
3716 case IGB_LB_EXTERNAL:
3717 igb_set_external_loopback(igb);
3718 break;
3719
3720 case IGB_LB_INTERNAL_PHY:
3721 igb_set_internal_phy_loopback(igb);
3722 break;
3723
3724 case IGB_LB_INTERNAL_SERDES:
3725 igb_set_internal_serdes_loopback(igb);
3726 break;
3727 }
3728
3729 mutex_exit(&igb->gen_lock);
3730
3731 /*
3732 * When external loopback is set, wait up to 1000ms to get the link up.
3733 * According to test, 1000ms can work and it's an experimental value.
3734 */
3735 if (mode == IGB_LB_EXTERNAL) {
3736 for (i = 0; i <= 10; i++) {
3737 mutex_enter(&igb->gen_lock);
3738 (void) igb_link_check(igb);
3739 mutex_exit(&igb->gen_lock);
3740
3741 if (igb->link_state == LINK_STATE_UP)
3742 break;
3743
3744 msec_delay(100);
3745 }
3746
3747 if (igb->link_state != LINK_STATE_UP) {
3748 /*
3749 * Does not support external loopback.
3750 * Reset driver to loopback none.
3751 */
3752 igb->loopback_mode = IGB_LB_NONE;
3753
3754 /* Reset the chip */
3755 hw->phy.autoneg_wait_to_complete = B_TRUE;
3756 (void) igb_reset(igb);
3757 hw->phy.autoneg_wait_to_complete = B_FALSE;
3758
3759 IGB_DEBUGLOG_0(igb, "Set external loopback failed, "
3760 "reset to loopback none.");
3761
3762 return (B_FALSE);
3763 }
3764 }
3765
3766 return (B_TRUE);
3767 }
3768
3769 /*
3770 * igb_set_external_loopback - Set the external loopback mode
3771 */
3772 static void
3773 igb_set_external_loopback(igb_t *igb)
3774 {
3775 struct e1000_hw *hw;
3776 uint32_t ctrl_ext;
3777
3778 hw = &igb->hw;
3779
3780 /* Set link mode to PHY (00b) in the Extended Control register */
3781 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3782 ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
3783 E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3784
3785 (void) e1000_write_phy_reg(hw, 0x0, 0x0140);
3786 (void) e1000_write_phy_reg(hw, 0x9, 0x1a00);
3787 (void) e1000_write_phy_reg(hw, 0x12, 0x1610);
3788 (void) e1000_write_phy_reg(hw, 0x1f37, 0x3f1c);
3789 }
3790
3791 /*
3792 * igb_set_internal_phy_loopback - Set the internal PHY loopback mode
3793 */
3794 static void
3795 igb_set_internal_phy_loopback(igb_t *igb)
3796 {
3797 struct e1000_hw *hw;
3798 uint32_t ctrl_ext;
3799 uint16_t phy_ctrl;
3800 uint16_t phy_pconf;
3801
3802 hw = &igb->hw;
3803
3804 /* Set link mode to PHY (00b) in the Extended Control register */
3805 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3806 ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
3807 E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3808
3809 /*
3810 * Set PHY control register (0x4140):
3811 * Set full duplex mode
3812 * Set loopback bit
3813 * Clear auto-neg enable bit
3814 * Set PHY speed
3815 */
3816 phy_ctrl = MII_CR_FULL_DUPLEX | MII_CR_SPEED_1000 | MII_CR_LOOPBACK;
3817 (void) e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl);
3818
3819 /* Set the link disable bit in the Port Configuration register */
3820 (void) e1000_read_phy_reg(hw, 0x10, &phy_pconf);
3821 phy_pconf |= (uint16_t)1 << 14;
3822 (void) e1000_write_phy_reg(hw, 0x10, phy_pconf);
3823 }
3824
3825 /*
3826 * igb_set_internal_serdes_loopback - Set the internal SerDes loopback mode
3827 */
3828 static void
3829 igb_set_internal_serdes_loopback(igb_t *igb)
3830 {
3831 struct e1000_hw *hw;
3832 uint32_t ctrl_ext;
3833 uint32_t ctrl;
3834 uint32_t pcs_lctl;
3835 uint32_t connsw;
3836
3837 hw = &igb->hw;
3838
3839 /* Set link mode to SerDes (11b) in the Extended Control register */
3840 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3841 ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
3842 E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3843
3844 /* Configure the SerDes to loopback */
3845 E1000_WRITE_REG(hw, E1000_SCTL, 0x410);
3846
3847 /* Set Device Control register */
3848 ctrl = E1000_READ_REG(hw, E1000_CTRL);
3849 ctrl |= (E1000_CTRL_FD | /* Force full duplex */
3850 E1000_CTRL_SLU); /* Force link up */
3851 ctrl &= ~(E1000_CTRL_RFCE | /* Disable receive flow control */
3852 E1000_CTRL_TFCE | /* Disable transmit flow control */
3853 E1000_CTRL_LRST); /* Clear link reset */
3854 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
3855
3856 /* Set PCS Link Control register */
3857 pcs_lctl = E1000_READ_REG(hw, E1000_PCS_LCTL);
3858 pcs_lctl |= (E1000_PCS_LCTL_FORCE_LINK |
3859 E1000_PCS_LCTL_FSD |
3860 E1000_PCS_LCTL_FDV_FULL |
3861 E1000_PCS_LCTL_FLV_LINK_UP);
3862 pcs_lctl &= ~E1000_PCS_LCTL_AN_ENABLE;
3863 E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_lctl);
3864
3865 /* Set the Copper/Fiber Switch Control - CONNSW register */
3866 connsw = E1000_READ_REG(hw, E1000_CONNSW);
3867 connsw &= ~E1000_CONNSW_ENRGSRC;
3868 E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
3869 }
3870
3871 #pragma inline(igb_intr_rx_work)
3872 /*
3873 * igb_intr_rx_work - rx processing of ISR
3874 */
3875 static void
3876 igb_intr_rx_work(igb_rx_ring_t *rx_ring)
3877 {
3878 mblk_t *mp;
3879
3880 mutex_enter(&rx_ring->rx_lock);
3881 mp = igb_rx(rx_ring, IGB_NO_POLL);
3882 mutex_exit(&rx_ring->rx_lock);
3883
3884 if (mp != NULL)
3885 mac_rx_ring(rx_ring->igb->mac_hdl, rx_ring->ring_handle, mp,
3886 rx_ring->ring_gen_num);
3887 }
3888
3889 #pragma inline(igb_intr_tx_work)
3890 /*
3891 * igb_intr_tx_work - tx processing of ISR
3892 */
3893 static void
3894 igb_intr_tx_work(igb_tx_ring_t *tx_ring)
3895 {
3896 igb_t *igb = tx_ring->igb;
3897
3898 /* Recycle the tx descriptors */
3899 tx_ring->tx_recycle(tx_ring);
3900
3901 /* Schedule the re-transmit */
3902 if (tx_ring->reschedule &&
3903 (tx_ring->tbd_free >= igb->tx_resched_thresh)) {
3904 tx_ring->reschedule = B_FALSE;
3905 mac_tx_ring_update(tx_ring->igb->mac_hdl, tx_ring->ring_handle);
3906 IGB_DEBUG_STAT(tx_ring->stat_reschedule);
3907 }
3908 }
3909
3910 #pragma inline(igb_intr_link_work)
3911 /*
3912 * igb_intr_link_work - link-status-change processing of ISR
3913 */
3914 static void
3915 igb_intr_link_work(igb_t *igb)
3916 {
3917 boolean_t link_changed;
3918
3919 igb_stop_watchdog_timer(igb);
3920
3921 mutex_enter(&igb->gen_lock);
3922
3923 /*
3924 * Because we got a link-status-change interrupt, force
3925 * e1000_check_for_link() to look at phy
3926 */
3927 igb->hw.mac.get_link_status = B_TRUE;
3928
3929 /* igb_link_check takes care of link status change */
3930 link_changed = igb_link_check(igb);
3931
3932 /* Get new phy state */
3933 igb_get_phy_state(igb);
3934
3935 mutex_exit(&igb->gen_lock);
3936
3937 if (link_changed)
3938 mac_link_update(igb->mac_hdl, igb->link_state);
3939
3940 igb_start_watchdog_timer(igb);
3941 }
3942
3943 /*
3944 * igb_intr_legacy - Interrupt handler for legacy interrupts
3945 */
3946 static uint_t
3947 igb_intr_legacy(void *arg1, void *arg2)
3948 {
3949 igb_t *igb = (igb_t *)arg1;
3950 igb_tx_ring_t *tx_ring;
3951 uint32_t icr;
3952 mblk_t *mp;
3953 boolean_t tx_reschedule;
3954 boolean_t link_changed;
3955 uint_t result;
3956
3957 _NOTE(ARGUNUSED(arg2));
3958
3959 mutex_enter(&igb->gen_lock);
3960
3961 if (igb->igb_state & IGB_SUSPENDED) {
3962 mutex_exit(&igb->gen_lock);
3963 return (DDI_INTR_UNCLAIMED);
3964 }
3965
3966 mp = NULL;
3967 tx_reschedule = B_FALSE;
3968 link_changed = B_FALSE;
3969 icr = E1000_READ_REG(&igb->hw, E1000_ICR);
3970
3971 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
3972 mutex_exit(&igb->gen_lock);
3973 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
3974 atomic_or_32(&igb->igb_state, IGB_ERROR);
3975 return (DDI_INTR_UNCLAIMED);
3976 }
3977
3978 if (icr & E1000_ICR_INT_ASSERTED) {
3979 /*
3980 * E1000_ICR_INT_ASSERTED bit was set:
3981 * Read(Clear) the ICR, claim this interrupt,
3982 * look for work to do.
3983 */
3984 ASSERT(igb->num_rx_rings == 1);
3985 ASSERT(igb->num_tx_rings == 1);
3986
3987 /* Make sure all interrupt causes cleared */
3988 (void) E1000_READ_REG(&igb->hw, E1000_EICR);
3989
3990 if (icr & E1000_ICR_RXT0) {
3991 mp = igb_rx(&igb->rx_rings[0], IGB_NO_POLL);
3992 }
3993
3994 if (icr & E1000_ICR_TXDW) {
3995 tx_ring = &igb->tx_rings[0];
3996
3997 /* Recycle the tx descriptors */
3998 tx_ring->tx_recycle(tx_ring);
3999
4000 /* Schedule the re-transmit */
4001 tx_reschedule = (tx_ring->reschedule &&
4002 (tx_ring->tbd_free >= igb->tx_resched_thresh));
4003 }
4004
4005 if (icr & E1000_ICR_LSC) {
4006 /*
4007 * Because we got a link-status-change interrupt, force
4008 * e1000_check_for_link() to look at phy
4009 */
4010 igb->hw.mac.get_link_status = B_TRUE;
4011
4012 /* igb_link_check takes care of link status change */
4013 link_changed = igb_link_check(igb);
4014
4015 /* Get new phy state */
4016 igb_get_phy_state(igb);
4017 }
4018
4019 if (icr & E1000_ICR_DRSTA) {
4020 /* 82580 Full Device Reset needed */
4021 atomic_or_32(&igb->igb_state, IGB_STALL);
4022 }
4023
4024 result = DDI_INTR_CLAIMED;
4025 } else {
4026 /*
4027 * E1000_ICR_INT_ASSERTED bit was not set:
4028 * Don't claim this interrupt.
4029 */
4030 result = DDI_INTR_UNCLAIMED;
4031 }
4032
4033 mutex_exit(&igb->gen_lock);
4034
4035 /*
4036 * Do the following work outside of the gen_lock
4037 */
4038 if (mp != NULL)
4039 mac_rx(igb->mac_hdl, NULL, mp);
4040
4041 if (tx_reschedule) {
4042 tx_ring->reschedule = B_FALSE;
4043 mac_tx_ring_update(igb->mac_hdl, tx_ring->ring_handle);
4044 IGB_DEBUG_STAT(tx_ring->stat_reschedule);
4045 }
4046
4047 if (link_changed)
4048 mac_link_update(igb->mac_hdl, igb->link_state);
4049
4050 return (result);
4051 }
4052
4053 /*
4054 * igb_intr_msi - Interrupt handler for MSI
4055 */
4056 static uint_t
4057 igb_intr_msi(void *arg1, void *arg2)
4058 {
4059 igb_t *igb = (igb_t *)arg1;
4060 uint32_t icr;
4061
4062 _NOTE(ARGUNUSED(arg2));
4063
4064 icr = E1000_READ_REG(&igb->hw, E1000_ICR);
4065
4066 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
4067 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
4068 atomic_or_32(&igb->igb_state, IGB_ERROR);
4069 return (DDI_INTR_CLAIMED);
4070 }
4071
4072 /* Make sure all interrupt causes cleared */
4073 (void) E1000_READ_REG(&igb->hw, E1000_EICR);
4074
4075 /*
4076 * For MSI interrupt, we have only one vector,
4077 * so we have only one rx ring and one tx ring enabled.
4078 */
4079 ASSERT(igb->num_rx_rings == 1);
4080 ASSERT(igb->num_tx_rings == 1);
4081
4082 if (icr & E1000_ICR_RXT0) {
4083 igb_intr_rx_work(&igb->rx_rings[0]);
4084 }
4085
4086 if (icr & E1000_ICR_TXDW) {
4087 igb_intr_tx_work(&igb->tx_rings[0]);
4088 }
4089
4090 if (icr & E1000_ICR_LSC) {
4091 igb_intr_link_work(igb);
4092 }
4093
4094 if (icr & E1000_ICR_DRSTA) {
4095 /* 82580 Full Device Reset needed */
4096 atomic_or_32(&igb->igb_state, IGB_STALL);
4097 }
4098
4099 return (DDI_INTR_CLAIMED);
4100 }
4101
4102 /*
4103 * igb_intr_rx - Interrupt handler for rx
4104 */
4105 static uint_t
4106 igb_intr_rx(void *arg1, void *arg2)
4107 {
4108 igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg1;
4109
4110 _NOTE(ARGUNUSED(arg2));
4111
4112 /*
4113 * Only used via MSI-X vector so don't check cause bits
4114 * and only clean the given ring.
4115 */
4116 igb_intr_rx_work(rx_ring);
4117
4118 return (DDI_INTR_CLAIMED);
4119 }
4120
4121 /*
4122 * igb_intr_tx - Interrupt handler for tx
4123 */
4124 static uint_t
4125 igb_intr_tx(void *arg1, void *arg2)
4126 {
4127 igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg1;
4128
4129 _NOTE(ARGUNUSED(arg2));
4130
4131 /*
4132 * Only used via MSI-X vector so don't check cause bits
4133 * and only clean the given ring.
4134 */
4135 igb_intr_tx_work(tx_ring);
4136
4137 return (DDI_INTR_CLAIMED);
4138 }
4139
4140 /*
4141 * igb_intr_tx_other - Interrupt handler for both tx and other
4142 *
4143 */
4144 static uint_t
4145 igb_intr_tx_other(void *arg1, void *arg2)
4146 {
4147 igb_t *igb = (igb_t *)arg1;
4148 uint32_t icr;
4149
4150 _NOTE(ARGUNUSED(arg2));
4151
4152 icr = E1000_READ_REG(&igb->hw, E1000_ICR);
4153
4154 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
4155 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
4156 atomic_or_32(&igb->igb_state, IGB_ERROR);
4157 return (DDI_INTR_CLAIMED);
4158 }
4159
4160 /*
4161 * Look for tx reclaiming work first. Remember, in the
4162 * case of only interrupt sharing, only one tx ring is
4163 * used
4164 */
4165 igb_intr_tx_work(&igb->tx_rings[0]);
4166
4167 /*
4168 * Check for "other" causes.
4169 */
4170 if (icr & E1000_ICR_LSC) {
4171 igb_intr_link_work(igb);
4172 }
4173
4174 /*
4175 * The DOUTSYNC bit indicates a tx packet dropped because
4176 * DMA engine gets "out of sync". There isn't a real fix
4177 * for this. The Intel recommendation is to count the number
4178 * of occurrences so user can detect when it is happening.
4179 * The issue is non-fatal and there's no recovery action
4180 * available.
4181 */
4182 if (icr & E1000_ICR_DOUTSYNC) {
4183 IGB_STAT(igb->dout_sync);
4184 }
4185
4186 if (icr & E1000_ICR_DRSTA) {
4187 /* 82580 Full Device Reset needed */
4188 atomic_or_32(&igb->igb_state, IGB_STALL);
4189 }
4190
4191 return (DDI_INTR_CLAIMED);
4192 }
4193
4194 /*
4195 * igb_alloc_intrs - Allocate interrupts for the driver
4196 *
4197 * Normal sequence is to try MSI-X; if not sucessful, try MSI;
4198 * if not successful, try Legacy.
4199 * igb->intr_force can be used to force sequence to start with
4200 * any of the 3 types.
4201 * If MSI-X is not used, number of tx/rx rings is forced to 1.
4202 */
4203 static int
4204 igb_alloc_intrs(igb_t *igb)
4205 {
4206 dev_info_t *devinfo;
4207 int intr_types;
4208 int rc;
4209
4210 devinfo = igb->dip;
4211
4212 /* Get supported interrupt types */
4213 rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4214
4215 if (rc != DDI_SUCCESS) {
4216 igb_log(igb,
4217 "Get supported interrupt types failed: %d", rc);
4218 return (IGB_FAILURE);
4219 }
4220 IGB_DEBUGLOG_1(igb, "Supported interrupt types: %x", intr_types);
4221
4222 igb->intr_type = 0;
4223
4224 /* Install MSI-X interrupts */
4225 if ((intr_types & DDI_INTR_TYPE_MSIX) &&
4226 (igb->intr_force <= IGB_INTR_MSIX)) {
4227 rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSIX);
4228
4229 if (rc == IGB_SUCCESS)
4230 return (IGB_SUCCESS);
4231
4232 igb_log(igb,
4233 "Allocate MSI-X failed, trying MSI interrupts...");
4234 }
4235
4236 /* MSI-X not used, force rings to 1 */
4237 igb->num_rx_rings = 1;
4238 igb->num_tx_rings = 1;
4239 igb_log(igb,
4240 "MSI-X not used, force rx and tx queue number to 1");
4241
4242 /* Install MSI interrupts */
4243 if ((intr_types & DDI_INTR_TYPE_MSI) &&
4244 (igb->intr_force <= IGB_INTR_MSI)) {
4245 rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSI);
4246
4247 if (rc == IGB_SUCCESS)
4248 return (IGB_SUCCESS);
4249
4250 igb_log(igb,
4251 "Allocate MSI failed, trying Legacy interrupts...");
4252 }
4253
4254 /* Install legacy interrupts */
4255 if (intr_types & DDI_INTR_TYPE_FIXED) {
4256 rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_FIXED);
4257
4258 if (rc == IGB_SUCCESS)
4259 return (IGB_SUCCESS);
4260
4261 igb_log(igb,
4262 "Allocate Legacy interrupts failed");
4263 }
4264
4265 /* If none of the 3 types succeeded, return failure */
4266 return (IGB_FAILURE);
4267 }
4268
4269 /*
4270 * igb_alloc_intr_handles - Allocate interrupt handles.
4271 *
4272 * For legacy and MSI, only 1 handle is needed. For MSI-X,
4273 * if fewer than 2 handles are available, return failure.
4274 * Upon success, this sets the number of Rx rings to a number that
4275 * matches the handles available for Rx interrupts.
4276 */
4277 static int
4278 igb_alloc_intr_handles(igb_t *igb, int intr_type)
4279 {
4280 dev_info_t *devinfo;
4281 int orig, request, count, avail, actual;
4282 int diff, minimum;
4283 int rc;
4284
4285 devinfo = igb->dip;
4286
4287 switch (intr_type) {
4288 case DDI_INTR_TYPE_FIXED:
4289 request = 1; /* Request 1 legacy interrupt handle */
4290 minimum = 1;
4291 IGB_DEBUGLOG_0(igb, "interrupt type: legacy");
4292 break;
4293
4294 case DDI_INTR_TYPE_MSI:
4295 request = 1; /* Request 1 MSI interrupt handle */
4296 minimum = 1;
4297 IGB_DEBUGLOG_0(igb, "interrupt type: MSI");
4298 break;
4299
4300 case DDI_INTR_TYPE_MSIX:
4301 /*
4302 * Number of vectors for the adapter is
4303 * # rx rings + # tx rings
4304 * One of tx vectors is for tx & other
4305 */
4306 request = igb->num_rx_rings + igb->num_tx_rings;
4307 orig = request;
4308 minimum = 2;
4309 IGB_DEBUGLOG_0(igb, "interrupt type: MSI-X");
4310 break;
4311
4312 default:
4313 igb_log(igb,
4314 "invalid call to igb_alloc_intr_handles(): %d\n",
4315 intr_type);
4316 return (IGB_FAILURE);
4317 }
4318 IGB_DEBUGLOG_2(igb, "interrupt handles requested: %d minimum: %d",
4319 request, minimum);
4320
4321 /*
4322 * Get number of supported interrupts
4323 */
4324 rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
4325 if ((rc != DDI_SUCCESS) || (count < minimum)) {
4326 igb_log(igb,
4327 "Get supported interrupt number failed. "
4328 "Return: %d, count: %d", rc, count);
4329 return (IGB_FAILURE);
4330 }
4331 IGB_DEBUGLOG_1(igb, "interrupts supported: %d", count);
4332
4333 /*
4334 * Get number of available interrupts
4335 */
4336 rc = ddi_intr_get_navail(devinfo, intr_type, &avail);
4337 if ((rc != DDI_SUCCESS) || (avail < minimum)) {
4338 igb_log(igb,
4339 "Get available interrupt number failed. "
4340 "Return: %d, available: %d", rc, avail);
4341 return (IGB_FAILURE);
4342 }
4343 IGB_DEBUGLOG_1(igb, "interrupts available: %d", avail);
4344
4345 if (avail < request) {
4346 igb_log(igb, "Request %d handles, %d available",
4347 request, avail);
4348 request = avail;
4349 }
4350
4351 actual = 0;
4352 igb->intr_cnt = 0;
4353
4354 /*
4355 * Allocate an array of interrupt handles
4356 */
4357 igb->intr_size = request * sizeof (ddi_intr_handle_t);
4358 igb->htable = kmem_alloc(igb->intr_size, KM_SLEEP);
4359
4360 rc = ddi_intr_alloc(devinfo, igb->htable, intr_type, 0,
4361 request, &actual, DDI_INTR_ALLOC_NORMAL);
4362 if (rc != DDI_SUCCESS) {
4363 igb_log(igb, "Allocate interrupts failed. "
4364 "return: %d, request: %d, actual: %d",
4365 rc, request, actual);
4366 goto alloc_handle_fail;
4367 }
4368 IGB_DEBUGLOG_1(igb, "interrupts actually allocated: %d", actual);
4369
4370 igb->intr_cnt = actual;
4371
4372 if (actual < minimum) {
4373 igb_log(igb, "Insufficient interrupt handles allocated: %d",
4374 actual);
4375 goto alloc_handle_fail;
4376 }
4377
4378 /*
4379 * For MSI-X, actual might force us to reduce number of tx & rx rings
4380 */
4381 if ((intr_type == DDI_INTR_TYPE_MSIX) && (orig > actual)) {
4382 diff = orig - actual;
4383 if (diff < igb->num_tx_rings) {
4384 igb_log(igb,
4385 "MSI-X vectors force Tx queue number to %d",
4386 igb->num_tx_rings - diff);
4387 igb->num_tx_rings -= diff;
4388 } else {
4389 igb_log(igb,
4390 "MSI-X vectors force Tx queue number to 1");
4391 igb->num_tx_rings = 1;
4392
4393 igb_log(igb,
4394 "MSI-X vectors force Rx queue number to %d",
4395 actual - 1);
4396 igb->num_rx_rings = actual - 1;
4397 }
4398 }
4399
4400 /*
4401 * Get priority for first vector, assume remaining are all the same
4402 */
4403 rc = ddi_intr_get_pri(igb->htable[0], &igb->intr_pri);
4404 if (rc != DDI_SUCCESS) {
4405 igb_log(igb,
4406 "Get interrupt priority failed: %d", rc);
4407 goto alloc_handle_fail;
4408 }
4409
4410 rc = ddi_intr_get_cap(igb->htable[0], &igb->intr_cap);
4411 if (rc != DDI_SUCCESS) {
4412 igb_log(igb,
4413 "Get interrupt cap failed: %d", rc);
4414 goto alloc_handle_fail;
4415 }
4416
4417 igb->intr_type = intr_type;
4418
4419 return (IGB_SUCCESS);
4420
4421 alloc_handle_fail:
4422 igb_rem_intrs(igb);
4423
4424 return (IGB_FAILURE);
4425 }
4426
4427 /*
4428 * igb_add_intr_handlers - Add interrupt handlers based on the interrupt type
4429 *
4430 * Before adding the interrupt handlers, the interrupt vectors have
4431 * been allocated, and the rx/tx rings have also been allocated.
4432 */
4433 static int
4434 igb_add_intr_handlers(igb_t *igb)
4435 {
4436 igb_rx_ring_t *rx_ring;
4437 igb_tx_ring_t *tx_ring;
4438 int vector;
4439 int rc;
4440 int i;
4441
4442 vector = 0;
4443
4444 switch (igb->intr_type) {
4445 case DDI_INTR_TYPE_MSIX:
4446 /* Add interrupt handler for tx + other */
4447 tx_ring = &igb->tx_rings[0];
4448 rc = ddi_intr_add_handler(igb->htable[vector],
4449 (ddi_intr_handler_t *)igb_intr_tx_other,
4450 (void *)igb, NULL);
4451
4452 if (rc != DDI_SUCCESS) {
4453 igb_log(igb,
4454 "Add tx/other interrupt handler failed: %d", rc);
4455 return (IGB_FAILURE);
4456 }
4457 tx_ring->intr_vector = vector;
4458 vector++;
4459
4460 /* Add interrupt handler for each rx ring */
4461 for (i = 0; i < igb->num_rx_rings; i++) {
4462 rx_ring = &igb->rx_rings[i];
4463
4464 rc = ddi_intr_add_handler(igb->htable[vector],
4465 (ddi_intr_handler_t *)igb_intr_rx,
4466 (void *)rx_ring, NULL);
4467
4468 if (rc != DDI_SUCCESS) {
4469 igb_log(igb,
4470 "Add rx interrupt handler failed. "
4471 "return: %d, rx ring: %d", rc, i);
4472 for (vector--; vector >= 0; vector--) {
4473 (void) ddi_intr_remove_handler(
4474 igb->htable[vector]);
4475 }
4476 return (IGB_FAILURE);
4477 }
4478
4479 rx_ring->intr_vector = vector;
4480
4481 vector++;
4482 }
4483
4484 /* Add interrupt handler for each tx ring from 2nd ring */
4485 for (i = 1; i < igb->num_tx_rings; i++) {
4486 tx_ring = &igb->tx_rings[i];
4487
4488 rc = ddi_intr_add_handler(igb->htable[vector],
4489 (ddi_intr_handler_t *)igb_intr_tx,
4490 (void *)tx_ring, NULL);
4491
4492 if (rc != DDI_SUCCESS) {
4493 igb_log(igb,
4494 "Add tx interrupt handler failed. "
4495 "return: %d, tx ring: %d", rc, i);
4496 for (vector--; vector >= 0; vector--) {
4497 (void) ddi_intr_remove_handler(
4498 igb->htable[vector]);
4499 }
4500 return (IGB_FAILURE);
4501 }
4502
4503 tx_ring->intr_vector = vector;
4504
4505 vector++;
4506 }
4507
4508 break;
4509
4510 case DDI_INTR_TYPE_MSI:
4511 /* Add interrupt handlers for the only vector */
4512 rc = ddi_intr_add_handler(igb->htable[vector],
4513 (ddi_intr_handler_t *)igb_intr_msi,
4514 (void *)igb, NULL);
4515
4516 if (rc != DDI_SUCCESS) {
4517 igb_log(igb,
4518 "Add MSI interrupt handler failed: %d", rc);
4519 return (IGB_FAILURE);
4520 }
4521
4522 rx_ring = &igb->rx_rings[0];
4523 rx_ring->intr_vector = vector;
4524
4525 vector++;
4526 break;
4527
4528 case DDI_INTR_TYPE_FIXED:
4529 /* Add interrupt handlers for the only vector */
4530 rc = ddi_intr_add_handler(igb->htable[vector],
4531 (ddi_intr_handler_t *)igb_intr_legacy,
4532 (void *)igb, NULL);
4533
4534 if (rc != DDI_SUCCESS) {
4535 igb_log(igb,
4536 "Add legacy interrupt handler failed: %d", rc);
4537 return (IGB_FAILURE);
4538 }
4539
4540 rx_ring = &igb->rx_rings[0];
4541 rx_ring->intr_vector = vector;
4542
4543 vector++;
4544 break;
4545
4546 default:
4547 return (IGB_FAILURE);
4548 }
4549
4550 ASSERT(vector == igb->intr_cnt);
4551
4552 return (IGB_SUCCESS);
4553 }
4554
4555 /*
4556 * igb_setup_msix_82575 - setup 82575 adapter to use MSI-X interrupts
4557 *
4558 * For each vector enabled on the adapter, Set the MSIXBM register accordingly
4559 */
4560 static void
4561 igb_setup_msix_82575(igb_t *igb)
4562 {
4563 uint32_t eims = 0;
4564 int i, vector;
4565 struct e1000_hw *hw = &igb->hw;
4566
4567 /*
4568 * Set vector for tx ring 0 and other causes.
4569 * NOTE assumption that it is vector 0.
4570 */
4571 vector = 0;
4572
4573 igb->eims_mask = E1000_EICR_TX_QUEUE0 | E1000_EICR_OTHER;
4574 E1000_WRITE_REG(hw, E1000_MSIXBM(vector), igb->eims_mask);
4575 vector++;
4576
4577 for (i = 0; i < igb->num_rx_rings; i++) {
4578 /*
4579 * Set vector for each rx ring
4580 */
4581 eims = (E1000_EICR_RX_QUEUE0 << i);
4582 E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
4583
4584 /*
4585 * Accumulate bits to enable in
4586 * igb_enable_adapter_interrupts_82575()
4587 */
4588 igb->eims_mask |= eims;
4589
4590 vector++;
4591 }
4592
4593 for (i = 1; i < igb->num_tx_rings; i++) {
4594 /*
4595 * Set vector for each tx ring from 2nd tx ring
4596 */
4597 eims = (E1000_EICR_TX_QUEUE0 << i);
4598 E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
4599
4600 /*
4601 * Accumulate bits to enable in
4602 * igb_enable_adapter_interrupts_82575()
4603 */
4604 igb->eims_mask |= eims;
4605
4606 vector++;
4607 }
4608
4609 ASSERT(vector == igb->intr_cnt);
4610
4611 /*
4612 * Disable IAM for ICR interrupt bits
4613 */
4614 E1000_WRITE_REG(hw, E1000_IAM, 0);
4615 E1000_WRITE_FLUSH(hw);
4616 }
4617
4618 /*
4619 * igb_setup_msix_82576 - setup 82576 adapter to use MSI-X interrupts
4620 *
4621 * 82576 uses a table based method for assigning vectors. Each queue has a
4622 * single entry in the table to which we write a vector number along with a
4623 * "valid" bit. The entry is a single byte in a 4-byte register. Vectors
4624 * take a different position in the 4-byte register depending on whether
4625 * they are numbered above or below 8.
4626 */
4627 static void
4628 igb_setup_msix_82576(igb_t *igb)
4629 {
4630 struct e1000_hw *hw = &igb->hw;
4631 uint32_t ivar, index, vector;
4632 int i;
4633
4634 /* must enable msi-x capability before IVAR settings */
4635 E1000_WRITE_REG(hw, E1000_GPIE,
4636 (E1000_GPIE_MSIX_MODE | E1000_GPIE_PBA | E1000_GPIE_NSICR));
4637
4638 /*
4639 * Set vector for tx ring 0 and other causes.
4640 * NOTE assumption that it is vector 0.
4641 * This is also interdependent with installation of interrupt service
4642 * routines in igb_add_intr_handlers().
4643 */
4644
4645 /* assign "other" causes to vector 0 */
4646 vector = 0;
4647 ivar = ((vector | E1000_IVAR_VALID) << 8);
4648 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
4649
4650 /* assign tx ring 0 to vector 0 */
4651 ivar = ((vector | E1000_IVAR_VALID) << 8);
4652 E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
4653
4654 /* prepare to enable tx & other interrupt causes */
4655 igb->eims_mask = (1 << vector);
4656
4657 vector ++;
4658 for (i = 0; i < igb->num_rx_rings; i++) {
4659 /*
4660 * Set vector for each rx ring
4661 */
4662 index = (i & 0x7);
4663 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4664
4665 if (i < 8) {
4666 /* vector goes into low byte of register */
4667 ivar = ivar & 0xFFFFFF00;
4668 ivar |= (vector | E1000_IVAR_VALID);
4669 } else {
4670 /* vector goes into third byte of register */
4671 ivar = ivar & 0xFF00FFFF;
4672 ivar |= ((vector | E1000_IVAR_VALID) << 16);
4673 }
4674 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4675
4676 /* Accumulate interrupt-cause bits to enable */
4677 igb->eims_mask |= (1 << vector);
4678
4679 vector ++;
4680 }
4681
4682 for (i = 1; i < igb->num_tx_rings; i++) {
4683 /*
4684 * Set vector for each tx ring from 2nd tx ring.
4685 * Note assumption that tx vectors numericall follow rx vectors.
4686 */
4687 index = (i & 0x7);
4688 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4689
4690 if (i < 8) {
4691 /* vector goes into second byte of register */
4692 ivar = ivar & 0xFFFF00FF;
4693 ivar |= ((vector | E1000_IVAR_VALID) << 8);
4694 } else {
4695 /* vector goes into fourth byte of register */
4696 ivar = ivar & 0x00FFFFFF;
4697 ivar |= (vector | E1000_IVAR_VALID) << 24;
4698 }
4699 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4700
4701 /* Accumulate interrupt-cause bits to enable */
4702 igb->eims_mask |= (1 << vector);
4703
4704 vector ++;
4705 }
4706
4707 ASSERT(vector == igb->intr_cnt);
4708 }
4709
4710 /*
4711 * igb_setup_msix_82580 - setup 82580 adapter to use MSI-X interrupts
4712 *
4713 * 82580 uses same table approach at 82576 but has fewer entries. Each
4714 * queue has a single entry in the table to which we write a vector number
4715 * along with a "valid" bit. Vectors take a different position in the
4716 * register depending on * whether * they are numbered above or below 4.
4717 */
4718 static void
4719 igb_setup_msix_82580(igb_t *igb)
4720 {
4721 struct e1000_hw *hw = &igb->hw;
4722 uint32_t ivar, index, vector;
4723 int i;
4724
4725 /* must enable msi-x capability before IVAR settings */
4726 E1000_WRITE_REG(hw, E1000_GPIE, (E1000_GPIE_MSIX_MODE |
4727 E1000_GPIE_PBA | E1000_GPIE_NSICR | E1000_GPIE_EIAME));
4728 /*
4729 * Set vector for tx ring 0 and other causes.
4730 * NOTE assumption that it is vector 0.
4731 * This is also interdependent with installation of interrupt service
4732 * routines in igb_add_intr_handlers().
4733 */
4734
4735 /* assign "other" causes to vector 0 */
4736 vector = 0;
4737 ivar = ((vector | E1000_IVAR_VALID) << 8);
4738 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
4739
4740 /* assign tx ring 0 to vector 0 */
4741 ivar = ((vector | E1000_IVAR_VALID) << 8);
4742 E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
4743
4744 /* prepare to enable tx & other interrupt causes */
4745 igb->eims_mask = (1 << vector);
4746
4747 vector ++;
4748
4749 for (i = 0; i < igb->num_rx_rings; i++) {
4750 /*
4751 * Set vector for each rx ring
4752 */
4753 index = (i >> 1);
4754 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4755
4756 if (i & 1) {
4757 /* vector goes into third byte of register */
4758 ivar = ivar & 0xFF00FFFF;
4759 ivar |= ((vector | E1000_IVAR_VALID) << 16);
4760 } else {
4761 /* vector goes into low byte of register */
4762 ivar = ivar & 0xFFFFFF00;
4763 ivar |= (vector | E1000_IVAR_VALID);
4764 }
4765 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4766
4767 /* Accumulate interrupt-cause bits to enable */
4768 igb->eims_mask |= (1 << vector);
4769
4770 vector ++;
4771 }
4772
4773 for (i = 1; i < igb->num_tx_rings; i++) {
4774 /*
4775 * Set vector for each tx ring from 2nd tx ring.
4776 * Note assumption that tx vectors numericall follow rx vectors.
4777 */
4778 index = (i >> 1);
4779 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4780
4781 if (i & 1) {
4782 /* vector goes into high byte of register */
4783 ivar = ivar & 0x00FFFFFF;
4784 ivar |= ((vector | E1000_IVAR_VALID) << 24);
4785 } else {
4786 /* vector goes into second byte of register */
4787 ivar = ivar & 0xFFFF00FF;
4788 ivar |= (vector | E1000_IVAR_VALID) << 8;
4789 }
4790 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4791
4792 /* Accumulate interrupt-cause bits to enable */
4793 igb->eims_mask |= (1 << vector);
4794
4795 vector ++;
4796 }
4797 ASSERT(vector == igb->intr_cnt);
4798 }
4799
4800 /*
4801 * igb_rem_intr_handlers - remove the interrupt handlers
4802 */
4803 static void
4804 igb_rem_intr_handlers(igb_t *igb)
4805 {
4806 int i;
4807 int rc;
4808
4809 for (i = 0; i < igb->intr_cnt; i++) {
4810 rc = ddi_intr_remove_handler(igb->htable[i]);
4811 if (rc != DDI_SUCCESS) {
4812 IGB_DEBUGLOG_1(igb,
4813 "Remove intr handler failed: %d", rc);
4814 }
4815 }
4816 }
4817
4818 /*
4819 * igb_rem_intrs - remove the allocated interrupts
4820 */
4821 static void
4822 igb_rem_intrs(igb_t *igb)
4823 {
4824 int i;
4825 int rc;
4826
4827 for (i = 0; i < igb->intr_cnt; i++) {
4828 rc = ddi_intr_free(igb->htable[i]);
4829 if (rc != DDI_SUCCESS) {
4830 IGB_DEBUGLOG_1(igb,
4831 "Free intr failed: %d", rc);
4832 }
4833 }
4834
4835 kmem_free(igb->htable, igb->intr_size);
4836 igb->htable = NULL;
4837 }
4838
4839 /*
4840 * igb_enable_intrs - enable all the ddi interrupts
4841 */
4842 static int
4843 igb_enable_intrs(igb_t *igb)
4844 {
4845 int i;
4846 int rc;
4847
4848 /* Enable interrupts */
4849 if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
4850 /* Call ddi_intr_block_enable() for MSI */
4851 rc = ddi_intr_block_enable(igb->htable, igb->intr_cnt);
4852 if (rc != DDI_SUCCESS) {
4853 igb_log(igb,
4854 "Enable block intr failed: %d", rc);
4855 return (IGB_FAILURE);
4856 }
4857 } else {
4858 /* Call ddi_intr_enable() for Legacy/MSI non block enable */
4859 for (i = 0; i < igb->intr_cnt; i++) {
4860 rc = ddi_intr_enable(igb->htable[i]);
4861 if (rc != DDI_SUCCESS) {
4862 igb_log(igb,
4863 "Enable intr failed: %d", rc);
4864 return (IGB_FAILURE);
4865 }
4866 }
4867 }
4868
4869 return (IGB_SUCCESS);
4870 }
4871
4872 /*
4873 * igb_disable_intrs - disable all the ddi interrupts
4874 */
4875 static int
4876 igb_disable_intrs(igb_t *igb)
4877 {
4878 int i;
4879 int rc;
4880
4881 /* Disable all interrupts */
4882 if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
4883 rc = ddi_intr_block_disable(igb->htable, igb->intr_cnt);
4884 if (rc != DDI_SUCCESS) {
4885 igb_log(igb,
4886 "Disable block intr failed: %d", rc);
4887 return (IGB_FAILURE);
4888 }
4889 } else {
4890 for (i = 0; i < igb->intr_cnt; i++) {
4891 rc = ddi_intr_disable(igb->htable[i]);
4892 if (rc != DDI_SUCCESS) {
4893 igb_log(igb,
4894 "Disable intr failed: %d", rc);
4895 return (IGB_FAILURE);
4896 }
4897 }
4898 }
4899
4900 return (IGB_SUCCESS);
4901 }
4902
4903 /*
4904 * igb_get_phy_state - Get and save the parameters read from PHY registers
4905 */
4906 static void
4907 igb_get_phy_state(igb_t *igb)
4908 {
4909 struct e1000_hw *hw = &igb->hw;
4910 uint16_t phy_ctrl;
4911 uint16_t phy_status;
4912 uint16_t phy_an_adv;
4913 uint16_t phy_an_exp;
4914 uint16_t phy_ext_status;
4915 uint16_t phy_1000t_ctrl;
4916 uint16_t phy_1000t_status;
4917 uint16_t phy_lp_able;
4918
4919 ASSERT(mutex_owned(&igb->gen_lock));
4920
4921 if (hw->phy.media_type == e1000_media_type_copper) {
4922 (void) e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl);
4923 (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status);
4924 (void) e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &phy_an_adv);
4925 (void) e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_an_exp);
4926 (void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, &phy_ext_status);
4927 (void) e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_1000t_ctrl);
4928 (void) e1000_read_phy_reg(hw,
4929 PHY_1000T_STATUS, &phy_1000t_status);
4930 (void) e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_lp_able);
4931
4932 igb->param_autoneg_cap =
4933 (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0;
4934 igb->param_pause_cap =
4935 (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
4936 igb->param_asym_pause_cap =
4937 (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
4938 igb->param_1000fdx_cap =
4939 ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) ||
4940 (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0;
4941 igb->param_1000hdx_cap =
4942 ((phy_ext_status & IEEE_ESR_1000T_HD_CAPS) ||
4943 (phy_ext_status & IEEE_ESR_1000X_HD_CAPS)) ? 1 : 0;
4944 igb->param_100t4_cap =
4945 (phy_status & MII_SR_100T4_CAPS) ? 1 : 0;
4946 igb->param_100fdx_cap = ((phy_status & MII_SR_100X_FD_CAPS) ||
4947 (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0;
4948 igb->param_100hdx_cap = ((phy_status & MII_SR_100X_HD_CAPS) ||
4949 (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0;
4950 igb->param_10fdx_cap =
4951 (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0;
4952 igb->param_10hdx_cap =
4953 (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0;
4954 igb->param_rem_fault =
4955 (phy_status & MII_SR_REMOTE_FAULT) ? 1 : 0;
4956
4957 igb->param_adv_autoneg_cap = hw->mac.autoneg;
4958 igb->param_adv_pause_cap =
4959 (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
4960 igb->param_adv_asym_pause_cap =
4961 (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
4962 igb->param_adv_1000hdx_cap =
4963 (phy_1000t_ctrl & CR_1000T_HD_CAPS) ? 1 : 0;
4964 igb->param_adv_100t4_cap =
4965 (phy_an_adv & NWAY_AR_100T4_CAPS) ? 1 : 0;
4966 igb->param_adv_rem_fault =
4967 (phy_an_adv & NWAY_AR_REMOTE_FAULT) ? 1 : 0;
4968 if (igb->param_adv_autoneg_cap == 1) {
4969 igb->param_adv_1000fdx_cap =
4970 (phy_1000t_ctrl & CR_1000T_FD_CAPS) ? 1 : 0;
4971 igb->param_adv_100fdx_cap =
4972 (phy_an_adv & NWAY_AR_100TX_FD_CAPS) ? 1 : 0;
4973 igb->param_adv_100hdx_cap =
4974 (phy_an_adv & NWAY_AR_100TX_HD_CAPS) ? 1 : 0;
4975 igb->param_adv_10fdx_cap =
4976 (phy_an_adv & NWAY_AR_10T_FD_CAPS) ? 1 : 0;
4977 igb->param_adv_10hdx_cap =
4978 (phy_an_adv & NWAY_AR_10T_HD_CAPS) ? 1 : 0;
4979 }
4980
4981 igb->param_lp_autoneg_cap =
4982 (phy_an_exp & NWAY_ER_LP_NWAY_CAPS) ? 1 : 0;
4983 igb->param_lp_pause_cap =
4984 (phy_lp_able & NWAY_LPAR_PAUSE) ? 1 : 0;
4985 igb->param_lp_asym_pause_cap =
4986 (phy_lp_able & NWAY_LPAR_ASM_DIR) ? 1 : 0;
4987 igb->param_lp_1000fdx_cap =
4988 (phy_1000t_status & SR_1000T_LP_FD_CAPS) ? 1 : 0;
4989 igb->param_lp_1000hdx_cap =
4990 (phy_1000t_status & SR_1000T_LP_HD_CAPS) ? 1 : 0;
4991 igb->param_lp_100t4_cap =
4992 (phy_lp_able & NWAY_LPAR_100T4_CAPS) ? 1 : 0;
4993 igb->param_lp_100fdx_cap =
4994 (phy_lp_able & NWAY_LPAR_100TX_FD_CAPS) ? 1 : 0;
4995 igb->param_lp_100hdx_cap =
4996 (phy_lp_able & NWAY_LPAR_100TX_HD_CAPS) ? 1 : 0;
4997 igb->param_lp_10fdx_cap =
4998 (phy_lp_able & NWAY_LPAR_10T_FD_CAPS) ? 1 : 0;
4999 igb->param_lp_10hdx_cap =
5000 (phy_lp_able & NWAY_LPAR_10T_HD_CAPS) ? 1 : 0;
5001 igb->param_lp_rem_fault =
5002 (phy_lp_able & NWAY_LPAR_REMOTE_FAULT) ? 1 : 0;
5003 } else {
5004 /*
5005 * 1Gig Fiber adapter only offers 1Gig Full Duplex.
5006 */
5007 igb->param_autoneg_cap = 0;
5008 igb->param_pause_cap = 1;
5009 igb->param_asym_pause_cap = 1;
5010 igb->param_1000fdx_cap = 1;
5011 igb->param_1000hdx_cap = 0;
5012 igb->param_100t4_cap = 0;
5013 igb->param_100fdx_cap = 0;
5014 igb->param_100hdx_cap = 0;
5015 igb->param_10fdx_cap = 0;
5016 igb->param_10hdx_cap = 0;
5017
5018 igb->param_adv_autoneg_cap = 0;
5019 igb->param_adv_pause_cap = 1;
5020 igb->param_adv_asym_pause_cap = 1;
5021 igb->param_adv_1000fdx_cap = 1;
5022 igb->param_adv_1000hdx_cap = 0;
5023 igb->param_adv_100t4_cap = 0;
5024 igb->param_adv_100fdx_cap = 0;
5025 igb->param_adv_100hdx_cap = 0;
5026 igb->param_adv_10fdx_cap = 0;
5027 igb->param_adv_10hdx_cap = 0;
5028
5029 igb->param_lp_autoneg_cap = 0;
5030 igb->param_lp_pause_cap = 0;
5031 igb->param_lp_asym_pause_cap = 0;
5032 igb->param_lp_1000fdx_cap = 0;
5033 igb->param_lp_1000hdx_cap = 0;
5034 igb->param_lp_100t4_cap = 0;
5035 igb->param_lp_100fdx_cap = 0;
5036 igb->param_lp_100hdx_cap = 0;
5037 igb->param_lp_10fdx_cap = 0;
5038 igb->param_lp_10hdx_cap = 0;
5039 igb->param_lp_rem_fault = 0;
5040 }
5041 }
5042
5043 /*
5044 * synchronize the adv* and en* parameters.
5045 *
5046 * See comments in <sys/dld.h> for details of the *_en_*
5047 * parameters. The usage of ndd for setting adv parameters will
5048 * synchronize all the en parameters with the e1000g parameters,
5049 * implicitly disabling any settings made via dladm.
5050 */
5051 static void
5052 igb_param_sync(igb_t *igb)
5053 {
5054 igb->param_en_1000fdx_cap = igb->param_adv_1000fdx_cap;
5055 igb->param_en_1000hdx_cap = igb->param_adv_1000hdx_cap;
5056 igb->param_en_100t4_cap = igb->param_adv_100t4_cap;
5057 igb->param_en_100fdx_cap = igb->param_adv_100fdx_cap;
5058 igb->param_en_100hdx_cap = igb->param_adv_100hdx_cap;
5059 igb->param_en_10fdx_cap = igb->param_adv_10fdx_cap;
5060 igb->param_en_10hdx_cap = igb->param_adv_10hdx_cap;
5061 }
5062
5063 /*
5064 * igb_get_driver_control
5065 */
5066 static void
5067 igb_get_driver_control(struct e1000_hw *hw)
5068 {
5069 uint32_t ctrl_ext;
5070
5071 /* Notify firmware that driver is in control of device */
5072 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
5073 ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
5074 E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
5075 }
5076
5077 /*
5078 * igb_release_driver_control
5079 */
5080 static void
5081 igb_release_driver_control(struct e1000_hw *hw)
5082 {
5083 uint32_t ctrl_ext;
5084
5085 /* Notify firmware that driver is no longer in control of device */
5086 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
5087 ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
5088 E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
5089 }
5090
5091 /*
5092 * igb_atomic_reserve - Atomic decrease operation
5093 */
5094 int
5095 igb_atomic_reserve(uint32_t *count_p, uint32_t n)
5096 {
5097 uint32_t oldval;
5098 uint32_t newval;
5099
5100 /* ATOMICALLY */
5101 do {
5102 oldval = *count_p;
5103 if (oldval < n)
5104 return (-1);
5105 newval = oldval - n;
5106 } while (atomic_cas_32(count_p, oldval, newval) != oldval);
5107
5108 return (newval);
5109 }
5110
5111 /*
5112 * FMA support
5113 */
5114
5115 int
5116 igb_check_acc_handle(ddi_acc_handle_t handle)
5117 {
5118 ddi_fm_error_t de;
5119
5120 ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
5121 ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
5122 return (de.fme_status);
5123 }
5124
5125 int
5126 igb_check_dma_handle(ddi_dma_handle_t handle)
5127 {
5128 ddi_fm_error_t de;
5129
5130 ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
5131 return (de.fme_status);
5132 }
5133
5134 /*
5135 * The IO fault service error handling callback function
5136 */
5137 /*ARGSUSED*/
5138 static int
5139 igb_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
5140 {
5141 /*
5142 * as the driver can always deal with an error in any dma or
5143 * access handle, we can just return the fme_status value.
5144 */
5145 pci_ereport_post(dip, err, NULL);
5146 return (err->fme_status);
5147 }
5148
5149 static void
5150 igb_fm_init(igb_t *igb)
5151 {
5152 ddi_iblock_cookie_t iblk;
5153 int fma_dma_flag;
5154
5155 /* Only register with IO Fault Services if we have some capability */
5156 if (igb->fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
5157 igb_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
5158 } else {
5159 igb_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
5160 }
5161
5162 if (igb->fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
5163 fma_dma_flag = 1;
5164 } else {
5165 fma_dma_flag = 0;
5166 }
5167
5168 (void) igb_set_fma_flags(fma_dma_flag);
5169
5170 if (igb->fm_capabilities) {
5171
5172 /* Register capabilities with IO Fault Services */
5173 ddi_fm_init(igb->dip, &igb->fm_capabilities, &iblk);
5174
5175 /*
5176 * Initialize pci ereport capabilities if ereport capable
5177 */
5178 if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
5179 DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5180 pci_ereport_setup(igb->dip);
5181
5182 /*
5183 * Register error callback if error callback capable
5184 */
5185 if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5186 ddi_fm_handler_register(igb->dip,
5187 igb_fm_error_cb, (void*) igb);
5188 }
5189 }
5190
5191 static void
5192 igb_fm_fini(igb_t *igb)
5193 {
5194 /* Only unregister FMA capabilities if we registered some */
5195 if (igb->fm_capabilities) {
5196
5197 /*
5198 * Release any resources allocated by pci_ereport_setup()
5199 */
5200 if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
5201 DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5202 pci_ereport_teardown(igb->dip);
5203
5204 /*
5205 * Un-register error callback if error callback capable
5206 */
5207 if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5208 ddi_fm_handler_unregister(igb->dip);
5209
5210 /* Unregister from IO Fault Services */
5211 ddi_fm_fini(igb->dip);
5212 }
5213 }
5214
5215 void
5216 igb_fm_ereport(igb_t *igb, char *detail)
5217 {
5218 uint64_t ena;
5219 char buf[FM_MAX_CLASS];
5220
5221 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
5222 ena = fm_ena_generate(0, FM_ENA_FMT1);
5223 if (DDI_FM_EREPORT_CAP(igb->fm_capabilities)) {
5224 ddi_fm_ereport_post(igb->dip, buf, ena, DDI_NOSLEEP,
5225 FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
5226 }
5227 }