1 /*
   2 * CDDL HEADER START
   3 *
   4 * The contents of this file are subject to the terms of the
   5 * Common Development and Distribution License, v.1,  (the "License").
   6 * You may not use this file except in compliance with the License.
   7 *
   8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9 * or http://opensource.org/licenses/CDDL-1.0.
  10 * See the License for the specific language governing permissions
  11 * and limitations under the License.
  12 *
  13 * When distributing Covered Code, include this CDDL HEADER in each
  14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15 * If applicable, add the following below this CDDL HEADER, with the
  16 * fields enclosed by brackets "[]" replaced with your own identifying
  17 * information: Portions Copyright [yyyy] [name of copyright owner]
  18 *
  19 * CDDL HEADER END
  20 */
  21 
  22 /*
  23 * Copyright 2014-2017 Cavium, Inc.
  24 * The contents of this file are subject to the terms of the Common Development
  25 * and Distribution License, v.1, (the "License").
  26 *
  27 * You may not use this file except in compliance with the License.
  28 *
  29 * You can obtain a copy of the License
  30 * at http://opensource.org/licenses/CDDL-1.0
  31 *
  32 * See the License for the specific language governing permissions and
  33 * limitations under the License.
  34 */
  35 
  36 #include "qede.h"
  37 
  38 static qede_dma_handle_entry_t *
  39 qede_get_dmah_entry(qede_tx_ring_t *tx_ring)
  40 {
  41         qede_dma_handles_list_t *list = &tx_ring->dmah_list;
  42         qede_dma_handle_entry_t *dmah;
  43 
  44         mutex_enter(&list->lock);
  45         dmah = list->free_list[list->head];
  46         list->free_list[list->head] = NULL;
  47         list->head = (list->head + 1) & TX_RING_MASK;
  48         mutex_exit(&list->lock);
  49 
  50         return (dmah);
  51 }
  52 
  53 static void
  54 qede_put_dmah_entries(qede_tx_ring_t *tx_ring, qede_dma_handle_entry_t *dmah)
  55 {
  56         qede_dma_handles_list_t *list = &tx_ring->dmah_list;
  57         qede_dma_handle_entry_t *next;
  58         u16 index;
  59 
  60         mutex_enter(&list->lock);
  61         index = list->tail;
  62         
  63         while (dmah != NULL) {
  64                 next = dmah->next;
  65                 dmah->next = NULL;
  66                 list->free_list[index] = dmah;
  67                 index = (index + 1) & TX_RING_MASK;
  68                 dmah = next;
  69         }
  70 
  71         list->tail = index;
  72 
  73         mutex_exit(&list->lock);
  74 }
  75 
  76 static qede_tx_bcopy_pkt_t *
  77 qede_get_bcopy_pkt(qede_tx_ring_t *tx_ring)
  78 {
  79         qede_tx_bcopy_list_t *list = &tx_ring->bcopy_list;
  80         qede_tx_bcopy_pkt_t *pkt;
  81 
  82         mutex_enter(&list->lock);
  83         pkt = list->free_list[list->head];
  84         list->free_list[list->head] = NULL;
  85         list->head = (list->head + 1) & TX_RING_MASK;
  86         mutex_exit(&list->lock);
  87 
  88         return (pkt);
  89 }
  90 
  91 static void
  92 qede_put_bcopy_pkt(qede_tx_ring_t *tx_ring, qede_tx_bcopy_pkt_t *pkt)
  93 {
  94         qede_tx_bcopy_list_t *list = &tx_ring->bcopy_list;
  95 
  96         mutex_enter(&list->lock);
  97         list->free_list[list->tail] = pkt;
  98         list->tail = (list->tail + 1) & TX_RING_MASK;
  99         mutex_exit(&list->lock);
 100 }
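
/*
 * Illustrative sketch, not part of the driver: the dmah and bcopy free
 * lists above are plain arrays indexed by head/tail counters that wrap
 * with a power-of-two mask.  The guard macro, types and names below are
 * hypothetical; locking is omitted for brevity.
 */
#ifdef QEDE_FP_EXAMPLES
#define	EX_RING_SIZE	8			/* must be a power of two */
#define	EX_RING_MASK	(EX_RING_SIZE - 1)

typedef struct ex_free_list {
	void		*slot[EX_RING_SIZE];
	uint16_t	head;			/* next entry handed out */
	uint16_t	tail;			/* next slot to refill */
} ex_free_list_t;

/* Take one entry off the head, as qede_get_dmah_entry() does. */
static void *
ex_list_get(ex_free_list_t *list)
{
	void *entry = list->slot[list->head];

	list->slot[list->head] = NULL;
	list->head = (list->head + 1) & EX_RING_MASK;
	return (entry);
}

/* Return one entry at the tail, as qede_put_bcopy_pkt() does. */
static void
ex_list_put(ex_free_list_t *list, void *entry)
{
	list->slot[list->tail] = entry;
	list->tail = (list->tail + 1) & EX_RING_MASK;
}
#endif	/* QEDE_FP_EXAMPLES */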
 101 
 102 void 
 103 qede_print_tx_indexes(qede_tx_ring_t *tx_ring)
 104 {
 105         uint16_t hw_consumer = LE_16(*tx_ring->hw_cons_ptr);
 106         uint16_t chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
 107         hw_consumer &= TX_RING_MASK;
 108         chain_idx &= TX_RING_MASK;
 109         qede_print_err("!indices: hw_cons %d, chain_cons = %d, sw_prod = %d",
 110             hw_consumer, chain_idx, tx_ring->sw_tx_prod); 
 111 }
 112 
 113 void 
 114 qede_print_rx_indexes(qede_rx_ring_t *rx_ring)
 115 {
 116         u16 hw_bd_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
 117         u16 sw_bd_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
 118 
 119         hw_bd_cons &= (rx_ring->qede->rx_ring_size - 1);
 120         sw_bd_cons &= (rx_ring->qede->rx_ring_size - 1);
 121         qede_print_err("!RX indices: hw_cons %d, chain_cons = %d",
 122             hw_bd_cons, sw_bd_cons); 
 123 }
 124 
 125 
 126 /*
 127  * Called from tx_completion intr handler.
 128  * NOTE: the status block DMA memory must be synced
 129  * in the interrupt handler.
 130  */
 131 int 
 132 qede_process_tx_completions(qede_tx_ring_t *tx_ring)
 133 {
 134         int count = 0;
 135         u16 hw_consumer;
 136         struct eth_tx_bd *tx_bd;
 137         uint16_t chain_idx;
 138         u16 nbd, sw_consumer = tx_ring->sw_tx_cons;
 139         struct eth_tx_1st_bd *first_bd;
 140         u16 bd_consumed = 0;
 141         qede_tx_recycle_list_t *recycle_entry;
 142         qede_dma_handle_entry_t *dmah, *head = NULL, *tail = NULL;
 143         qede_tx_bcopy_pkt_t *bcopy_pkt;
 144 
 145         hw_consumer = LE_16(*tx_ring->hw_cons_ptr);
 146         chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
 147 
 148         while (hw_consumer != chain_idx) {
 149                 nbd = 0;
 150                 bd_consumed = 0;
 151                 first_bd = NULL;
 152 
 153                 recycle_entry = &tx_ring->tx_recycle_list[sw_consumer];
 154                 if (recycle_entry->dmah_entry != NULL) {
 155                         dmah = recycle_entry->dmah_entry;
 156 
 157                         head = dmah;
 158 
 159                         if (head->mp) {
 160                                 freemsg(head->mp);
 161                         }
 162 
 163                         while (dmah != NULL) {
 164                                 (void) ddi_dma_unbind_handle(dmah->dma_handle);
 165                                 dmah = dmah->next;
 166                         }
 167 
 168 
 169                         qede_put_dmah_entries(tx_ring,
 170                             head);
 171                         recycle_entry->dmah_entry = NULL;
 172                 } else if (recycle_entry->bcopy_pkt != NULL) {
 173                         bcopy_pkt = recycle_entry->bcopy_pkt;
 174 
 175                         qede_put_bcopy_pkt(tx_ring, bcopy_pkt);
 176                         recycle_entry->bcopy_pkt = NULL;
 177                 } else {
 178                         qede_warn(tx_ring->qede,
 179                             "Invalid completion at index %d",
 180                             sw_consumer);
 181                 }
 182 
 183                 sw_consumer = (sw_consumer + 1) & TX_RING_MASK;
 184 
 185                 first_bd =
 186                     (struct eth_tx_1st_bd *)ecore_chain_consume(
 187                     &tx_ring->tx_bd_ring);
 188                 bd_consumed++;
 189                 
 190                 nbd = first_bd->data.nbds;
 191 
 192                 while (bd_consumed++ < nbd) {
 193                         ecore_chain_consume(&tx_ring->tx_bd_ring);
 194                 }
 195 
 196                 chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
 197                 count++;
 198         }
 199 
 200         tx_ring->sw_tx_cons = sw_consumer;
 201 
 202         if (count && tx_ring->tx_q_sleeping) {
 203                 tx_ring->tx_q_sleeping = 0;
 204 #ifndef NO_CROSSBOW
 205                 RESUME_TX(tx_ring);
 206 #else
 207                 mac_tx_update(tx_ring->qede->mac_handle);
 208 #endif
 209         }
 210 
 211         return (count);
 212 }
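
/*
 * Illustrative sketch, not part of the driver: the completion walk above
 * advances the software consumer until it catches up with the hardware
 * consumer, consuming data.nbds descriptors per completed packet.  The
 * guard macro, the nbds[] array and the ex_ names are hypothetical
 * stand-ins for the real BD chain.
 */
#ifdef QEDE_FP_EXAMPLES
static int
ex_count_tx_completions(const uint8_t *nbds, uint16_t ring_mask,
    uint16_t bd_cons, uint16_t hw_cons)
{
	int count = 0;

	while (bd_cons != hw_cons) {
		/* nbds[bd_cons] plays the role of eth_tx_1st_bd.data.nbds */
		bd_cons = (bd_cons + nbds[bd_cons]) & ring_mask;
		count++;
	}
	return (count);
}
#endif	/* QEDE_FP_EXAMPLES */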
 213 
 214 static int
 215 qede_has_tx_work(qede_tx_ring_t *tx_ring)
 216 {
 217         u16 hw_bd_cons = LE_16(*tx_ring->hw_cons_ptr);
 218         u16 sw_bd_cons = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
 219 
 220         if (sw_bd_cons == (hw_bd_cons + 1)) {
 221                 return (0);
 222         }
 223         return (hw_bd_cons != sw_bd_cons);
 224 }
 225 
 226 static int
 227 qede_has_rx_work(qede_rx_ring_t *rx_ring)
 228 {
 229         u16 hw_bd_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
 230         u16 sw_bd_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
 231         return (hw_bd_cons != sw_bd_cons);
 232 }
 233 
 234 static void
 235 qede_set_cksum_flags(mblk_t *mp,
 236     uint16_t parse_flags)
 237 {
 238         uint32_t cksum_flags = 0;
 239         int error = 0;
 240         bool l4_is_calc, l4_csum_err, iphdr_len_err;
 241         
 242         l4_is_calc =
 243             (parse_flags >> PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)
 244             & PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK;
 245         l4_csum_err = (parse_flags >> PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT)
 246             & PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK;
 247         iphdr_len_err = (parse_flags >> PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT)
 248             & PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK;
 249 
 250         if (l4_is_calc) {
 251                 if (l4_csum_err) {
 252                         error = 1;
 253                 } else if (iphdr_len_err) {
 254                         error = 2;
 255                 } else {
 256                         cksum_flags =  HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM_OK;
 257                 }
 258         }
 259         
 260         if (error == 1) {
 261                 qede_print_err("!%s: got L4 csum error", __func__);
 262         } else if (error == 2) {
 263                 qede_print_err("!%s: got IP header error", __func__);
 264         }
 265 
 266         mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags);
 267 }
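
/*
 * Illustrative sketch, not part of the driver: the shift-and-mask flag
 * extraction used in qede_set_cksum_flags() above.  The guard macro and
 * the EX_ shift values are made up for the example; the real ones are
 * the PARSING_AND_ERR_FLAGS_* definitions.
 */
#ifdef QEDE_FP_EXAMPLES
#define	EX_L4_CALC_SHIFT	3
#define	EX_L4_ERR_SHIFT		4
#define	EX_FLAG_MASK		0x1

static int
ex_l4_csum_ok(uint16_t parse_flags)
{
	int calc = (parse_flags >> EX_L4_CALC_SHIFT) & EX_FLAG_MASK;
	int err = (parse_flags >> EX_L4_ERR_SHIFT) & EX_FLAG_MASK;

	return (calc && !err);
}
#endif	/* QEDE_FP_EXAMPLES */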
 268 
 269 static qede_rx_buffer_t *
 270 qede_get_next_rx_buffer(qede_rx_ring_t *rx_ring,
 271     uint32_t *free_buffer_count)
 272 {
 273         qede_rx_buffer_t *rx_buffer;
 274         uint32_t num_entries;
 275 
 276         rx_buffer = qede_get_from_active_list(rx_ring, &num_entries);
 277         ASSERT(rx_buffer != NULL);
 278         ecore_chain_consume(&rx_ring->rx_bd_ring);
 279         *free_buffer_count = num_entries;
 280 
 281         return (rx_buffer);
 282 }
 283 
 284 static uint32_t
 285 qede_get_next_lro_buffer(qede_rx_ring_t *rx_ring,
 286     qede_lro_info_t *lro_info)
 287 {
 288         lro_info->rx_buffer[lro_info->bd_count] =
 289             qede_get_next_rx_buffer(rx_ring,
 290             &lro_info->free_buffer_count);
 291         lro_info->bd_count++;
 292         return (DDI_SUCCESS);
 293 }
 294 #ifdef DEBUG_LRO
 295 int agg_count = 0;
 296 bool agg_print = B_TRUE;
 297 #endif
 298 static void
 299 qede_lro_start(qede_rx_ring_t *rx_ring,
 300     struct eth_fast_path_rx_tpa_start_cqe *cqe)
 301 {
 302         qede_lro_info_t *lro_info;
 303         int i, len_on_first_bd, seg_len; 
 304 
 305         lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
 306 
 307         /* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_NONE); */
 308 
 309 #ifdef DEBUG_LRO
 310         if (agg_count++ < 30)  {
 311                 qede_dump_start_lro_cqe(cqe);
 312         } else { 
 313                 agg_print = B_FALSE;
 314         }
 315 #endif
 316 
 317         memset(lro_info, 0, sizeof (qede_lro_info_t));
 318         lro_info->agg_state = QEDE_AGG_STATE_START;
 319         rx_ring->lro_active_count++;
 320 
 321         /* Parsing and error flags from the parser */
 322                 
 323         lro_info->pars_flags = LE_16(cqe->pars_flags.flags);
 324         lro_info->pad = LE_16(cqe->placement_offset);
 325         lro_info->header_len = (uint32_t)cqe->header_len;
 326         lro_info->vlan_tag = LE_16(cqe->vlan_tag);
 327         lro_info->rss_hash = LE_32(cqe->rss_hash);
 328 
 329         seg_len = (int)LE_16(cqe->seg_len); 
 330         len_on_first_bd = (int)LE_16(cqe->len_on_first_bd);
 331         /*
 332          * Get the first bd
 333          */
 334         qede_get_next_lro_buffer(rx_ring, lro_info);
 335 
 336         if (len_on_first_bd < seg_len) {
 337                 /*
 338                  * We end up here with jumbo frames
 339                  * since a TCP segment can span
 340                  * multiple buffer descriptors.
 341                  */
 342                 for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) {
 343                         if (cqe->ext_bd_len_list[i] == 0) {
 344                             break;
 345                         }
 346                         qede_get_next_lro_buffer(rx_ring, lro_info);
 347                 }
 348         }
 349 }
 350 
 351 static void
 352 qede_lro_cont(qede_rx_ring_t *rx_ring,
 353     struct eth_fast_path_rx_tpa_cont_cqe *cqe)
 354 {
 355         qede_lro_info_t *lro_info;
 356         int i;
 357 
 358         lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
 359 
 360         /* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_START); */
 361 #ifdef DEBUG_LRO
 362         if (agg_print) {
 363                 qede_dump_cont_lro_cqe(cqe);
 364         }
 365 #endif
 366 
 367         for (i = 0; i < ETH_TPA_CQE_CONT_LEN_LIST_SIZE; i++) {
 368                 if (cqe->len_list[i] == 0) {
 369                         break;
 370                 }
 371                 qede_get_next_lro_buffer(rx_ring, lro_info);
 372         }
 373 }
 374 
 375 static mblk_t *
 376 qede_lro_end(qede_rx_ring_t *rx_ring,
 377     struct eth_fast_path_rx_tpa_end_cqe *cqe,
 378     int *pkt_bytes)
 379 {
 380         qede_lro_info_t *lro_info;
 381         mblk_t *head = NULL, *tail = NULL, *mp = NULL;
 382         qede_rx_buffer_t *rx_buffer;
 383         int i, bd_len;
 384         uint16_t work_length, total_packet_length;
 385         uint32_t rx_buf_size = rx_ring->rx_buf_size;
 386         qede_dma_info_t *dma_info;
 387 
 388         lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
 389 
 390         /* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_START); */
 391 
 392 #ifdef DEBUG_LRO
 393         if (agg_print) {
 394                 qede_dump_end_lro_cqe(cqe);
 395         }
 396 #endif
 397 
 398         work_length = total_packet_length = LE_16(cqe->total_packet_len);
 399 
 400         /*
 401          * Get any buffer descriptors for this cqe
 402          */
 403         for (i = 0; i < ETH_TPA_CQE_END_LEN_LIST_SIZE; i++) {
 404                 if (cqe->len_list[i] == 0) {
 405                         break;
 406                 }
 407                 qede_get_next_lro_buffer(rx_ring, lro_info);
 408         }
 409 
 410         /* ASSERT(lro_info->bd_count != cqe->num_of_bds); */
 411 
 412         if (lro_info->free_buffer_count < 
 413             rx_ring->rx_low_buffer_threshold) {
 414                 for (i = 0; i < lro_info->bd_count; i++) {
 415                         qede_recycle_copied_rx_buffer(
 416                             lro_info->rx_buffer[i]);
 417                         lro_info->rx_buffer[i] = NULL;
 418                 }
 419                 rx_ring->rx_low_water_cnt++;
 420                 lro_info->agg_state = QEDE_AGG_STATE_NONE;
 421                 return (NULL);
 422         }
 423         /*
 424          * Loop through list of buffers for this
 425          * aggregation.  For each one:
 426          * 1. Calculate the buffer length
 427          * 2. Adjust the mblk read/write pointers
 428          * 3. Link the mblk to the local chain using
 429          *    b_cont pointers.
 430          * Note: each buffer will be rx_buf_size except
 431          * the first (subtract the placement_offset)
 432          * and the last which contains the remainder
 433          * of cqe_end->total_packet_len minus length
 434          * of all other buffers.
 435          */
 436         for (i = 0; i < lro_info->bd_count; i++) {
 437 
 438                 rx_buffer = lro_info->rx_buffer[i];
 439 
 440                 bd_len = 
 441                     (work_length > rx_buf_size) ? rx_buf_size : work_length;
 442                 if (i == 0 &&
 443                     (cqe->num_of_bds > 1)) {
 444                         bd_len -= lro_info->pad;
 445                 }
 446 
 447                 dma_info = &rx_buffer->dma_info;         
 448                 ddi_dma_sync(dma_info->dma_handle,
 449                     dma_info->offset,
 450                     rx_buf_size,
 451                     DDI_DMA_SYNC_FORKERNEL);
 452 
 453                 mp = rx_buffer->mp;
 454                 mp->b_next = mp->b_cont = NULL;
 455 
 456                 if (head == NULL) {
 457                         head = tail = mp;
 458                         mp->b_rptr += lro_info->pad;
 459                 } else {
 460                         tail->b_cont = mp;
 461                         tail = mp;
 462                 }
 463 
 464                 mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + bd_len);
 465                 work_length -= bd_len;
 466         }
 467 
 468         qede_set_cksum_flags(head, lro_info->pars_flags);
 469  
 470         rx_ring->rx_lro_pkt_cnt++;
 471         rx_ring->lro_active_count--; 
 472         lro_info->agg_state = QEDE_AGG_STATE_NONE;
 473 
 474 #ifdef DEBUG_LRO
 475         if (agg_print) {
 476                 qede_dump_mblk_chain_bcont_ptr(rx_ring->qede, head);
 477         }
 478 #endif
 479         *pkt_bytes = (int)total_packet_length;
 480         return (head);
 481 }
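
/*
 * Illustrative sketch, not part of the driver: the per-buffer length
 * split done by the loop in qede_lro_end() above.  Every buffer holds
 * up to buf_size bytes, the first one loses `pad` bytes to the placement
 * offset, and the last one carries the remainder.  The guard macro and
 * names are hypothetical.
 */
#ifdef QEDE_FP_EXAMPLES
static void
ex_split_lro_lengths(uint32_t total_len, uint32_t buf_size, uint32_t pad,
    uint32_t nbufs, uint32_t *len_out)
{
	uint32_t rem = total_len;
	uint32_t i;

	for (i = 0; i < nbufs; i++) {
		uint32_t len = (rem > buf_size) ? buf_size : rem;

		if (i == 0 && nbufs > 1)
			len -= pad;	/* placement offset eats into bd 0 */
		len_out[i] = len;
		rem -= len;
	}
}
#endif	/* QEDE_FP_EXAMPLES */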
 482 
 483 
 484 
 485 #ifdef DEBUG_JUMBO
 486 int jumbo_count = 0;
 487 bool jumbo_print = B_TRUE;
 488 #endif
 489 static mblk_t *
 490 qede_reg_jumbo_cqe(qede_rx_ring_t *rx_ring,
 491    struct eth_fast_path_rx_reg_cqe *cqe)
 492 {
 493         int i;
 494         qede_rx_buffer_t *rx_buf, *rx_buffer[ETH_RX_MAX_BUFF_PER_PKT];
 495         mblk_t *mp = NULL, *head = NULL, *tail = NULL;
 496         uint32_t free_buffer_count = 0;
 497         uint16_t work_length;
 498         uint32_t rx_buf_size = rx_ring->rx_buf_size, bd_len;
 499         qede_dma_info_t *dma_info;
 500         u8 pad = cqe->placement_offset;
 501 
 502 #ifdef DEBUG_JUMBO
 503         if (jumbo_count++ < 8) { 
 504                 qede_dump_reg_cqe(cqe);
 505         } else {
 506                 jumbo_print = B_FALSE;
 507         }
 508 #endif
 509 
 510         work_length = HOST_TO_LE_16(cqe->pkt_len);
 511 
 512         /*
 513          * Get the buffers/mps for this cqe
 514          */
 515         for (i = 0; i < cqe->bd_num; i++) {
 516                 rx_buffer[i] =
 517                     qede_get_next_rx_buffer(rx_ring, &free_buffer_count);
 518         }
 519 
 520         /*
 521          * If the buffer ring is running low, drop the
 522          * packet and return these buffers.
 523          */
 524         if (free_buffer_count < 
 525             rx_ring->rx_low_buffer_threshold) {
 526                 for (i = 0; i < cqe->bd_num; i++) {
 527                         qede_recycle_copied_rx_buffer(rx_buffer[i]);
 528                 }
 529                 rx_ring->rx_low_water_cnt++;
 530                 return (NULL);
 531         }
 532 
 533         for (i = 0; i < cqe->bd_num; i++) {
 534                 rx_buf = rx_buffer[i];
 535 
 536                 bd_len = 
 537                     (work_length > rx_buf_size) ? rx_buf_size : work_length;
 538 
 539                 /*
 540                  * Adjust for placement offset
 541                  * on the first buffer.
 542                  */
 543                 if (i == 0) {
 544                         bd_len -= pad;
 545                 }
 546 
 547                 dma_info = &rx_buf->dma_info;            
 548                 ddi_dma_sync(dma_info->dma_handle,
 549                     dma_info->offset,
 550                     rx_buf_size,
 551                     DDI_DMA_SYNC_FORKERNEL);
 552 
 553                 mp = rx_buf->mp;
 554                 mp->b_next = mp->b_cont = NULL;
 555                 /*
 556                  * Adjust for placement offset
 557                  * on the first buffer.
 558                  */
 559                 if (i == 0) {
 560                         mp->b_rptr += pad;
 561                 }
 562 
 563                 mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + bd_len);
 564 
 565                 if (head == NULL) {
 566                         head = tail = mp;
 567                 } else {
 568                         tail->b_cont = mp;
 569                         tail = mp;
 570                 }
 571 
 572                 work_length -= bd_len;
 573         }
 574 
 575         qede_set_cksum_flags(head,
 576                     HOST_TO_LE_16(cqe->pars_flags.flags));
 577 #ifdef DEBUG_JUMBO
 578         if (jumbo_print) {
 579                 qede_dump_mblk_chain_bcont_ptr(rx_ring->qede, head);
 580         }
 581 #endif
 582         rx_ring->rx_jumbo_pkt_cnt++;
 583         return (head);
 584 }
 585 
 586 static mblk_t *
 587 qede_reg_cqe(qede_rx_ring_t *rx_ring,
 588     struct eth_fast_path_rx_reg_cqe *cqe,
 589     int *pkt_bytes)
 590 {
 591         qede_t *qede = rx_ring->qede;
 592         qede_rx_buffer_t *rx_buffer;
 593         uint32_t free_buffer_count;
 594         mblk_t *mp;
 595         uint16_t pkt_len = HOST_TO_LE_16(cqe->pkt_len);
 596         u8 pad = cqe->placement_offset;
 597         qede_dma_info_t *dma_info;
 598         ddi_dma_handle_t dma_handle;
 599         char *virt_addr;
 600 
 601         /*
 602          * Update the byte count as it will
 603          * be the same for normal and jumbo
 604          */
 605         *pkt_bytes = (int)pkt_len;
 606 
 607         if (cqe->bd_num > 1) {
 608                 /*
 609                  * If this cqe uses more than one
 610                  * rx buffer then it must be
 611                  * jumbo.  Call another handler
 612                  * for this because the process is
 613                  * quite different.
 614                  */
 615                 return (qede_reg_jumbo_cqe(rx_ring, cqe));
 616         }
 617         
 618         
 619         rx_buffer = qede_get_next_rx_buffer(rx_ring,
 620             &free_buffer_count);
 621 
 622         if (free_buffer_count < 
 623             rx_ring->rx_low_buffer_threshold) {
 624                 qede_recycle_copied_rx_buffer(rx_buffer);
 625                 rx_ring->rx_low_water_cnt++;
 626                 *pkt_bytes = 0;
 627                 return (NULL);
 628         }
 629 
 630         dma_info = &rx_buffer->dma_info;         
 631         virt_addr = dma_info->virt_addr;
 632         dma_handle = dma_info->dma_handle;
 633         ddi_dma_sync(dma_handle,
 634             0, 0, DDI_DMA_SYNC_FORKERNEL);
 635 
 636         if (pkt_len <= rx_ring->rx_copy_threshold) {
 637                 mp = allocb(pkt_len + 2, 0); /* IP HDR_ALIGN */
 638                 if (mp != NULL) {
 639                         virt_addr += pad;
 640                         bcopy(virt_addr, mp->b_rptr, pkt_len);
 641                 } else {
 642                         /*
 643                          * Post the buffer back to fw and
 644                          * drop packet
 645                          */
 646                         qede_print_err("!%s(%d): allocb failed",
 647                             __func__,
 648                             rx_ring->qede->instance);
 649                         qede->allocbFailures++;
 650                         goto freebuf;
 651                 }
 652                 /* 
 653                  * We've copied it (or not) and are done with it
 654                  * so put it back into the passive list.
 655                  */
 656                 ddi_dma_sync(dma_handle,
 657                     0, 0, DDI_DMA_SYNC_FORDEV);
 658                 qede_recycle_copied_rx_buffer(rx_buffer);
 659                 rx_ring->rx_copy_cnt++;
 660         } else {
 661 
 662                 /*
 663                  * We are going to send this mp/buffer
 664                  * up to the mac layer.  Adjust the
 665                  * pointers and link it to our chain.
 666                  * The rx_buffer is returned to us in
 667                  * the recycle function, so we drop it
 668                  * here.
 669                  */
 670                 mp = rx_buffer->mp;
 671                 mp->b_rptr += pad;
 672         }
 673         mp->b_cont = mp->b_next = NULL;
 674         mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + pkt_len);
 675 
 676         qede_set_cksum_flags(mp,
 677             HOST_TO_LE_16(cqe->pars_flags.flags));
 678 #ifdef DEBUG_JUMBO
 679         if (jumbo_print) {
 680             qede_dump_mblk_chain_bnext_ptr(rx_ring->qede, mp);
 681         }
 682 #endif
 683 
 684         rx_ring->rx_reg_pkt_cnt++;
 685         return (mp);    
 686 
 687 freebuf:
 688         qede_recycle_copied_rx_buffer(rx_buffer);
 689         return (NULL);
 690 }
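
/*
 * Illustrative sketch, not part of the driver: the small-packet copy
 * path above.  A frame of pkt_len bytes starts at (rx_buf + pad) inside
 * the receive buffer; copying it out lets the buffer be recycled right
 * away instead of being loaned upstream.  The guard macro and names are
 * hypothetical.
 */
#ifdef QEDE_FP_EXAMPLES
static size_t
ex_copy_small_packet(const uint8_t *rx_buf, size_t pad, size_t pkt_len,
    uint8_t *dst, size_t dst_size)
{
	if (pkt_len > dst_size)
		return (0);	/* too big to copy; take the loan path */
	bcopy(rx_buf + pad, dst, pkt_len);
	return (pkt_len);
}
#endif	/* QEDE_FP_EXAMPLES */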
 691 
 692 /*
 693  * Routine to process the rx packets on the
 694  * passed rx_ring. Can be called for intr or
 695  * poll context/routines
 696  */
 697 static mblk_t *
 698 qede_process_rx_ring(qede_rx_ring_t *rx_ring, int nbytes, int npkts)
 699 {
 700         union eth_rx_cqe *cqe;
 701         u16 last_cqe_consumer = rx_ring->last_cqe_consumer;
 702         enum eth_rx_cqe_type cqe_type;
 703         u16 sw_comp_cons, hw_comp_cons;
 704         mblk_t *mp = NULL, *first_mp = NULL, *last_mp = NULL;
 705         int pkt_bytes = 0, byte_cnt = 0, pkt_cnt = 0;
 706 
 707         hw_comp_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
 708 
 709         /* Completion ring sw consumer */
 710         sw_comp_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
 711         
 712         while (sw_comp_cons != hw_comp_cons) {
 713                 if ((byte_cnt >= nbytes) ||
 714                     (pkt_cnt >= npkts)) {
 715                         break;
 716                 }
 717 
 718                 cqe = (union eth_rx_cqe *)
 719                     ecore_chain_consume(&rx_ring->rx_cqe_ring);
 720                 /* Get next element and increment the cons_idx */
 721 
 722                 (void) ddi_dma_sync(rx_ring->rx_cqe_dmah,
 723                     last_cqe_consumer, sizeof (*cqe),
 724                     DDI_DMA_SYNC_FORKERNEL);
 725 
 726                 cqe_type = cqe->fast_path_regular.type;
 727 
 728                 switch (cqe_type) {
 729                 case ETH_RX_CQE_TYPE_SLOW_PATH:
 730                         ecore_eth_cqe_completion(&rx_ring->qede->edev.hwfns[0],
 731                             (struct eth_slow_path_rx_cqe *)cqe);
 732                         goto next_cqe;
 733                 case ETH_RX_CQE_TYPE_REGULAR:
 734                         mp = qede_reg_cqe(rx_ring,
 735                             &cqe->fast_path_regular,
 736                             &pkt_bytes);
 737                         break;
 738                 case ETH_RX_CQE_TYPE_TPA_START:
 739                         qede_lro_start(rx_ring,
 740                             &cqe->fast_path_tpa_start);
 741                         goto next_cqe;
 742                 case ETH_RX_CQE_TYPE_TPA_CONT:
 743                         qede_lro_cont(rx_ring,
 744                             &cqe->fast_path_tpa_cont);
 745                         goto next_cqe;
 746                 case ETH_RX_CQE_TYPE_TPA_END:
 747                         mp = qede_lro_end(rx_ring,
 748                             &cqe->fast_path_tpa_end,
 749                             &pkt_bytes);
 750                         break;
 751                 default:
 752                         if (cqe_type != 0) {
 753                                 qede_print_err("!%s(%d): cqe_type %x not "
 754                                     "supported", __func__,
 755                                     rx_ring->qede->instance,
 756                                     cqe_type);
 757                         }
 758                         goto exit_rx;
 759                 }
 760 
 761                 /* 
 762                  * If we arrive here with no mp,
 763                  * then we hit an RX buffer threshold
 764                  * where we had to drop the packet and
 765                  * give the buffers back to the device.
 766                  */
 767                 if (mp == NULL) {
 768                         rx_ring->rx_drop_cnt++;
 769                         goto next_cqe;
 770                 }
 771 
 772                 if (first_mp) {
 773                         last_mp->b_next = mp;
 774                 } else {
 775                         first_mp = mp;
 776                 }
 777                 last_mp = mp;
 778                 pkt_cnt++;
 779                 byte_cnt += pkt_bytes;
 780 next_cqe:
 781                 ecore_chain_recycle_consumed(&rx_ring->rx_cqe_ring);
 782                 last_cqe_consumer = sw_comp_cons;
 783                 sw_comp_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
 784                 if (!(qede_has_rx_work(rx_ring))) {
 785                         ecore_sb_update_sb_idx(rx_ring->fp->sb_info);
 786                 }
 787                 hw_comp_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
 788         }
 789         rx_ring->rx_pkt_cnt += pkt_cnt;
 790         rx_ring->rx_byte_cnt += byte_cnt;
 791 
 792 exit_rx:
 793         if (first_mp) {
 794                 last_mp->b_next = NULL;
 795         }
 796 
 797         /*
 798          * Since prod update will result in
 799          * reading of the bd's, do a dma_sync
 800          */
 801         qede_replenish_rx_buffers(rx_ring);
 802         qede_update_rx_q_producer(rx_ring);
 803         rx_ring->last_cqe_consumer = last_cqe_consumer;
 804 
 805         return (first_mp);
 806 }
 807 
 808 mblk_t *
 809 qede_process_fastpath(qede_fastpath_t *fp,
 810     int nbytes, int npkts, int *work_done)
 811 {
 812         int i = 0;
 813         qede_tx_ring_t *tx_ring;
 814         qede_rx_ring_t *rx_ring;
 815         mblk_t *mp = NULL;
 816 
 817         rx_ring = fp->rx_ring;
 818 
 819         for (i = 0; i < fp->qede->num_tc; i++) {
 820                 tx_ring = fp->tx_ring[i];
 821                 if (qede_has_tx_work(tx_ring)) {
 822                 /* process tx completions */
 823                         if (mutex_tryenter(&tx_ring->tx_lock) != 0) {
 824                                 *work_done +=
 825                                     qede_process_tx_completions(tx_ring);
 826                                 mutex_exit(&tx_ring->tx_lock);
 827                         }
 828                 }
 829         }
 830 
 831         if (!(qede_has_rx_work(rx_ring))) {
 832                 ecore_sb_update_sb_idx(fp->sb_info);
 833         }
 834 
 835         rx_ring = fp->rx_ring;
 836         if (qede_has_rx_work(rx_ring)) {
 837                 mutex_enter(&rx_ring->rx_lock);
 838                 mp = qede_process_rx_ring(rx_ring,
 839                     nbytes, npkts);
 840                 if (mp) {
 841                         *work_done += 1;
 842                 }
 843                 mutex_exit(&rx_ring->rx_lock);
 844         }
 845 
 846         return (mp);
 847 }
 848 
 849 /*
 850  * Parse the mblk to extract information
 851  * from the protocol headers.
 852  * The routine assumes that the L4 header is TCP. It also
 853  * does not account for IPv6 headers, since IPv6 LSO is
 854  * unsupported.
 855  */
 856 static void
 857 qede_pkt_parse_lso_headers(qede_tx_pktinfo_t *pktinfo, mblk_t *mp)
 858 {
 859         struct ether_header *eth_hdr =
 860             (struct ether_header *)(void *)mp->b_rptr;
 861         ipha_t *ip_hdr;
 862         struct tcphdr *tcp_hdr;
 863 
 864         /* mac header type and len */
 865         if (ntohs(eth_hdr->ether_type) == ETHERTYPE_IP) {
 866                 pktinfo->ether_type = ntohs(eth_hdr->ether_type);
 867                 pktinfo->mac_hlen = sizeof (struct ether_header);
 868         } else if (ntohs(eth_hdr->ether_type) == ETHERTYPE_VLAN) {
 869                 struct ether_vlan_header *vlan_hdr =
 870                     (struct ether_vlan_header *)(void *)mp->b_rptr;
 871                 pktinfo->ether_type = ntohs(vlan_hdr->ether_type);
 872                 pktinfo->mac_hlen = sizeof (struct ether_vlan_header);
 873         }
 874 
 875         /* ip header type and len */
 876         ip_hdr = (ipha_t *)(void *)((u8 *)mp->b_rptr + pktinfo->mac_hlen);
 877         pktinfo->ip_hlen = IPH_HDR_LENGTH(ip_hdr);
 878 
 879         /* Assume TCP protocol */
 880         pktinfo->l4_proto = 0x06;
 881 
 882         tcp_hdr = (struct tcphdr *)(void *)
 883             ((u8 *)mp->b_rptr + pktinfo->mac_hlen + pktinfo->ip_hlen);
 884         pktinfo->l4_hlen = TCP_HDR_LENGTH(tcp_hdr);
 885 
 886         
 887         pktinfo->total_hlen =
 888             pktinfo->mac_hlen +
 889             pktinfo->ip_hlen +
 890             pktinfo->l4_hlen;
 891 }
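
/*
 * Illustrative sketch, not part of the driver: how the total header
 * length used by the LSO path can be derived for an untagged IPv4/TCP
 * frame.  The IHL and TCP data-offset fields are both counted in 4-byte
 * words.  The guard macro and names are hypothetical; no validation is
 * done.
 */
#ifdef QEDE_FP_EXAMPLES
static uint32_t
ex_total_hdr_len(const uint8_t *frame)
{
	uint32_t mac_hlen = 14;					/* no VLAN tag */
	uint32_t ip_hlen = (frame[mac_hlen] & 0x0f) * 4;	/* IPv4 IHL */
	uint32_t l4_hlen =
	    ((frame[mac_hlen + ip_hlen + 12] >> 4) & 0x0f) * 4;	/* TCP doff */

	return (mac_hlen + ip_hlen + l4_hlen);
}
#endif	/* QEDE_FP_EXAMPLES */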
 892 
 893 static void
 894 qede_get_pkt_offload_info(qede_t *qede, mblk_t *mp,
 895     u32 *use_cksum, boolean_t *use_lso, uint16_t *mss)
 896 {
 897         u32 pflags;
 898 
 899         mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
 900 
 901         *use_cksum = pflags;
 902         if (qede->lso_enable) {
 903                 u32 pkt_mss = 0;
 904                 u32 lso_flags = 0;
 905 
 906                 mac_lso_get(mp, &pkt_mss, &lso_flags);
 907                 *use_lso = (lso_flags == HW_LSO);
 908                 *mss = (u16)pkt_mss;
 909         }
 910 }
 911 
 912 static void
 913 /* LINTED E_FUNC_ARG_UNUSED */
 914 qede_get_pkt_info(qede_t *qede, mblk_t *mp,
 915     qede_tx_pktinfo_t *pktinfo)
 916 {
 917         mblk_t *bp;
 918         size_t size;
 919         struct ether_header *eth_hdr =
 920             (struct ether_header *)(void *)mp->b_rptr;
 921 
 922         pktinfo->total_len = 0;
 923         pktinfo->mblk_no = 0;
 924 
 925         /*
 926          * Count the total length and the number of
 927          * chained mblks in the packet
 928          */
 929         for (bp = mp; bp != NULL; bp = bp->b_cont) {
 930                 size = MBLKL(bp);
 931                 if (size == 0) {
 932                         continue;
 933                 }
 934 
 935                 pktinfo->total_len += size;
 936                 pktinfo->mblk_no++;
 937         }
 938         /* mac header type and len */
 939         if (ntohs(eth_hdr->ether_type) == ETHERTYPE_IP) {
 940                 pktinfo->ether_type = ntohs(eth_hdr->ether_type);
 941                 pktinfo->mac_hlen = sizeof (struct ether_header);
 942         } else if (ntohs(eth_hdr->ether_type) == ETHERTYPE_VLAN) {
 943                 struct ether_vlan_header *vlan_hdr =
 944                     (struct ether_vlan_header *)(void *)mp->b_rptr;
 945                 pktinfo->ether_type = ntohs(vlan_hdr->ether_type);
 946                 pktinfo->mac_hlen = sizeof (struct ether_vlan_header);
 947         }
 948 
 949 }
 950 
 951 /*
 952  * Routine to sync dma mem for multiple
 953  * descriptors in a chain
 954  */
 955 void
 956 qede_desc_dma_mem_sync(ddi_dma_handle_t *dma_handle,
 957     uint_t start, uint_t count, uint_t range,
 958     uint_t unit_size, uint_t direction)
 959 {
 960         if ((start + count) < range) {
 961                 (void) ddi_dma_sync(*dma_handle,
 962                     start * unit_size, count * unit_size, direction);
 963         } else {
 964                 (void) ddi_dma_sync(*dma_handle, start * unit_size,
 965                     0, direction);
 966                 (void) ddi_dma_sync(*dma_handle, 0,
 967                     (start + count - range) * unit_size,
 968                     direction);
 969         }
 970 }
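
/*
 * Illustrative sketch, not part of the driver: the wrap-around split
 * behind qede_desc_dma_mem_sync().  A run of `count` descriptors starting
 * at `start` in a ring of `range` descriptors is either one contiguous
 * piece or wraps into two.  The guard macro and names are hypothetical.
 */
#ifdef QEDE_FP_EXAMPLES
static int
ex_ring_segments(uint_t start, uint_t count, uint_t range,
    uint_t seg_start[2], uint_t seg_count[2])
{
	if (start + count <= range) {
		seg_start[0] = start;
		seg_count[0] = count;
		return (1);
	}
	seg_start[0] = start;
	seg_count[0] = range - start;		/* up to the end of the ring */
	seg_start[1] = 0;
	seg_count[1] = start + count - range;	/* wrapped remainder */
	return (2);
}
#endif	/* QEDE_FP_EXAMPLES */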
 971 
 972 /*
 973  * Send a tx pkt by copying the incoming packet into a
 974  * preallocated and mapped dma buffer.
 975  * Not designed to handle lso for now.
 976  */
 977 static enum qede_xmit_status
 978 qede_tx_bcopy(qede_tx_ring_t *tx_ring, mblk_t *mp, qede_tx_pktinfo_t *pktinfo)
 979 {
 980         qede_tx_bcopy_pkt_t *bcopy_pkt = NULL;
 981         /* Only one bd will be needed for bcopy packets */
 982         struct eth_tx_1st_bd *first_bd;
 983         u16 last_producer = tx_ring->sw_tx_prod;
 984         uint8_t *txb;
 985         mblk_t *bp;
 986         u32 mblen;
 987 
 988         bcopy_pkt = qede_get_bcopy_pkt(tx_ring);
 989         if (bcopy_pkt == NULL) {
 990                 qede_print_err("!%s(%d): entry NULL at _tx_ bcopy_list head",
 991                     __func__, tx_ring->qede->instance);
 992                 return (XMIT_FAILED);
 993         }
 994 
 995         /*
 996          * Copy the packet data to our copy
 997          * buffer
 998          */
 999         txb = bcopy_pkt->virt_addr;
1000 
1001         for (bp = mp; bp != NULL; bp = bp->b_cont) {
1002                 mblen = MBLKL(bp);
1003                 if (mblen == 0) {
1004                         continue;
1005                 }
1006                 bcopy(bp->b_rptr, txb, mblen);
1007                 txb += mblen;
1008         }
1009 
1010         (void) ddi_dma_sync(bcopy_pkt->dma_handle,
1011             0, pktinfo->total_len,
1012             DDI_DMA_SYNC_FORDEV);
1013 
1014 
1015         mutex_enter(&tx_ring->tx_lock);
1016         if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1017             QEDE_TX_COPY_PATH_PAUSE_THRESHOLD) {
1018                 tx_ring->tx_q_sleeping = 1;
1019                 qede_put_bcopy_pkt(tx_ring, bcopy_pkt);
1020                 mutex_exit(&tx_ring->tx_lock);
1021 #ifdef  DEBUG_TX_RECYCLE
1022                 qede_print_err("!%s(%d): Pausing tx queue",
1023                     __func__, tx_ring->qede->instance);
1024 #endif
1025                 return (XMIT_PAUSE_QUEUE);
1026         }
1027 
1028         first_bd = ecore_chain_produce(&tx_ring->tx_bd_ring);
1029         bzero(first_bd, sizeof (*first_bd));
1030         first_bd->data.nbds = 1;
1031         first_bd->data.bd_flags.bitfields =
1032             (1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
1033 
1034         if (pktinfo->cksum_flags & HCK_IPV4_HDRCKSUM) {
1035                 first_bd->data.bd_flags.bitfields |=
1036                     (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT);
1037         }
1038 
1039         if (pktinfo->cksum_flags & HCK_FULLCKSUM) {
1040                 first_bd->data.bd_flags.bitfields |=
1041                     (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT);
1042         }
1043 
1044         BD_SET_ADDR_LEN(first_bd,
1045             bcopy_pkt->phys_addr,
1046             pktinfo->total_len);
1047 
1048         first_bd->data.bitfields |=
1049             (pktinfo->total_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1050             << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1051 
1052         tx_ring->tx_db.data.bd_prod =
1053             HOST_TO_LE_16(ecore_chain_get_prod_idx(&tx_ring->tx_bd_ring));
1054 
1055         tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].bcopy_pkt = bcopy_pkt;
1056         tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].dmah_entry =  NULL;
1057 
1058         tx_ring->sw_tx_prod++;
1059         tx_ring->sw_tx_prod &= TX_RING_MASK;
1060 
1061         (void) ddi_dma_sync(tx_ring->tx_bd_dmah,
1062             last_producer, sizeof (*first_bd),
1063             DDI_DMA_SYNC_FORDEV);
1064 
1065         QEDE_DOORBELL_WR(tx_ring, tx_ring->tx_db.raw);
1066         mutex_exit(&tx_ring->tx_lock);
1067 
1068         freemsg(mp);
1069 
1070         return (XMIT_DONE);
1071 }
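
/*
 * Illustrative sketch, not part of the driver: flattening a chain of
 * packet fragments into one contiguous copy buffer, as the b_cont walk
 * in qede_tx_bcopy() does.  ex_frag_t is a hypothetical stand-in for an
 * mblk; zero-length fragments are skipped just like zero-length mblks.
 */
#ifdef QEDE_FP_EXAMPLES
typedef struct ex_frag {
	const uint8_t	*data;
	size_t		len;
	struct ex_frag	*next;
} ex_frag_t;

static size_t
ex_flatten(const ex_frag_t *frag, uint8_t *dst, size_t dst_size)
{
	size_t off = 0;

	for (; frag != NULL; frag = frag->next) {
		if (frag->len == 0)
			continue;
		if (off + frag->len > dst_size)
			return (0);	/* does not fit the copy buffer */
		bcopy(frag->data, dst + off, frag->len);
		off += frag->len;
	}
	return (off);
}
#endif	/* QEDE_FP_EXAMPLES */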
1072 
1073 /*
1074  * Send tx packet by mapping the mp(kernel addr)
1075  * to an existing dma_handle in the driver
1076  */
1077 static enum qede_xmit_status
1078 qede_tx_mapped(qede_tx_ring_t *tx_ring, mblk_t *mp, qede_tx_pktinfo_t *pktinfo)
1079 {
1080         enum qede_xmit_status status = XMIT_FAILED;
1081         int ret;
1082         qede_dma_handle_entry_t *dmah_entry = NULL; 
1083         qede_dma_handle_entry_t *head = NULL, *tail = NULL, *hdl;
1084         struct eth_tx_1st_bd *first_bd;
1085         struct eth_tx_2nd_bd *second_bd = NULL;
1086         struct eth_tx_3rd_bd *third_bd = NULL;
1087         struct eth_tx_bd *tx_data_bd;
1088         struct eth_tx_bd local_bd[64] = { 0 };
1089         ddi_dma_cookie_t cookie[64];
1090         u32 ncookies, total_cookies = 0, max_cookies = 0, index = 0;
1091         ddi_dma_handle_t dma_handle;
1092         mblk_t *bp;
1093         u32 mblen;
1094         bool is_premapped = B_FALSE;
1095         u64 dma_premapped = 0, dma_bound = 0;
1096         u32 hdl_reserved = 0;
1097         u8 nbd = 0;
1098         int i, bd_index;
1099         u16 last_producer;
1100         qede_tx_recycle_list_t *tx_recycle_list = tx_ring->tx_recycle_list;
1101         u64 data_addr;
1102         size_t data_size;
1103 
1104         if (pktinfo->use_lso) {
1105                 /*
1106                  * For tso pkt, we can use as many as 255 bds
1107                  */
1108                 max_cookies = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
1109                 qede_pkt_parse_lso_headers(pktinfo, mp);
1110         } else {
1111                 /*
1112                  * For non-tso packet, only 18 bds can be used
1113                  */
1114                 max_cookies = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
1115         }
1116 
1117         for (bp = mp; bp != NULL; bp = bp->b_cont) {
1118                 mblen = MBLKL(bp);
1119                 if (mblen == 0) {
1120                         continue;
1121                 }
1122                 is_premapped = B_FALSE;
1123                 /*
1124                  * If the mblk is premapped then get the
1125                  * dma_handle and sync the dma mem; otherwise
1126                  * reserve a handle from the driver's dma
1127                  * handle list.
1128                  */
1129 #ifdef  DBLK_DMA_PREMAP
1130                 if (bp->b_datap->db_flags & DBLK_DMA_PREMAP) {
1131 #ifdef  DEBUG_PREMAP
1132                         qede_info(tx_ring->qede, "mp is premapped");
1133 #endif
1134                         tx_ring->tx_premap_count++;
1135                         ret = dblk_dma_info_get(tx_ring->pm_handle,
1136                             bp->b_rptr, mblen,
1137                             bp->b_datap, &cookie[index],
1138                             &ncookies, &dma_handle);
1139                         if (ret == DDI_DMA_MAPPED) {
1140                                 is_premapped = B_TRUE;
1141                                 dma_premapped++;
1142                                 (void) ddi_dma_sync(dma_handle, 0, 0,
1143                                     DDI_DMA_SYNC_FORDEV);
1144                         } else {
1145                                 tx_ring->tx_premap_fail++;
1146                         }
1147                 }
1148 #endif  /* DBLK_DMA_PREMAP */
1149 
1150                 if (!is_premapped) {
1151                         dmah_entry = qede_get_dmah_entry(tx_ring);
1152                         if (dmah_entry == NULL) {
1153                                 qede_info(tx_ring->qede, "dmah_entry NULL, "
1154                                     "Fallback to copy mode...");
1155                                 status = XMIT_FAILED;
1156                                 goto err_map;
1157                         }
1158         
1159                         if (ddi_dma_addr_bind_handle(dmah_entry->dma_handle,
1160                             NULL, (caddr_t)bp->b_rptr, mblen,
1161                             DDI_DMA_STREAMING | DDI_DMA_WRITE,
1162                             DDI_DMA_DONTWAIT, NULL, &cookie[index], &ncookies)
1163                             != DDI_DMA_MAPPED) {
1164 
1165 #ifdef DEBUG_PULLUP
1166                         qede_info(tx_ring->qede, "addr_bind() failed for "
1167                             "handle %p, len %d mblk_no %d tot_len 0x%x" 
1168                             " use_lso %d",  dmah_entry->dma_handle,
1169                             mblen, pktinfo->mblk_no, pktinfo->total_len, 
1170                             pktinfo->use_lso);
1171 
1172                         qede_info(tx_ring->qede, "Falling back to pullup");
1173 #endif
1174                                 status = XMIT_FALLBACK_PULLUP;
1175                                 tx_ring->tx_bind_fail++;
1176                                 goto err_map;
1177                         }
1178                         tx_ring->tx_bind_count++;
1179 
1180                         if (index == 0) {
1181                                 dmah_entry->mp = mp;
1182                         } else {
1183                                 dmah_entry->mp = NULL;
1184                         }
1185 
1186                         /* queue into recycle list for tx completion routine */
1187                         if (tail == NULL) {
1188                                 head = tail = dmah_entry;
1189                         } else {
1190                                 tail->next = dmah_entry;
1191                                 tail = dmah_entry;
1192                         }
1193 
1194                         hdl_reserved++;
1195                         dma_bound++;
1196                 } 
1197 
1198                 total_cookies += ncookies;
1199                 if (total_cookies > max_cookies) {
1200                         tx_ring->tx_too_many_cookies++;
1201 #ifdef DEBUG_PULLUP
1202                         qede_info(tx_ring->qede, 
1203                             "total_cookies > max_cookies, "
1204                             "pktlen %d, mb num %d",
1205                             pktinfo->total_len, pktinfo->mblk_no);
1206 #endif
1207                         status = XMIT_TOO_MANY_COOKIES;
1208                         goto err_map_sec;
1209                 }
1210 
1211                 if (is_premapped) {
1212                         index += ncookies;
1213                 } else {
1214                         index++;
1215                         /*
1216                          * Dec. ncookies since we already stored cookie[0]
1217                          */
1218                         ncookies--;
1219 
1220                         for (i = 0; i < ncookies; i++, index++)
1221                                 ddi_dma_nextcookie(dmah_entry->dma_handle,
1222                                     &cookie[index]);
1223                 }
1224         }
1225 
1226         /*
1227          * Guard against the case where we get a series of mblks that cause us
1228          * not to end up with any mapped data.
1229          */
1230         if (total_cookies == 0) {
1231                 status = XMIT_FAILED;
1232                 goto err_map_sec;
1233         }
1234 
1235         if (total_cookies > max_cookies) {
1236                 tx_ring->tx_too_many_cookies++;
1237                 status = XMIT_TOO_MANY_COOKIES;
1238                 goto err_map_sec;
1239         }
1240         first_bd = (struct eth_tx_1st_bd *)&local_bd[0];
1241 
1242         /*
1243          * Mark this bd as start bd
1244          */
1245         first_bd->data.bd_flags.bitfields =
1246             (1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
1247 
1248         if (pktinfo->cksum_flags & HCK_IPV4_HDRCKSUM) {
1249                 first_bd->data.bd_flags.bitfields |=
1250                     (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT);
1251         }
1252 
1253         if (pktinfo->cksum_flags & HCK_FULLCKSUM) {
1254                 first_bd->data.bd_flags.bitfields |=
1255                     (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT);
1256         }
1257 
1258 
1259         /* Fill-up local bds with the tx data and flags */
1260         for (i = 0, bd_index = 0; i < total_cookies; i++, bd_index++) {
1261                 if (bd_index == 0) {
1262                         BD_SET_ADDR_LEN(first_bd,
1263                             cookie[i].dmac_laddress,
1264                             cookie[i].dmac_size);
1265 
1266                         if (pktinfo->use_lso) {
1267                         first_bd->data.bd_flags.bitfields |=
1268                             1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
1269 
1270                         second_bd = (struct eth_tx_2nd_bd *)&local_bd[1];
1271 
1272                         /*
1273                          * If the first bd contains
1274                          * hdr + data (partial or full data), then split
1275                          * the hdr and data between the 1st and 2nd
1276                          * bds respectively.
1277                          */
1278                         if (first_bd->nbytes > pktinfo->total_hlen) {
1279                                 data_addr = cookie[0].dmac_laddress
1280                                     + pktinfo->total_hlen;
1281                                 data_size = cookie[i].dmac_size
1282                                     - pktinfo->total_hlen;
1283 
1284                                 BD_SET_ADDR_LEN(second_bd,
1285                                     data_addr,
1286                                     data_size);
1287 
1288                                 /*
1289                                  * First bd already contains the addr of
1290                                  * the start of the pkt, just adjust the
1291                                  * dma len of first_bd.
1292                                  */
1293                                 first_bd->nbytes = pktinfo->total_hlen;
1294                                 bd_index++;
1295                         } else if (first_bd->nbytes < pktinfo->total_hlen) {
1296 #ifdef DEBUG_PULLUP
1297                                 qede_info(tx_ring->qede, 
1298                                     "Headers not in single bd");
1299 #endif
1300                                 status = XMIT_FALLBACK_PULLUP;
1301                                 goto err_map_sec;
1302 
1303                         }
1304 
1305                         /*
1306                          * The third bd is used to indicate to the fw
1307                          * that tso needs to be performed. It should
1308                          * be present even if only two cookies are
1309                          * needed for the mblk.
1310                          */
1311                         third_bd = (struct eth_tx_3rd_bd *)&local_bd[2];
1312                         third_bd->data.lso_mss |=
1313                             HOST_TO_LE_16(pktinfo->mss);
1314                         third_bd->data.bitfields |=
1315                             1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT;
1316                         }
1317 
1318                         continue;
1319                 }
1320 
1321                 tx_data_bd = &local_bd[bd_index];
1322                 BD_SET_ADDR_LEN(tx_data_bd,
1323                     cookie[i].dmac_laddress,
1324                     cookie[i].dmac_size);
1325         }
1326 
1327         if (pktinfo->use_lso) {
1328                 if (bd_index < 3) {
1329                         nbd = 3;
1330                 } else {
1331                         nbd = bd_index;
1332                 }
1333         } else {
1334                 nbd = total_cookies;
1335                 first_bd->data.bitfields |=
1336                     (pktinfo->total_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) 
1337                     << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1338         }
1339 
1340         first_bd->data.nbds = nbd;
1341 
1342         mutex_enter(&tx_ring->tx_lock);
1343 
1344         /*
1345          * Before copying the local bds into actual,
1346          * check if we have enough on the bd_chain
1347          */
1348         if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1349             nbd) {
1350                 tx_ring->tx_q_sleeping = 1;
1351                 status = XMIT_PAUSE_QUEUE;
1352 #ifdef  DEBUG_TX_RECYCLE
1353                         qede_info(tx_ring->qede, "Pausing tx queue...");
1354 #endif
1355                 mutex_exit(&tx_ring->tx_lock);
1356                 goto err_map_sec;
1357         }
1358 
1359         /* Copy the local_bd(s) into the actual bds */
1360         for (i = 0; i < nbd; i++) {
1361                 tx_data_bd = ecore_chain_produce(&tx_ring->tx_bd_ring);
1362                 bcopy(&local_bd[i], tx_data_bd, sizeof (*tx_data_bd));
1363         }
1364 
1365         last_producer = tx_ring->sw_tx_prod;
1366 
1367         tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].dmah_entry = head;
1368         tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].bcopy_pkt = NULL;
1369         tx_ring->sw_tx_prod = (tx_ring->sw_tx_prod + 1) & TX_RING_MASK;
1370 
1371         tx_ring->tx_db.data.bd_prod =
1372             HOST_TO_LE_16(ecore_chain_get_prod_idx(&tx_ring->tx_bd_ring));
1373 
1374         /* Sync the tx_bd dma mem */
1375         qede_desc_dma_mem_sync(&tx_ring->tx_bd_dmah,
1376             last_producer, nbd,
1377             tx_ring->tx_ring_size,
1378             sizeof (struct eth_tx_bd),
1379             DDI_DMA_SYNC_FORDEV);
1380 
1381         /*
1382          * Write to doorbell bar
1383          */
1384         QEDE_DOORBELL_WR(tx_ring, tx_ring->tx_db.raw);
1385 
1386         mutex_exit(&tx_ring->tx_lock);
1387 
1388         return (XMIT_DONE);
1389 err_map:
1390         if (dmah_entry != NULL) {
1391                 if (tail == NULL) {
1392                         head = tail = dmah_entry;
1393                 } else {
1394                         tail->next = dmah_entry;
1395                         tail = dmah_entry;
1396                 }
1397                 hdl_reserved++;
1398         }
1399 
1400 err_map_sec:
1401 
1402         hdl = head;
1403 
1404         while (hdl != NULL) {
1405                 (void) ddi_dma_unbind_handle(hdl->dma_handle);
1406                 hdl = hdl->next;
1407         }
1408 
1409         if (head != NULL) {
1410                 qede_put_dmah_entries(tx_ring, head);
1411         }
1412 
1413         return (status);
1414 }
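
/*
 * Illustrative sketch, not part of the driver: the header/payload split
 * the LSO path above performs when the first DMA cookie covers both the
 * headers and part of the payload.  Cookie 0 is trimmed to total_hlen
 * bytes and the rest becomes the second BD.  The guard macro and names
 * are hypothetical.
 */
#ifdef QEDE_FP_EXAMPLES
static int
ex_split_first_cookie(uint64_t addr, uint32_t size, uint32_t total_hlen,
    uint64_t *data_addr, uint32_t *data_size)
{
	if (size < total_hlen)
		return (-1);	/* headers span cookies: fall back to pullup */
	if (size == total_hlen) {
		*data_addr = 0;
		*data_size = 0;	/* payload starts in the next cookie */
		return (0);
	}
	*data_addr = addr + total_hlen;
	*data_size = size - total_hlen;
	return (1);
}
#endif	/* QEDE_FP_EXAMPLES */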
1415 
1416 static enum qede_xmit_status
1417 qede_send_tx_packet(qede_t *qede, qede_tx_ring_t *tx_ring, mblk_t *mp)
1418 {
1419         boolean_t force_pullup = B_FALSE;
1420         enum qede_xmit_status status = XMIT_FAILED;
1421         enum qede_xmit_mode xmit_mode = USE_BCOPY;
1422         qede_tx_pktinfo_t pktinfo;
1423         mblk_t *original_mp = NULL, *pulled_up_mp = NULL;
1424         struct ether_vlan_header *ethvhdr;
1425 
1426         mutex_enter(&tx_ring->tx_lock);
1427         if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1428             qede->tx_recycle_threshold) {
1429 #ifdef  DEBUG_TX_RECYCLE
1430                 qede_info(qede, "Recycling from tx routine");
1431 #endif
1432                 if (qede_process_tx_completions(tx_ring) <
1433                     qede->tx_recycle_threshold) {
1434 #ifdef  DEBUG_TX_RECYCLE
1435                         qede_info(qede, "Still not enough bd after cleanup, "
1436                             "pausing tx queue...");
1437 #endif
1438                         tx_ring->tx_q_sleeping = 1;
1439                         mutex_exit(&tx_ring->tx_lock);
1440                         return (XMIT_PAUSE_QUEUE);
1441                 }
1442         }
1443 
1444         mutex_exit(&tx_ring->tx_lock);
1445 
1446         bzero(&pktinfo, sizeof (pktinfo));
1447 
1448         /* Get the offload reqd. on the pkt */
1449         qede_get_pkt_offload_info(qede, mp, &pktinfo.cksum_flags,
1450             &pktinfo.use_lso, &pktinfo.mss);
1451 
1452 do_pullup:
1453         if (force_pullup) {
1454                 tx_ring->tx_pullup_count++;
1455 #ifdef  DEBUG_PULLUP
1456                 qede_info(qede, "Pulling up original mp %p", mp);
1457 #endif
1458                 /*
1459                  * Try to accumulate all mblks of this pkt
1460                  * into a single mblk
1461                  */
1462                 original_mp = mp;
1463                 if ((pulled_up_mp = msgpullup(mp, -1)) != NULL) {
1464 #ifdef  DEBUG_PULLUP
1465                         qede_info(qede, "New mp %p, ori %p", pulled_up_mp, mp);
1466 #endif
1467                         /*
1468                          * Proceed with the new single
1469                          * mp
1470                          */
1471                         mp = pulled_up_mp;
1472                         xmit_mode = XMIT_MODE_UNUSED;
1473                         pktinfo.pulled_up = B_TRUE;
1474                 } else {
1475 #ifdef  DEBUG_PULLUP
1476                         qede_info(tx_ring->qede, "Pullup failed");
1477 #endif
1478                         status = XMIT_FAILED;
1479                         goto exit;
1480                 }
1481         }
1482 
1483         qede_get_pkt_info(qede, mp, &pktinfo);
1484 
1485 
1486         if ((!pktinfo.use_lso) &&
1487             (pktinfo.total_len > (qede->mtu + pktinfo.mac_hlen))) {
1488                 qede_info(tx_ring->qede,
1489                     "Packet drop as packet len 0x%x > 0x%x",
1490                     pktinfo.total_len, (qede->mtu + pktinfo.mac_hlen));
1491                 status = XMIT_FAILED;
1492                 goto exit;
1493         }
1494 
1495 
1496 #ifdef  DEBUG_PULLUP
1497         if (force_pullup) {
                qede_print_err("!%s: mp %p, pktinfo : total_len %d,"
                    " mblk_no %d, ether_type %d\n"
                    "mac_hlen %d, ip_hlen %d, l4_hlen %d\n"
                    "l4_proto %d, use_cksum:use_lso %d:%d mss %d",
                    __func__, mp,
                    pktinfo.total_len, pktinfo.mblk_no, pktinfo.ether_type,
                    pktinfo.mac_hlen, pktinfo.ip_hlen, pktinfo.l4_hlen,
                    pktinfo.l4_proto, pktinfo.cksum_flags, pktinfo.use_lso,
                    pktinfo.mss);
1506         }
1507 #endif
1508 
1509 #ifdef  DEBUG_PREMAP
1510         if (DBLK_IS_PREMAPPED(mp->b_datap)) {
                qede_print_err("!%s(%d): mp %p is PREMAPPED",
                    __func__, qede->instance, mp);
1513         }
1514 #endif
1515 
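        /*
         * Transmit-mode selection (summary of the checks below):
         * packets larger than tx_bcopy_threshold, or premapped dblks
         * when DBLK_DMA_PREMAP is built in, use DMA binding; packets at
         * or below the threshold, with a first mblk too short to hold
         * the MAC and IP headers, or with an oddly aligned b_rptr, are
         * copied instead.
         */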
1516 #ifdef  DBLK_DMA_PREMAP 
1517         if (DBLK_IS_PREMAPPED(mp->b_datap) ||
1518             pktinfo.total_len > qede->tx_bcopy_threshold) {
1519                 xmit_mode = USE_DMA_BIND;
1520         }
1521 #else
1522         if (pktinfo.total_len > qede->tx_bcopy_threshold) {
1523                 xmit_mode = USE_DMA_BIND;
1524         }
1525 #endif
1526         
1527         if (pktinfo.total_len <= qede->tx_bcopy_threshold) {
1528                 xmit_mode = USE_BCOPY;
1529         }
1530 
1531         /*
1532          * if mac + ip hdr not in one contiguous block,
1533          * use copy mode
1534          */
1535         if (MBLKL(mp) < (ETHER_HEADER_LEN + IP_HEADER_LEN)) {
1536                 /*qede_info(qede, "mblk too small, using copy mode, len = %d", MBLKL(mp));*/
1537                 xmit_mode = USE_BCOPY;
1538         }
1539 
1540         if ((uintptr_t)mp->b_rptr & 1) {
1541                 xmit_mode = USE_BCOPY;
1542         }
1543 
        /*
         * If the packet has too many mblks (and hence needs too many
         * DMA cookies) for the TX BD chain, pull it up into a single
         * mblk, or fail if a pullup has already been tried.
         * Currently ETH_TX_MAX_BDS_PER_NON_LSO_PACKET = 18.
         */
1549         if (pktinfo.mblk_no > (ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1)) {
1550                 if (force_pullup) {
1551                         tx_ring->tx_too_many_mblks++;
1552                         status = XMIT_FAILED;
1553                         goto exit;
1554                 } else {
1555                         xmit_mode = USE_PULLUP;
1556                 }
1557         }
1558 
1559 #ifdef  TX_FORCE_COPY_MODE
1560         xmit_mode = USE_BCOPY;
#elif   defined(TX_FORCE_MAPPED_MODE)
1562         xmit_mode = USE_DMA_BIND;
1563 #endif
1564 
1565 #ifdef  DEBUG_PULLUP
1566         if (force_pullup) {
1567                 qede_info(qede, "using mode %d on pulled mp %p",
1568                     xmit_mode, mp);
1569         }
1570 #endif
1571 
1572         /*
1573          * Use Mapped mode for the packet
1574          */
1575         if (xmit_mode == USE_DMA_BIND) {
1576                 status = qede_tx_mapped(tx_ring, mp, &pktinfo);
1577                 if (status == XMIT_DONE) {
1578                         if (pktinfo.use_lso) {
1579                                 tx_ring->tx_lso_pkt_count++;
                        } else if (pktinfo.total_len > 1518) {
1581                                 tx_ring->tx_jumbo_pkt_count++;
1582                         }
1583                         tx_ring->tx_mapped_pkts++;
1584                         goto exit;
                } else if ((status == XMIT_TOO_MANY_COOKIES ||
                    status == XMIT_FALLBACK_PULLUP) && !force_pullup) {
1587                         xmit_mode = USE_PULLUP;
1588                 } else {
1589                         status = XMIT_FAILED;
1590                         goto exit;
1591                 }
1592         }
1593 
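        /*
         * Copy mode: qede_tx_bcopy() stages the packet in a buffer
         * taken from the ring's preallocated bcopy list. If it reports
         * XMIT_FALLBACK_PULLUP and no pullup has been tried yet, retry
         * once with a pulled-up mblk.
         */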
1594         if (xmit_mode == USE_BCOPY) {
1595                 status = qede_tx_bcopy(tx_ring, mp, &pktinfo);
1596                 if (status == XMIT_DONE) {
1597                         tx_ring->tx_copy_count++;
1598                         goto exit;
1599                 } else if ((status == XMIT_FALLBACK_PULLUP) &&
1600                     !force_pullup) {
1601                         xmit_mode = USE_PULLUP;
1602                 } else {
1603                         goto exit;
1604                 }
1605         }
1606 
1607         if (xmit_mode == USE_PULLUP) {
1608                 force_pullup = B_TRUE;
1609                 tx_ring->tx_pullup_count++;
1610                 goto do_pullup;
1611         }
1612 
1613 exit:
1614         if (status != XMIT_DONE) {
                /*
                 * If msgpullup succeeded but something else failed,
                 * free the pulled-up msg and return the original mblk
                 * to the stack, indicating tx failure.
                 */
1620                 if (pulled_up_mp) {
1621                         qede_info(qede, "tx failed, free pullup pkt %p", mp);
1622                         freemsg(pulled_up_mp);
1623                         mp = original_mp;
1624                 }
1625         } else {
1626                 tx_ring->tx_byte_count += pktinfo.total_len;
                /*
                 * If tx was successful after a pullup, free the original
                 * mp. The pulled-up mblk will be freed as part of tx
                 * completion processing.
                 */
1632                 if (pulled_up_mp) {
1633 #ifdef  DEBUG_PULLUP
1634                         qede_info(qede, 
1635                             "success, free ori mp %p", original_mp);
1636 #endif
1637                         freemsg(original_mp);
1638                 }
1639         }
1640 
1641         return (status);
1642 }
1643 
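/*
 * 32-bit hash used for TX ring fanout (see qede_hash_get_txq() below).
 * This is Bob Jenkins' classic lookup2-style hash: mix() reversibly
 * mixes three 32-bit values, and hash() consumes the key in 12-byte
 * blocks, folding the remaining 0-11 bytes in the switch at the end.
 */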
1644 typedef uint32_t        ub4; /* unsigned 4-byte quantities */
1645 typedef uint8_t         ub1;
1646 
1647 #define hashsize(n)     ((ub4)1<<(n))
1648 #define hashmask(n)     (hashsize(n)-1)
1649 
1650 #define mix(a, b, c) \
1651 { \
1652         a -= b; a -= c; a ^= (c>>13); \
1653         b -= c; b -= a; b ^= (a<<8); \
1654         c -= a; c -= b; c ^= (b>>13); \
1655         a -= b; a -= c; a ^= (c>>12);  \
1656         b -= c; b -= a; b ^= (a<<16); \
1657         c -= a; c -= b; c ^= (b>>5); \
1658         a -= b; a -= c; a ^= (c>>3);  \
1659         b -= c; b -= a; b ^= (a<<10); \
1660         c -= a; c -= b; c ^= (b>>15); \
1661 }
1662 
/*
 * k:       the key
 * length:  the length of the key, in bytes
 * initval: the previous hash, or an arbitrary value
 */
ub4
hash(ub1 *k, ub4 length, ub4 initval)
{
1669         register ub4 a, b, c, len;
1670 
1671         /* Set up the internal state */
1672         len = length;
1673         a = b = 0x9e3779b9;     /* the golden ratio; an arbitrary value */
1674         c = initval;            /* the previous hash value */
1675 
1676         /* handle most of the key */
        while (len >= 12) {
1679                 a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
1680                 b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
1681                 c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
1682                 mix(a, b, c);
1683                 k += 12;
1684                 len -= 12;
1685         }
1686 
1687         /* handle the last 11 bytes */
1688         c += length;
1689         /* all the case statements fall through */
        switch (len) {
1692         /* FALLTHRU */
1693         case 11: 
1694                 c += ((ub4)k[10]<<24);
1695         /* FALLTHRU */
1696         case 10: 
1697                 c += ((ub4)k[9]<<16);
1698         /* FALLTHRU */
1699         case 9 : 
1700                 c += ((ub4)k[8]<<8);
1701         /* the first byte of c is reserved for the length */
1702         /* FALLTHRU */
1703         case 8 : 
1704                 b += ((ub4)k[7]<<24);
1705         /* FALLTHRU */
1706         case 7 : 
1707                 b += ((ub4)k[6]<<16);
1708         /* FALLTHRU */
1709         case 6 : 
1710                 b += ((ub4)k[5]<<8);
1711         /* FALLTHRU */
1712         case 5 : 
1713                 b += k[4];
1714         /* FALLTHRU */
1715         case 4 : 
1716                 a += ((ub4)k[3]<<24);
1717         /* FALLTHRU */
1718         case 3 : 
1719                 a += ((ub4)k[2]<<16);
1720         /* FALLTHRU */
1721         case 2 : 
1722                 a += ((ub4)k[1]<<8);
1723         /* FALLTHRU */
1724         case 1 : 
1725                 a += k[0];
1726         /* case 0: nothing left to add */
1727         }
1728         mix(a, b, c);
1729         /* report the result */
1730         return (c);
1731 }
1732 
1733 #ifdef  NO_CROSSBOW
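/*
 * Pick a TX ring for an outgoing packet by hashing a 12-byte key built
 * from the IPv4 header (and the TCP/UDP ports when present):
 *
 *	key[0..3]   source IP address bytes
 *	key[4..7]   destination IP address bytes
 *	key[8..9]   L4 source port bytes
 *	key[10..11] L4 destination port bytes
 *
 * Non-IPv4 packets and single-ring configurations use ring 0. The
 * (h & (num_fp - 1)) masking assumes num_fp is a power of two; the
 * range check below catches the case where it is not.
 */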
1734 static uint8_t
1735 qede_hash_get_txq(qede_t *qede, caddr_t bp)
1736 {
1737         struct ip *iphdr = NULL;
1738         struct ether_header *ethhdr;
1739         struct ether_vlan_header *ethvhdr;
1740         struct tcphdr *tcp_hdr;
1741         struct udphdr *udp_hdr;
1742         uint32_t etherType;
1743         int mac_hdr_len, ip_hdr_len;
1744         uint32_t h = 0; /* 0 by default */
1745         uint8_t tx_ring_id = 0;
1746         uint32_t ip_src_addr = 0;
1747         uint32_t ip_desc_addr = 0;
1748         uint16_t src_port = 0;
1749         uint16_t dest_port = 0;
1750         uint8_t key[12];
1751 
1752         if (qede->num_fp == 1) {
1753                 return (tx_ring_id);
1754         }
1755 
1756         ethhdr = (struct ether_header *)((void *)bp);
1757         ethvhdr = (struct ether_vlan_header *)((void *)bp);
1758 
        /* Is this a VLAN packet? */
1760         if (ntohs(ethvhdr->ether_tpid) == ETHERTYPE_VLAN) {
1761                 mac_hdr_len = sizeof (struct ether_vlan_header);
1762                 etherType = ntohs(ethvhdr->ether_type);
1763         } else {
1764                 mac_hdr_len = sizeof (struct ether_header);
1765                 etherType = ntohs(ethhdr->ether_type);
1766         }
        /* Is this an IPv4 or IPv6 packet? */
1768         if (etherType == ETHERTYPE_IP /* 0800 */) {
1769                 if (IPH_HDR_VERSION((ipha_t *)(void *)(bp+mac_hdr_len))
1770                     == IPV4_VERSION) {
1771                         iphdr = (struct ip *)(void *)(bp+mac_hdr_len);
1772                 }
1773                 if (((unsigned long)iphdr) & 0x3) {
1774                         /*  IP hdr not 4-byte aligned */
1775                         return (tx_ring_id);
1776                 }
1777         }
        /* IPv4 packets */
1779         if (iphdr) {
1780 
1781                 ip_hdr_len = IPH_HDR_LENGTH(iphdr);
1782                 ip_src_addr = iphdr->ip_src.s_addr;
1783                 ip_desc_addr = iphdr->ip_dst.s_addr;
1784 
1785                 if (iphdr->ip_p == IPPROTO_TCP) {
1786                         tcp_hdr = (struct tcphdr *)(void *)
1787                             ((uint8_t *)iphdr + ip_hdr_len);
1788                         src_port = tcp_hdr->th_sport;
1789                         dest_port = tcp_hdr->th_dport;
1790                 } else if (iphdr->ip_p == IPPROTO_UDP) {
1791                         udp_hdr = (struct udphdr *)(void *)
1792                             ((uint8_t *)iphdr + ip_hdr_len);
1793                         src_port = udp_hdr->uh_sport;
1794                         dest_port = udp_hdr->uh_dport;
1795                 }
1796                 key[0] = (uint8_t)((ip_src_addr) &0xFF);
1797                 key[1] = (uint8_t)((ip_src_addr >> 8) &0xFF);
1798                 key[2] = (uint8_t)((ip_src_addr >> 16) &0xFF);
1799                 key[3] = (uint8_t)((ip_src_addr >> 24) &0xFF);
1800                 key[4] = (uint8_t)((ip_desc_addr) &0xFF);
1801                 key[5] = (uint8_t)((ip_desc_addr >> 8) &0xFF);
1802                 key[6] = (uint8_t)((ip_desc_addr >> 16) &0xFF);
1803                 key[7] = (uint8_t)((ip_desc_addr >> 24) &0xFF);
1804                 key[8] = (uint8_t)((src_port) &0xFF);
1805                 key[9] = (uint8_t)((src_port >> 8) &0xFF);
1806                 key[10] = (uint8_t)((dest_port) &0xFF);
1807                 key[11] = (uint8_t)((dest_port >> 8) &0xFF);
1808                 h = hash(key, 12, 0); /* return 32 bit */
1809                 tx_ring_id = (h & (qede->num_fp - 1));
1810                 if (tx_ring_id >= qede->num_fp) {
1811                         cmn_err(CE_WARN, "%s bad tx_ring_id %d\n",
1812                             __func__, tx_ring_id);
1813                         tx_ring_id = 0;
1814                 }
1815         }
1816         return (tx_ring_id);
1817 }
1818 #endif
1819 
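/*
 * Ring transmit entry point, called with a chain of mblks. Each packet
 * is handed to qede_send_tx_packet(); on XMIT_PAUSE_QUEUE the unsent
 * remainder of the chain is returned so the framework can retry later,
 * and on any other failure the remaining chain is dropped and freed.
 */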
1820 mblk_t *
1821 qede_ring_tx(void *arg, mblk_t *mp)
1822 {
1823         qede_fastpath_t *fp = (qede_fastpath_t *)arg;
1824         qede_t *qede = fp->qede;
1825 #ifndef NO_CROSSBOW
1826         qede_tx_ring_t *tx_ring = fp->tx_ring[0];
1827 #else
        qede_tx_ring_t *tx_ring = NULL;
1829 #endif
1830         uint32_t ring_id;
1831         mblk_t *next = NULL;
1832         enum qede_xmit_status status = XMIT_FAILED;
1833         caddr_t bp;
1834 
1835         ASSERT(mp->b_next == NULL);
1836 
1837 #ifndef NO_CROSSBOW
1838         if (!fp || !tx_ring) {
1839                 qede_print_err("!%s: error, fp %p, tx_ring %p",
1840                     __func__, fp, tx_ring);
1841                 goto exit;
1842         }
1843 #endif
1844         if (qede->qede_state != QEDE_STATE_STARTED) {
1845                 qede_print_err("!%s(%d): qede_state %d invalid",
1846                     __func__, qede->instance, qede->qede_state);
1847                 goto exit;
1848         }
1849 
1850         if (!qede->params.link_state) {
1851                 qede_print_err("!%s(%d): Link !up for xmit",
1852                     __func__, qede->instance);
1853                 goto exit;
1854         }
1855 
1856         while (mp != NULL) {
1857 #ifdef  NO_CROSSBOW
1858                 /*
1859                  * Figure out which tx ring to send this packet to.
1860                  * Currently multiple rings are not exposed to mac layer
1861                  * and fanout done by driver
1862                  */
1863                 bp = (caddr_t)mp->b_rptr;
1864                 ring_id = qede_hash_get_txq(qede, bp);
1865                 fp = &qede->fp_array[ring_id];
1866                 tx_ring = fp->tx_ring[0];
1867 
1868                 if (qede->num_tc > 1) {
1869                         qede_info(qede, 
1870                             "Traffic classes(%d) > 1 not supported",
1871                             qede->num_tc);
1872                         goto exit;
1873                 }
1874 #endif
1875                 next = mp->b_next;
1876                 mp->b_next = NULL;
1877 
1878                 status = qede_send_tx_packet(qede, tx_ring, mp);
1879                 if (status == XMIT_DONE) {
1880                         tx_ring->tx_pkt_count++;
1881                         mp = next;
1882                 } else if (status == XMIT_PAUSE_QUEUE) {
1883                         tx_ring->tx_ring_pause++;
1884                         mp->b_next = next;
1885                         break;
                } else {
                        /* XMIT_FAILED or any other unexpected status */
                        goto exit;
                }
1889         }
1890 
1891         return (mp);
1892 exit:
        if (tx_ring != NULL) {
                tx_ring->tx_pkt_dropped++;
        }
1894         freemsgchain(mp);
1895         mp = NULL;
1896         return (mp);
1897 }