1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
  24  * Copyright 2017 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * Data-Link Driver
  29  */
  30 #include <sys/sysmacros.h>
  31 #include <sys/strsubr.h>
  32 #include <sys/strsun.h>
  33 #include <sys/vlan.h>
  34 #include <sys/dld_impl.h>
  35 #include <sys/mac_client.h>
  36 #include <sys/mac_client_impl.h>
  37 #include <sys/mac_client_priv.h>
  38 
  39 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
  40 
  41 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
  42     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
  43     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
  44     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
  45     proto_notify_req, proto_passive_req, proto_exclusive_req;
  46 
  47 static void proto_capability_advertise(dld_str_t *, mblk_t *);
  48 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
  49 static boolean_t check_mod_above(queue_t *, const char *);
  50 
  51 #define DL_ACK_PENDING(state) \
  52         ((state) == DL_ATTACH_PENDING || \
  53         (state) == DL_DETACH_PENDING || \
  54         (state) == DL_BIND_PENDING || \
  55         (state) == DL_UNBIND_PENDING)
  56 
  57 /*
  58  * Process a DLPI protocol message.
  59  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
  60  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
  61  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
  62  * as 'passive' and forbids it from being subsequently made 'active'
  63  * by the above primitives.
  64  */
  65 void
  66 dld_proto(dld_str_t *dsp, mblk_t *mp)
  67 {
  68         t_uscalar_t             prim;
  69 
  70         if (MBLKL(mp) < sizeof (t_uscalar_t)) {
  71                 freemsg(mp);
  72                 return;
  73         }
  74         prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
  75 
  76         switch (prim) {
  77         case DL_INFO_REQ:
  78                 proto_info_req(dsp, mp);
  79                 break;
  80         case DL_BIND_REQ:
  81                 proto_bind_req(dsp, mp);
  82                 break;
  83         case DL_UNBIND_REQ:
  84                 proto_unbind_req(dsp, mp);
  85                 break;
  86         case DL_UNITDATA_REQ:
  87                 proto_unitdata_req(dsp, mp);
  88                 break;
  89         case DL_UDQOS_REQ:
  90                 proto_udqos_req(dsp, mp);
  91                 break;
  92         case DL_ATTACH_REQ:
  93                 proto_attach_req(dsp, mp);
  94                 break;
  95         case DL_DETACH_REQ:
  96                 proto_detach_req(dsp, mp);
  97                 break;
  98         case DL_ENABMULTI_REQ:
  99                 proto_enabmulti_req(dsp, mp);
 100                 break;
 101         case DL_DISABMULTI_REQ:
 102                 proto_disabmulti_req(dsp, mp);
 103                 break;
 104         case DL_PROMISCON_REQ:
 105                 proto_promiscon_req(dsp, mp);
 106                 break;
 107         case DL_PROMISCOFF_REQ:
 108                 proto_promiscoff_req(dsp, mp);
 109                 break;
 110         case DL_PHYS_ADDR_REQ:
 111                 proto_physaddr_req(dsp, mp);
 112                 break;
 113         case DL_SET_PHYS_ADDR_REQ:
 114                 proto_setphysaddr_req(dsp, mp);
 115                 break;
 116         case DL_NOTIFY_REQ:
 117                 proto_notify_req(dsp, mp);
 118                 break;
 119         case DL_CAPABILITY_REQ:
 120                 proto_capability_req(dsp, mp);
 121                 break;
 122         case DL_PASSIVE_REQ:
 123                 proto_passive_req(dsp, mp);
 124                 break;
 125         case DL_EXCLUSIVE_REQ:
 126                 proto_exclusive_req(dsp, mp);
 127                 break;
 128         default:
 129                 proto_req(dsp, mp);
 130                 break;
 131         }
 132 }
 133 
 134 #define NEG(x)  -(x)
 135 typedef struct dl_info_ack_wrapper {
 136         dl_info_ack_t           dl_info;
 137         uint8_t                 dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
 138         uint8_t                 dl_brdcst_addr[MAXMACADDRLEN];
 139         dl_qos_cl_range1_t      dl_qos_range1;
 140         dl_qos_cl_sel1_t        dl_qos_sel1;
 141 } dl_info_ack_wrapper_t;
 142 
 143 /*
 144  * DL_INFO_REQ
 145  */
 146 static void
 147 proto_info_req(dld_str_t *dsp, mblk_t *mp)
 148 {
 149         dl_info_ack_wrapper_t   *dlwp;
 150         dl_info_ack_t           *dlp;
 151         dl_qos_cl_sel1_t        *selp;
 152         dl_qos_cl_range1_t      *rangep;
 153         uint8_t                 *addr;
 154         uint8_t                 *brdcst_addr;
 155         uint_t                  addr_length;
 156         uint_t                  sap_length;
 157         mac_info_t              minfo;
 158         mac_info_t              *minfop;
 159         queue_t                 *q = dsp->ds_wq;
 160 
 161         /*
 162          * Swap the request message for one large enough to contain the
 163          * wrapper structure defined above.
 164          */
 165         if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
 166             M_PCPROTO, 0)) == NULL)
 167                 return;
 168 
 169         bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
 170         dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
 171 
 172         dlp = &(dlwp->dl_info);
 173         ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
 174 
 175         dlp->dl_primitive = DL_INFO_ACK;
 176 
 177         /*
 178          * Set up the sub-structure pointers.
 179          */
 180         addr = dlwp->dl_addr;
 181         brdcst_addr = dlwp->dl_brdcst_addr;
 182         rangep = &(dlwp->dl_qos_range1);
 183         selp = &(dlwp->dl_qos_sel1);
 184 
 185         /*
 186          * This driver supports only version 2 connectionless DLPI provider
 187          * nodes.
 188          */
 189         dlp->dl_service_mode = DL_CLDLS;
 190         dlp->dl_version = DL_VERSION_2;
 191 
 192         /*
 193          * Set the style of the provider
 194          */
 195         dlp->dl_provider_style = dsp->ds_style;
 196         ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
 197             dlp->dl_provider_style == DL_STYLE2);
 198 
 199         /*
 200          * Set the current DLPI state.
 201          */
 202         dlp->dl_current_state = dsp->ds_dlstate;
 203 
 204         /*
 205          * Gratuitously set the media type. This is to deal with modules
 206          * that assume the media type is known prior to DL_ATTACH_REQ
 207          * being completed.
 208          */
 209         dlp->dl_mac_type = DL_ETHER;
 210 
 211         /*
 212          * If the stream is not at least attached we try to retrieve the
 213          * mac_info using mac_info_get()
 214          */
 215         if (dsp->ds_dlstate == DL_UNATTACHED ||
 216             dsp->ds_dlstate == DL_ATTACH_PENDING ||
 217             dsp->ds_dlstate == DL_DETACH_PENDING) {
 218                 if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
 219                         /*
 220                          * Cannot find mac_info. giving up.
 221                          */
 222                         goto done;
 223                 }
 224                 minfop = &minfo;
 225         } else {
 226                 minfop = (mac_info_t *)dsp->ds_mip;
 227                 /* We can only get the sdu if we're attached. */
 228                 mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
 229         }
 230 
 231         /*
 232          * Set the media type (properly this time).
 233          */
 234         if (dsp->ds_native)
 235                 dlp->dl_mac_type = minfop->mi_nativemedia;
 236         else
 237                 dlp->dl_mac_type = minfop->mi_media;
 238 
 239         /*
 240          * Set the DLSAP length. We only support 16 bit values and they
 241          * appear after the MAC address portion of DLSAP addresses.
 242          */
 243         sap_length = sizeof (uint16_t);
 244         dlp->dl_sap_length = NEG(sap_length);
 245 
 246         addr_length = minfop->mi_addr_length;
 247 
 248         /*
 249          * Copy in the media broadcast address.
 250          */
 251         if (minfop->mi_brdcst_addr != NULL) {
 252                 dlp->dl_brdcst_addr_offset =
 253                     (uintptr_t)brdcst_addr - (uintptr_t)dlp;
 254                 bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
 255                 dlp->dl_brdcst_addr_length = addr_length;
 256         }
 257 
 258         /* Only VLAN links and links that have a normal tag mode support QOS. */
 259         if ((dsp->ds_mch != NULL &&
 260             mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
 261             (dsp->ds_dlp != NULL &&
 262             dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
 263                 dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
 264                 dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
 265 
 266                 rangep->dl_qos_type = DL_QOS_CL_RANGE1;
 267                 rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
 268                 rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
 269                 rangep->dl_protection.dl_min = DL_UNKNOWN;
 270                 rangep->dl_protection.dl_max = DL_UNKNOWN;
 271                 rangep->dl_residual_error = DL_UNKNOWN;
 272 
 273                 /*
 274                  * Specify the supported range of priorities.
 275                  */
 276                 rangep->dl_priority.dl_min = 0;
 277                 rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
 278 
 279                 dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
 280                 dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
 281 
 282                 selp->dl_qos_type = DL_QOS_CL_SEL1;
 283                 selp->dl_trans_delay = DL_UNKNOWN;
 284                 selp->dl_protection = DL_UNKNOWN;
 285                 selp->dl_residual_error = DL_UNKNOWN;
 286 
 287                 /*
 288                  * Specify the current priority (which can be changed by
 289                  * the DL_UDQOS_REQ primitive).
 290                  */
 291                 selp->dl_priority = dsp->ds_pri;
 292         }
 293 
 294         dlp->dl_addr_length = addr_length + sizeof (uint16_t);
 295         if (dsp->ds_dlstate == DL_IDLE) {
 296                 /*
 297                  * The stream is bound. Therefore we can formulate a valid
 298                  * DLSAP address.
 299                  */
 300                 dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
 301                 if (addr_length > 0)
 302                         mac_unicast_primary_get(dsp->ds_mh, addr);
 303 
 304                 *(uint16_t *)(addr + addr_length) = dsp->ds_sap;
 305         }
 306 
 307 done:
 308         IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
 309         IMPLY(dlp->dl_qos_range_offset != 0,
 310             dlp->dl_qos_range_length != 0);
 311         IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
 312         IMPLY(dlp->dl_brdcst_addr_offset != 0,
 313             dlp->dl_brdcst_addr_length != 0);
 314 
 315         qreply(q, mp);
 316 }
 317 
 318 /*
 319  * DL_ATTACH_REQ
 320  */
 321 static void
 322 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
 323 {
 324         dl_attach_req_t *dlp = (dl_attach_req_t *)mp->b_rptr;
 325         int             err = 0;
 326         t_uscalar_t     dl_err;
 327         queue_t         *q = dsp->ds_wq;
 328 
 329         if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
 330             dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
 331                 dl_err = DL_BADPRIM;
 332                 goto failed;
 333         }
 334 
 335         if (dsp->ds_dlstate != DL_UNATTACHED) {
 336                 dl_err = DL_OUTSTATE;
 337                 goto failed;
 338         }
 339 
 340         dsp->ds_dlstate = DL_ATTACH_PENDING;
 341 
 342         err = dld_str_attach(dsp, dlp->dl_ppa);
 343         if (err != 0) {
 344                 switch (err) {
 345                 case ENOENT:
 346                         dl_err = DL_BADPPA;
 347                         err = 0;
 348                         break;
 349                 default:
 350                         dl_err = DL_SYSERR;
 351                         break;
 352                 }
 353                 dsp->ds_dlstate = DL_UNATTACHED;
 354                 goto failed;
 355         }
 356         ASSERT(dsp->ds_dlstate == DL_UNBOUND);
 357         dlokack(q, mp, DL_ATTACH_REQ);
 358         return;
 359 
 360 failed:
 361         dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
 362 }
 363 
 364 /*
 365  * DL_DETACH_REQ
 366  */
 367 static void
 368 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
 369 {
 370         queue_t         *q = dsp->ds_wq;
 371         t_uscalar_t     dl_err;
 372 
 373         if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
 374                 dl_err = DL_BADPRIM;
 375                 goto failed;
 376         }
 377 
 378         if (dsp->ds_dlstate != DL_UNBOUND) {
 379                 dl_err = DL_OUTSTATE;
 380                 goto failed;
 381         }
 382 
 383         if (dsp->ds_style == DL_STYLE1) {
 384                 dl_err = DL_BADPRIM;
 385                 goto failed;
 386         }
 387 
 388         ASSERT(dsp->ds_datathr_cnt == 0);
 389         dsp->ds_dlstate = DL_DETACH_PENDING;
 390 
 391         dld_str_detach(dsp);
 392         dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
 393         return;
 394 
 395 failed:
 396         dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
 397 }
 398 
 399 /*
 400  * DL_BIND_REQ
 401  */
 402 static void
 403 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
 404 {
 405         dl_bind_req_t   *dlp = (dl_bind_req_t *)mp->b_rptr;
 406         int             err = 0;
 407         uint8_t         dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
 408         uint_t          dlsap_addr_length;
 409         t_uscalar_t     dl_err;
 410         t_scalar_t      sap;
 411         queue_t         *q = dsp->ds_wq;
 412         mac_perim_handle_t      mph;
 413         void            *mdip;
 414         int32_t         intr_cpu;
 415 
 416         if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
 417                 dl_err = DL_BADPRIM;
 418                 goto failed;
 419         }
 420 
 421         if (dlp->dl_xidtest_flg != 0) {
 422                 dl_err = DL_NOAUTO;
 423                 goto failed;
 424         }
 425 
 426         if (dlp->dl_service_mode != DL_CLDLS) {
 427                 dl_err = DL_UNSUPPORTED;
 428                 goto failed;
 429         }
 430 
 431         if (dsp->ds_dlstate != DL_UNBOUND) {
 432                 dl_err = DL_OUTSTATE;
 433                 goto failed;
 434         }
 435 
 436         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 437 
 438         if ((err = dls_active_set(dsp)) != 0) {
 439                 dl_err = DL_SYSERR;
 440                 goto failed2;
 441         }
 442 
 443         dsp->ds_dlstate = DL_BIND_PENDING;
 444         /*
 445          * Set the receive callback.
 446          */
 447         dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
 448             dld_str_rx_raw : dld_str_rx_unitdata, dsp);
 449 
 450         /*
 451          * Bind the channel such that it can receive packets.
 452          */
 453         sap = dlp->dl_sap;
 454         dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
 455             !check_mod_above(dsp->ds_rq, "arp");
 456 
 457         err = dls_bind(dsp, sap);
 458         if (err != 0) {
 459                 switch (err) {
 460                 case EINVAL:
 461                         dl_err = DL_BADADDR;
 462                         err = 0;
 463                         break;
 464                 default:
 465                         dl_err = DL_SYSERR;
 466                         break;
 467                 }
 468 
 469                 dsp->ds_dlstate = DL_UNBOUND;
 470                 dls_active_clear(dsp, B_FALSE);
 471                 goto failed2;
 472         }
 473 
 474         intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
 475         mdip = mac_get_devinfo(dsp->ds_mh);
 476         mac_perim_exit(mph);
 477 
 478         /*
 479          * We do this after we get out of the perim to avoid deadlocks
 480          * etc. since part of mac_client_retarget_intr is to walk the
 481          * device tree in order to find and retarget the interrupts.
 482          */
 483         if (intr_cpu != -1)
 484                 mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
 485 
 486         /*
 487          * Copy in MAC address.
 488          */
 489         dlsap_addr_length = dsp->ds_mip->mi_addr_length;
 490         mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
 491 
 492         /*
 493          * Copy in the SAP.
 494          */
 495         *(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
 496         dlsap_addr_length += sizeof (uint16_t);
 497 
 498         dsp->ds_dlstate = DL_IDLE;
 499         dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
 500         return;
 501 
 502 failed2:
 503         mac_perim_exit(mph);
 504 failed:
 505         dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
 506 }
 507 
 508 /*
 509  * DL_UNBIND_REQ
 510  */
 511 static void
 512 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
 513 {
 514         queue_t         *q = dsp->ds_wq;
 515         t_uscalar_t     dl_err;
 516         mac_perim_handle_t      mph;
 517 
 518         if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
 519                 dl_err = DL_BADPRIM;
 520                 goto failed;
 521         }
 522 
 523         if (dsp->ds_dlstate != DL_IDLE) {
 524                 dl_err = DL_OUTSTATE;
 525                 goto failed;
 526         }
 527 
 528         mutex_enter(&dsp->ds_lock);
 529         while (dsp->ds_datathr_cnt != 0)
 530                 cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
 531 
 532         dsp->ds_dlstate = DL_UNBIND_PENDING;
 533         mutex_exit(&dsp->ds_lock);
 534 
 535         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 536         /*
 537          * Unbind the channel to stop packets being received.
 538          */
 539         dls_unbind(dsp);
 540 
 541         /*
 542          * Disable polling mode, if it is enabled.
 543          */
 544         (void) dld_capab_poll_disable(dsp, NULL);
 545 
 546         /*
 547          * Clear LSO flags.
 548          */
 549         dsp->ds_lso = B_FALSE;
 550         dsp->ds_lso_max = 0;
 551 
 552         /*
 553          * Clear the receive callback.
 554          */
 555         dls_rx_set(dsp, NULL, NULL);
 556         dsp->ds_direct = B_FALSE;
 557 
 558         /*
 559          * Set the mode back to the default (unitdata).
 560          */
 561         dsp->ds_mode = DLD_UNITDATA;
 562         dsp->ds_dlstate = DL_UNBOUND;
 563 
 564         dls_active_clear(dsp, B_FALSE);
 565         mac_perim_exit(mph);
 566         dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
 567         return;
 568 failed:
 569         dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
 570 }
 571 
 572 /*
 573  * DL_PROMISCON_REQ
 574  */
 575 static void
 576 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
 577 {
 578         dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
 579         int             err = 0;
 580         t_uscalar_t     dl_err;
 581         uint32_t        new_flags, promisc_saved;
 582         queue_t         *q = dsp->ds_wq;
 583         mac_perim_handle_t      mph;
 584 
 585         if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
 586                 dl_err = DL_BADPRIM;
 587                 goto failed;
 588         }
 589 
 590         if (dsp->ds_dlstate == DL_UNATTACHED ||
 591             DL_ACK_PENDING(dsp->ds_dlstate)) {
 592                 dl_err = DL_OUTSTATE;
 593                 goto failed;
 594         }
 595 
 596         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 597 
 598         new_flags = promisc_saved = dsp->ds_promisc;
 599         switch (dlp->dl_level) {
 600         case DL_PROMISC_SAP:
 601                 new_flags |= DLS_PROMISC_SAP;
 602                 break;
 603 
 604         case DL_PROMISC_MULTI:
 605                 new_flags |= DLS_PROMISC_MULTI;
 606                 break;
 607 
 608         case DL_PROMISC_PHYS:
 609                 new_flags |= DLS_PROMISC_PHYS;
 610                 break;
 611 
 612         case DL_PROMISC_RX_ONLY:
 613                 new_flags |= DLS_PROMISC_RX_ONLY;
 614                 break;
 615 
 616         case DL_PROMISC_FIXUPS:
 617                 new_flags |= DLS_PROMISC_FIXUPS;
 618                 break;
 619 
 620         default:
 621                 dl_err = DL_NOTSUPPORTED;
 622                 goto failed2;
 623         }
 624 
 625         if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
 626                 ASSERT(dsp->ds_promisc == promisc_saved);
 627                 dl_err = DL_SYSERR;
 628                 goto failed2;
 629         }
 630 
 631         /*
 632          * Adjust channel promiscuity.
 633          */
 634         err = dls_promisc(dsp, new_flags);
 635 
 636         if (err != 0) {
 637                 dl_err = DL_SYSERR;
 638                 dsp->ds_promisc = promisc_saved;
 639                 if (promisc_saved == 0)
 640                         dls_active_clear(dsp, B_FALSE);
 641                 goto failed2;
 642         }
 643 
 644         mac_perim_exit(mph);
 645 
 646         dlokack(q, mp, DL_PROMISCON_REQ);
 647         return;
 648 
 649 failed2:
 650         mac_perim_exit(mph);
 651 failed:
 652         dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
 653 }
 654 
 655 /*
 656  * DL_PROMISCOFF_REQ
 657  */
 658 static void
 659 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
 660 {
 661         dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
 662         int             err = 0;
 663         t_uscalar_t     dl_err;
 664         uint32_t        new_flags;
 665         queue_t         *q = dsp->ds_wq;
 666         mac_perim_handle_t      mph;
 667 
 668         if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
 669                 dl_err = DL_BADPRIM;
 670                 goto failed;
 671         }
 672 
 673         if (dsp->ds_dlstate == DL_UNATTACHED ||
 674             DL_ACK_PENDING(dsp->ds_dlstate)) {
 675                 dl_err = DL_OUTSTATE;
 676                 goto failed;
 677         }
 678 
 679         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 680 
 681         new_flags = dsp->ds_promisc;
 682         switch (dlp->dl_level) {
 683         case DL_PROMISC_SAP:
 684                 if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
 685                         dl_err = DL_NOTENAB;
 686                         goto failed2;
 687                 }
 688                 new_flags &= ~DLS_PROMISC_SAP;
 689                 break;
 690 
 691         case DL_PROMISC_MULTI:
 692                 if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
 693                         dl_err = DL_NOTENAB;
 694                         goto failed2;
 695                 }
 696                 new_flags &= ~DLS_PROMISC_MULTI;
 697                 break;
 698 
 699         case DL_PROMISC_PHYS:
 700                 if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
 701                         dl_err = DL_NOTENAB;
 702                         goto failed2;
 703                 }
 704                 new_flags &= ~DLS_PROMISC_PHYS;
 705                 break;
 706 
 707         case DL_PROMISC_RX_ONLY:
 708                 if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
 709                         dl_err = DL_NOTENAB;
 710                         goto failed2;
 711                 }
 712                 new_flags &= ~DLS_PROMISC_RX_ONLY;
 713                 break;
 714 
 715         case DL_PROMISC_FIXUPS:
 716                 if (!(dsp->ds_promisc & DLS_PROMISC_FIXUPS)) {
 717                         dl_err = DL_NOTENAB;
 718                         goto failed2;
 719                 }
 720                 new_flags &= ~DLS_PROMISC_FIXUPS;
 721                 break;
 722 
 723         default:
 724                 dl_err = DL_NOTSUPPORTED;
 725                 goto failed2;
 726         }
 727 
 728         /*
 729          * Adjust channel promiscuity.
 730          */
 731         err = dls_promisc(dsp, new_flags);
 732 
 733         if (err != 0) {
 734                 dl_err = DL_SYSERR;
 735                 goto failed2;
 736         }
 737 
 738         ASSERT(dsp->ds_promisc == new_flags);
 739         if (dsp->ds_promisc == 0)
 740                 dls_active_clear(dsp, B_FALSE);
 741 
 742         mac_perim_exit(mph);
 743 
 744         dlokack(q, mp, DL_PROMISCOFF_REQ);
 745         return;
 746 failed2:
 747         mac_perim_exit(mph);
 748 failed:
 749         dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
 750 }
 751 
 752 /*
 753  * DL_ENABMULTI_REQ
 754  */
 755 static void
 756 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
 757 {
 758         dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
 759         int             err = 0;
 760         t_uscalar_t     dl_err;
 761         queue_t         *q = dsp->ds_wq;
 762         mac_perim_handle_t      mph;
 763 
 764         if (dsp->ds_dlstate == DL_UNATTACHED ||
 765             DL_ACK_PENDING(dsp->ds_dlstate)) {
 766                 dl_err = DL_OUTSTATE;
 767                 goto failed;
 768         }
 769 
 770         if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
 771             !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
 772             dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
 773                 dl_err = DL_BADPRIM;
 774                 goto failed;
 775         }
 776 
 777         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 778 
 779         if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
 780                 dl_err = DL_SYSERR;
 781                 goto failed2;
 782         }
 783 
 784         err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
 785         if (err != 0) {
 786                 switch (err) {
 787                 case EINVAL:
 788                         dl_err = DL_BADADDR;
 789                         err = 0;
 790                         break;
 791                 case ENOSPC:
 792                         dl_err = DL_TOOMANY;
 793                         err = 0;
 794                         break;
 795                 default:
 796                         dl_err = DL_SYSERR;
 797                         break;
 798                 }
 799                 if (dsp->ds_dmap == NULL)
 800                         dls_active_clear(dsp, B_FALSE);
 801                 goto failed2;
 802         }
 803 
 804         mac_perim_exit(mph);
 805 
 806         dlokack(q, mp, DL_ENABMULTI_REQ);
 807         return;
 808 
 809 failed2:
 810         mac_perim_exit(mph);
 811 failed:
 812         dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
 813 }
 814 
 815 /*
 816  * DL_DISABMULTI_REQ
 817  */
 818 static void
 819 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
 820 {
 821         dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
 822         int             err = 0;
 823         t_uscalar_t     dl_err;
 824         queue_t         *q = dsp->ds_wq;
 825         mac_perim_handle_t      mph;
 826 
 827         if (dsp->ds_dlstate == DL_UNATTACHED ||
 828             DL_ACK_PENDING(dsp->ds_dlstate)) {
 829                 dl_err = DL_OUTSTATE;
 830                 goto failed;
 831         }
 832 
 833         if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
 834             !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
 835             dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
 836                 dl_err = DL_BADPRIM;
 837                 goto failed;
 838         }
 839 
 840         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 841         err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
 842         if ((err == 0) && (dsp->ds_dmap == NULL))
 843                 dls_active_clear(dsp, B_FALSE);
 844         mac_perim_exit(mph);
 845 
 846         if (err != 0) {
 847         switch (err) {
 848                 case EINVAL:
 849                         dl_err = DL_BADADDR;
 850                         err = 0;
 851                         break;
 852 
 853                 case ENOENT:
 854                         dl_err = DL_NOTENAB;
 855                         err = 0;
 856                         break;
 857 
 858                 default:
 859                         dl_err = DL_SYSERR;
 860                         break;
 861                 }
 862                 goto failed;
 863         }
 864         dlokack(q, mp, DL_DISABMULTI_REQ);
 865         return;
 866 failed:
 867         dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
 868 }
 869 
 870 /*
 871  * DL_PHYS_ADDR_REQ
 872  */
 873 static void
 874 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
 875 {
 876         dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
 877         queue_t         *q = dsp->ds_wq;
 878         t_uscalar_t     dl_err = 0;
 879         char            *addr = NULL;
 880         uint_t          addr_length;
 881 
 882         if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
 883                 dl_err = DL_BADPRIM;
 884                 goto done;
 885         }
 886 
 887         if (dsp->ds_dlstate == DL_UNATTACHED ||
 888             DL_ACK_PENDING(dsp->ds_dlstate)) {
 889                 dl_err = DL_OUTSTATE;
 890                 goto done;
 891         }
 892 
 893         addr_length = dsp->ds_mip->mi_addr_length;
 894         if (addr_length > 0) {
 895                 addr = kmem_alloc(addr_length, KM_SLEEP);
 896                 switch (dlp->dl_addr_type) {
 897                 case DL_CURR_PHYS_ADDR:
 898                         mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
 899                         break;
 900                 case DL_FACT_PHYS_ADDR:
 901                         bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
 902                         break;
 903                 case DL_CURR_DEST_ADDR:
 904                         if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
 905                                 dl_err = DL_NOTSUPPORTED;
 906                         break;
 907                 default:
 908                         dl_err = DL_UNSUPPORTED;
 909                 }
 910         }
 911 done:
 912         if (dl_err == 0)
 913                 dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
 914         else
 915                 dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
 916         if (addr != NULL)
 917                 kmem_free(addr, addr_length);
 918 }
 919 
 920 /*
 921  * DL_SET_PHYS_ADDR_REQ
 922  */
 923 static void
 924 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
 925 {
 926         dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
 927         int             err = 0;
 928         t_uscalar_t     dl_err;
 929         queue_t         *q = dsp->ds_wq;
 930         mac_perim_handle_t      mph;
 931 
 932         if (dsp->ds_dlstate == DL_UNATTACHED ||
 933             DL_ACK_PENDING(dsp->ds_dlstate)) {
 934                 dl_err = DL_OUTSTATE;
 935                 goto failed;
 936         }
 937 
 938         if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
 939             !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
 940             dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
 941                 dl_err = DL_BADPRIM;
 942                 goto failed;
 943         }
 944 
 945         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 946 
 947         if ((err = dls_active_set(dsp)) != 0) {
 948                 dl_err = DL_SYSERR;
 949                 goto failed2;
 950         }
 951 
 952         /*
 953          * If mac-nospoof is enabled and the link is owned by a
 954          * non-global zone, changing the mac address is not allowed.
 955          */
 956         if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
 957             mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
 958                 dls_active_clear(dsp, B_FALSE);
 959                 err = EACCES;
 960                 goto failed2;
 961         }
 962 
 963         err = mac_unicast_primary_set(dsp->ds_mh,
 964             mp->b_rptr + dlp->dl_addr_offset);
 965         if (err != 0) {
 966                 switch (err) {
 967                 case EINVAL:
 968                         dl_err = DL_BADADDR;
 969                         err = 0;
 970                         break;
 971 
 972                 default:
 973                         dl_err = DL_SYSERR;
 974                         break;
 975                 }
 976                 dls_active_clear(dsp, B_FALSE);
 977                 goto failed2;
 978 
 979         }
 980 
 981         mac_perim_exit(mph);
 982 
 983         dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
 984         return;
 985 
 986 failed2:
 987         mac_perim_exit(mph);
 988 failed:
 989         dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
 990 }
 991 
 992 /*
 993  * DL_UDQOS_REQ
 994  */
 995 static void
 996 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
 997 {
 998         dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
 999         dl_qos_cl_sel1_t *selp;
1000         int             off, len;
1001         t_uscalar_t     dl_err;
1002         queue_t         *q = dsp->ds_wq;
1003 
1004         off = dlp->dl_qos_offset;
1005         len = dlp->dl_qos_length;
1006 
1007         if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1008                 dl_err = DL_BADPRIM;
1009                 goto failed;
1010         }
1011 
1012         selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1013         if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1014                 dl_err = DL_BADQOSTYPE;
1015                 goto failed;
1016         }
1017 
1018         if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1019             selp->dl_priority < 0) {
1020                 dl_err = DL_BADQOSPARAM;
1021                 goto failed;
1022         }
1023 
1024         dsp->ds_pri = selp->dl_priority;
1025         dlokack(q, mp, DL_UDQOS_REQ);
1026         return;
1027 failed:
1028         dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1029 }
1030 
1031 static boolean_t
1032 check_mod_above(queue_t *q, const char *mod)
1033 {
1034         queue_t         *next_q;
1035         boolean_t       ret = B_TRUE;
1036 
1037         claimstr(q);
1038         next_q = q->q_next;
1039         if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1040                 ret = B_FALSE;
1041         releasestr(q);
1042         return (ret);
1043 }
1044 
1045 /*
1046  * DL_CAPABILITY_REQ
1047  */
1048 static void
1049 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1050 {
1051         dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1052         dl_capability_sub_t *sp;
1053         size_t          size, len;
1054         offset_t        off, end;
1055         t_uscalar_t     dl_err;
1056         queue_t         *q = dsp->ds_wq;
1057 
1058         if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1059                 dl_err = DL_BADPRIM;
1060                 goto failed;
1061         }
1062 
1063         if (dsp->ds_dlstate == DL_UNATTACHED ||
1064             DL_ACK_PENDING(dsp->ds_dlstate)) {
1065                 dl_err = DL_OUTSTATE;
1066                 goto failed;
1067         }
1068 
1069         /*
1070          * This request is overloaded. If there are no requested capabilities
1071          * then we just want to acknowledge with all the capabilities we
1072          * support. Otherwise we enable the set of capabilities requested.
1073          */
1074         if (dlp->dl_sub_length == 0) {
1075                 proto_capability_advertise(dsp, mp);
1076                 return;
1077         }
1078 
1079         if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1080                 dl_err = DL_BADPRIM;
1081                 goto failed;
1082         }
1083 
1084         dlp->dl_primitive = DL_CAPABILITY_ACK;
1085 
1086         off = dlp->dl_sub_offset;
1087         len = dlp->dl_sub_length;
1088 
1089         /*
1090          * Walk the list of capabilities to be enabled.
1091          */
1092         for (end = off + len; off < end; ) {
1093                 sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1094                 size = sizeof (dl_capability_sub_t) + sp->dl_length;
1095 
1096                 if (off + size > end ||
1097                     !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1098                         dl_err = DL_BADPRIM;
1099                         goto failed;
1100                 }
1101 
1102                 switch (sp->dl_cap) {
1103                 /*
1104                  * TCP/IP checksum offload to hardware.
1105                  */
1106                 case DL_CAPAB_HCKSUM: {
1107                         dl_capab_hcksum_t *hcksump;
1108                         dl_capab_hcksum_t hcksum;
1109 
1110                         hcksump = (dl_capab_hcksum_t *)&sp[1];
1111                         /*
1112                          * Copy for alignment.
1113                          */
1114                         bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1115                         dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1116                         bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1117                         break;
1118                 }
1119 
1120                 case DL_CAPAB_DLD: {
1121                         dl_capab_dld_t  *dldp;
1122                         dl_capab_dld_t  dld;
1123 
1124                         dldp = (dl_capab_dld_t *)&sp[1];
1125                         /*
1126                          * Copy for alignment.
1127                          */
1128                         bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1129                         dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1130                         bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1131                         break;
1132                 }
1133                 default:
1134                         break;
1135                 }
1136                 off += size;
1137         }
1138         qreply(q, mp);
1139         return;
1140 failed:
1141         dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1142 }
1143 
1144 /*
1145  * DL_NOTIFY_REQ
1146  */
1147 static void
1148 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1149 {
1150         dl_notify_req_t *dlp = (dl_notify_req_t *)mp->b_rptr;
1151         t_uscalar_t     dl_err;
1152         queue_t         *q = dsp->ds_wq;
1153         uint_t          note =
1154             DL_NOTE_PROMISC_ON_PHYS |
1155             DL_NOTE_PROMISC_OFF_PHYS |
1156             DL_NOTE_PHYS_ADDR |
1157             DL_NOTE_LINK_UP |
1158             DL_NOTE_LINK_DOWN |
1159             DL_NOTE_CAPAB_RENEG |
1160             DL_NOTE_FASTPATH_FLUSH |
1161             DL_NOTE_SPEED |
1162             DL_NOTE_SDU_SIZE|
1163             DL_NOTE_SDU_SIZE2|
1164             DL_NOTE_ALLOWED_IPS;
1165 
1166         if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1167                 dl_err = DL_BADPRIM;
1168                 goto failed;
1169         }
1170 
1171         if (dsp->ds_dlstate == DL_UNATTACHED ||
1172             DL_ACK_PENDING(dsp->ds_dlstate)) {
1173                 dl_err = DL_OUTSTATE;
1174                 goto failed;
1175         }
1176 
1177         note &= ~(mac_no_notification(dsp->ds_mh));
1178 
1179         /*
1180          * Cache the notifications that are being enabled.
1181          */
1182         dsp->ds_notifications = dlp->dl_notifications & note;
1183         /*
1184          * The ACK carries all notifications regardless of which set is
1185          * being enabled.
1186          */
1187         dlnotifyack(q, mp, note);
1188 
1189         /*
1190          * Generate DL_NOTIFY_IND messages for each enabled notification.
1191          */
1192         if (dsp->ds_notifications != 0) {
1193                 dld_str_notify_ind(dsp);
1194         }
1195         return;
1196 failed:
1197         dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1198 }
1199 
1200 /*
1201  * DL_UINTDATA_REQ
1202  */
1203 void
1204 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1205 {
1206         queue_t                 *q = dsp->ds_wq;
1207         dl_unitdata_req_t       *dlp = (dl_unitdata_req_t *)mp->b_rptr;
1208         off_t                   off;
1209         size_t                  len, size;
1210         const uint8_t           *addr;
1211         uint16_t                sap;
1212         uint_t                  addr_length;
1213         mblk_t                  *bp, *payload;
1214         uint32_t                start, stuff, end, value, flags;
1215         t_uscalar_t             dl_err;
1216         uint_t                  max_sdu;
1217 
1218         if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1219                 dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1220                 return;
1221         }
1222 
1223         mutex_enter(&dsp->ds_lock);
1224         if (dsp->ds_dlstate != DL_IDLE) {
1225                 mutex_exit(&dsp->ds_lock);
1226                 dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1227                 return;
1228         }
1229         DLD_DATATHR_INC(dsp);
1230         mutex_exit(&dsp->ds_lock);
1231 
1232         addr_length = dsp->ds_mip->mi_addr_length;
1233 
1234         off = dlp->dl_dest_addr_offset;
1235         len = dlp->dl_dest_addr_length;
1236 
1237         if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1238                 dl_err = DL_BADPRIM;
1239                 goto failed;
1240         }
1241 
1242         if (len != addr_length + sizeof (uint16_t)) {
1243                 dl_err = DL_BADADDR;
1244                 goto failed;
1245         }
1246 
1247         addr = mp->b_rptr + off;
1248         sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1249 
1250         /*
1251          * Check the length of the packet and the block types.
1252          */
1253         size = 0;
1254         payload = mp->b_cont;
1255         for (bp = payload; bp != NULL; bp = bp->b_cont) {
1256                 if (DB_TYPE(bp) != M_DATA)
1257                         goto baddata;
1258 
1259                 size += MBLKL(bp);
1260         }
1261 
1262         mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1263         if (size > max_sdu)
1264                 goto baddata;
1265 
1266         /*
1267          * Build a packet header.
1268          */
1269         if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1270             &payload)) == NULL) {
1271                 dl_err = DL_BADADDR;
1272                 goto failed;
1273         }
1274 
1275         /*
1276          * We no longer need the M_PROTO header, so free it.
1277          */
1278         freeb(mp);
1279 
1280         /*
1281          * Transfer the checksum offload information if it is present.
1282          */
1283         hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1284             &flags);
1285         (void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1286 
1287         /*
1288          * Link the payload onto the new header.
1289          */
1290         ASSERT(bp->b_cont == NULL);
1291         bp->b_cont = payload;
1292 
1293         /*
1294          * No lock can be held across modules and putnext()'s,
1295          * which can happen here with the call from DLD_TX().
1296          */
1297         if (DLD_TX(dsp, bp, 0, 0) != NULL) {
1298                 /* flow-controlled */
1299                 DLD_SETQFULL(dsp);
1300         }
1301         DLD_DATATHR_DCR(dsp);
1302         return;
1303 
1304 failed:
1305         dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1306         DLD_DATATHR_DCR(dsp);
1307         return;
1308 
1309 baddata:
1310         dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1311         DLD_DATATHR_DCR(dsp);
1312 }
1313 
1314 /*
1315  * DL_PASSIVE_REQ
1316  */
1317 static void
1318 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1319 {
1320         t_uscalar_t dl_err;
1321 
1322         /*
1323          * If we've already become active by issuing an active primitive,
1324          * then it's too late to try to become passive.
1325          */
1326         if (dsp->ds_passivestate == DLD_ACTIVE ||
1327             dsp->ds_passivestate == DLD_EXCLUSIVE) {
1328                 dl_err = DL_OUTSTATE;
1329                 goto failed;
1330         }
1331 
1332         if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1333                 dl_err = DL_BADPRIM;
1334                 goto failed;
1335         }
1336 
1337         dsp->ds_passivestate = DLD_PASSIVE;
1338         dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1339         return;
1340 failed:
1341         dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1342 }
1343 
1344 
1345 /*
1346  * Catch-all handler.
1347  */
1348 static void
1349 proto_req(dld_str_t *dsp, mblk_t *mp)
1350 {
1351         union DL_primitives     *dlp = (union DL_primitives *)mp->b_rptr;
1352 
1353         dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1354 }
1355 
1356 static int
1357 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1358 {
1359         switch (flags) {
1360         case DLD_ENABLE:
1361                 mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1362                 return (0);
1363 
1364         case DLD_DISABLE:
1365                 mac_perim_exit((mac_perim_handle_t)data);
1366                 return (0);
1367 
1368         case DLD_QUERY:
1369                 return (mac_perim_held(dsp->ds_mh));
1370         }
1371         return (0);
1372 }
1373 
1374 static int
1375 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1376 {
1377         dld_capab_direct_t      *direct = data;
1378 
1379         ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1380 
1381         switch (flags) {
1382         case DLD_ENABLE:
1383                 dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1384                     direct->di_rx_ch);
1385 
1386                 if (direct->di_flags & DI_DIRECT_RAW) {
1387                         direct->di_tx_df =
1388                             (uintptr_t)str_mdata_raw_fastpath_put;
1389                 } else {
1390                         direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1391                 }
1392                 direct->di_tx_dh = dsp;
1393                 direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1394                 direct->di_tx_cb_dh = dsp->ds_mch;
1395                 direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1396                 direct->di_tx_fctl_dh = dsp->ds_mch;
1397 
1398                 dsp->ds_direct = B_TRUE;
1399 
1400                 return (0);
1401 
1402         case DLD_DISABLE:
1403                 dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1404                     dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1405                 dsp->ds_direct = B_FALSE;
1406 
1407                 return (0);
1408         }
1409         return (ENOTSUP);
1410 }
1411 
1412 /*
1413  * dld_capab_poll_enable()
1414  *
1415  * This function is misnamed. All polling  and fanouts are run out of the
1416  * lower mac (in case of VNIC and the only mac in case of NICs). The
1417  * availability of Rx ring and promiscous mode is all taken care between
1418  * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1419  * fanout necessary is done by the soft rings that are part of the
1420  * mac_srs (by default mac_srs sends the packets up via a TCP and
1421  * non TCP soft ring).
1422  *
1423  * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1424  * (the cookie returned when they registered with IP during plumb) as their
1425  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1426  * function and 1st argument is what the caller registered when they
1427  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1428  * the function is vnic_rx and argument is vnic_t. For regular NIC
1429  * case, it mac_rx_default and mac_handle_t. As explained above, the
1430  * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1431  * from its stored 2nd argument.
1432  */
1433 static int
1434 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1435 {
1436         if (dsp->ds_polling)
1437                 return (EINVAL);
1438 
1439         if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1440                 return (ENOTSUP);
1441 
1442         /*
1443          * Enable client polling if and only if DLS bypass is possible.
1444          * Special cases like VLANs need DLS processing in the Rx data path.
1445          * In such a case we can neither allow the client (IP) to directly
1446          * poll the softring (since DLS processing hasn't been done) nor can
1447          * we allow DLS bypass.
1448          */
1449         if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1450                 return (ENOTSUP);
1451 
1452         /*
1453          * Register soft ring resources. This will come in handy later if
1454          * the user decides to modify CPU bindings to use more CPUs for the
1455          * device in which case we will switch to fanout using soft rings.
1456          */
1457         mac_resource_set_common(dsp->ds_mch,
1458             (mac_resource_add_t)poll->poll_ring_add_cf,
1459             (mac_resource_remove_t)poll->poll_ring_remove_cf,
1460             (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1461             (mac_resource_restart_t)poll->poll_ring_restart_cf,
1462             (mac_resource_bind_t)poll->poll_ring_bind_cf,
1463             poll->poll_ring_ch);
1464 
1465         mac_client_poll_enable(dsp->ds_mch);
1466 
1467         dsp->ds_polling = B_TRUE;
1468         return (0);
1469 }
1470 
1471 /* ARGSUSED */
1472 static int
1473 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1474 {
1475         if (!dsp->ds_polling)
1476                 return (EINVAL);
1477 
1478         mac_client_poll_disable(dsp->ds_mch);
1479         mac_resource_set(dsp->ds_mch, NULL, NULL);
1480 
1481         dsp->ds_polling = B_FALSE;
1482         return (0);
1483 }
1484 
1485 static int
1486 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1487 {
1488         dld_capab_poll_t        *poll = data;
1489 
1490         ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1491 
1492         switch (flags) {
1493         case DLD_ENABLE:
1494                 return (dld_capab_poll_enable(dsp, poll));
1495         case DLD_DISABLE:
1496                 return (dld_capab_poll_disable(dsp, poll));
1497         }
1498         return (ENOTSUP);
1499 }
1500 
1501 static int
1502 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1503 {
1504         dld_capab_lso_t         *lso = data;
1505 
1506         ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1507 
1508         switch (flags) {
1509         case DLD_ENABLE: {
1510                 mac_capab_lso_t         mac_lso;
1511 
1512                 /*
1513                  * Check if LSO is supported on this MAC & enable LSO
1514                  * accordingly.
1515                  */
1516                 if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1517                         lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1518                         lso->lso_flags = 0;
1519                         /* translate the flag for mac clients */
1520                         if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1521                                 lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1522                         dsp->ds_lso = B_TRUE;
1523                         dsp->ds_lso_max = lso->lso_max;
1524                 } else {
1525                         dsp->ds_lso = B_FALSE;
1526                         dsp->ds_lso_max = 0;
1527                         return (ENOTSUP);
1528                 }
1529                 return (0);
1530         }
1531         case DLD_DISABLE: {
1532                 dsp->ds_lso = B_FALSE;
1533                 dsp->ds_lso_max = 0;
1534                 return (0);
1535         }
1536         }
1537         return (ENOTSUP);
1538 }
1539 
1540 static int
1541 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1542 {
1543         int     err;
1544 
1545         /*
1546          * Don't enable direct callback capabilities unless the caller is
1547          * the IP client. When a module is inserted in a stream (_I_INSERT)
1548          * the stack initiates capability disable, but due to races, the
1549          * module insertion may complete before the capability disable
1550          * completes. So we limit the check to DLD_ENABLE case.
1551          */
1552         if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1553             ((dsp->ds_sap != ETHERTYPE_IP ||
1554             !check_mod_above(dsp->ds_rq, "ip")) &&
1555             !check_mod_above(dsp->ds_rq, "vnd"))) {
1556                 return (ENOTSUP);
1557         }
1558 
1559         switch (type) {
1560         case DLD_CAPAB_DIRECT:
1561                 err = dld_capab_direct(dsp, data, flags);
1562                 break;
1563 
1564         case DLD_CAPAB_POLL:
1565                 err =  dld_capab_poll(dsp, data, flags);
1566                 break;
1567 
1568         case DLD_CAPAB_PERIM:
1569                 err = dld_capab_perim(dsp, data, flags);
1570                 break;
1571 
1572         case DLD_CAPAB_LSO:
1573                 err = dld_capab_lso(dsp, data, flags);
1574                 break;
1575 
1576         default:
1577                 err = ENOTSUP;
1578                 break;
1579         }
1580 
1581         return (err);
1582 }
1583 
1584 /*
1585  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1586  */
1587 static void
1588 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1589 {
1590         dl_capability_ack_t     *dlap;
1591         dl_capability_sub_t     *dlsp;
1592         size_t                  subsize;
1593         dl_capab_dld_t          dld;
1594         dl_capab_hcksum_t       hcksum;
1595         dl_capab_zerocopy_t     zcopy;
1596         dl_capab_vrrp_t         vrrp;
1597         mac_capab_vrrp_t        vrrp_capab;
1598         uint8_t                 *ptr;
1599         queue_t                 *q = dsp->ds_wq;
1600         mblk_t                  *mp1;
1601         boolean_t               hcksum_capable = B_FALSE;
1602         boolean_t               zcopy_capable = B_FALSE;
1603         boolean_t               dld_capable = B_FALSE;
1604         boolean_t               vrrp_capable = B_FALSE;
1605 
1606         /*
1607          * Initially assume no capabilities.
1608          */
1609         subsize = 0;
1610 
1611         /*
1612          * Check if checksum offload is supported on this MAC.
1613          */
1614         bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1615         if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1616             &hcksum.hcksum_txflags)) {
1617                 if (hcksum.hcksum_txflags != 0) {
1618                         hcksum_capable = B_TRUE;
1619                         subsize += sizeof (dl_capability_sub_t) +
1620                             sizeof (dl_capab_hcksum_t);
1621                 }
1622         }
1623 
1624         /*
1625          * Check if zerocopy is supported on this interface.
1626          * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1627          * then reserve space for that capability.
1628          */
1629         if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1630             !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1631                 zcopy_capable = B_TRUE;
1632                 subsize += sizeof (dl_capability_sub_t) +
1633                     sizeof (dl_capab_zerocopy_t);
1634         }
1635 
1636         /*
1637          * Direct capability negotiation interface between IP/VND and DLD. Note
1638          * that for vnd we only allow the case where the media type is the
1639          * native media type so we know that there are no transformations that
1640          * would have to happen to the mac header that it receives.
1641          */
1642         if ((dsp->ds_sap == ETHERTYPE_IP &&
1643             check_mod_above(dsp->ds_rq, "ip")) ||
1644             (check_mod_above(dsp->ds_rq, "vnd") &&
1645             dsp->ds_mip->mi_media == dsp->ds_mip->mi_nativemedia)) {
1646                 dld_capable = B_TRUE;
1647                 subsize += sizeof (dl_capability_sub_t) +
1648                     sizeof (dl_capab_dld_t);
1649         }
1650 
1651         /*
1652          * Check if vrrp is supported on this interface. If so, reserve
1653          * space for that capability.
1654          */
1655         if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1656                 vrrp_capable = B_TRUE;
1657                 subsize += sizeof (dl_capability_sub_t) +
1658                     sizeof (dl_capab_vrrp_t);
1659         }
1660 
1661         /*
1662          * If there are no capabilities to advertise or if we
1663          * can't allocate a response, send a DL_ERROR_ACK.
1664          */
1665         if ((mp1 = reallocb(mp,
1666             sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1667                 dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1668                 return;
1669         }
1670 
1671         mp = mp1;
1672         DB_TYPE(mp) = M_PROTO;
1673         mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1674         bzero(mp->b_rptr, MBLKL(mp));
1675         dlap = (dl_capability_ack_t *)mp->b_rptr;
1676         dlap->dl_primitive = DL_CAPABILITY_ACK;
1677         dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1678         dlap->dl_sub_length = subsize;
1679         ptr = (uint8_t *)&dlap[1];
1680 
1681         /*
1682          * TCP/IP checksum offload.
1683          */
1684         if (hcksum_capable) {
1685                 dlsp = (dl_capability_sub_t *)ptr;
1686 
1687                 dlsp->dl_cap = DL_CAPAB_HCKSUM;
1688                 dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1689                 ptr += sizeof (dl_capability_sub_t);
1690 
1691                 hcksum.hcksum_version = HCKSUM_VERSION_1;
1692                 dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1693                 bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1694                 ptr += sizeof (dl_capab_hcksum_t);
1695         }
1696 
1697         /*
1698          * Zero copy
1699          */
1700         if (zcopy_capable) {
1701                 dlsp = (dl_capability_sub_t *)ptr;
1702 
1703                 dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1704                 dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1705                 ptr += sizeof (dl_capability_sub_t);
1706 
1707                 bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1708                 zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1709                 zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1710 
1711                 dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1712                 bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1713                 ptr += sizeof (dl_capab_zerocopy_t);
1714         }
1715 
1716         /*
1717          * VRRP capability negotiation
1718          */
1719         if (vrrp_capable) {
1720                 dlsp = (dl_capability_sub_t *)ptr;
1721                 dlsp->dl_cap = DL_CAPAB_VRRP;
1722                 dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1723                 ptr += sizeof (dl_capability_sub_t);
1724 
1725                 bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1726                 vrrp.vrrp_af = vrrp_capab.mcv_af;
1727                 bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1728                 ptr += sizeof (dl_capab_vrrp_t);
1729         }
1730 
1731         /*
1732          * Direct capability negotiation interface between IP and DLD.
1733          * Refer to dld.h for details.
1734          */
1735         if (dld_capable) {
1736                 dlsp = (dl_capability_sub_t *)ptr;
1737                 dlsp->dl_cap = DL_CAPAB_DLD;
1738                 dlsp->dl_length = sizeof (dl_capab_dld_t);
1739                 ptr += sizeof (dl_capability_sub_t);
1740 
1741                 bzero(&dld, sizeof (dl_capab_dld_t));
1742                 dld.dld_version = DLD_CURRENT_VERSION;
1743                 dld.dld_capab = (uintptr_t)dld_capab;
1744                 dld.dld_capab_handle = (uintptr_t)dsp;
1745 
1746                 dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1747                 bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1748                 ptr += sizeof (dl_capab_dld_t);
1749         }
1750 
1751         ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1752         qreply(q, mp);
1753 }
1754 
1755 /*
1756  * Disable any enabled capabilities.
1757  */
1758 void
1759 dld_capabilities_disable(dld_str_t *dsp)
1760 {
1761         if (dsp->ds_polling)
1762                 (void) dld_capab_poll_disable(dsp, NULL);
1763 }
1764 
1765 static void
1766 proto_exclusive_req(dld_str_t *dsp, mblk_t *mp)
1767 {
1768         int ret = 0;
1769         t_uscalar_t dl_err;
1770         mac_perim_handle_t mph;
1771 
1772         if (dsp->ds_passivestate != DLD_UNINITIALIZED) {
1773                 dl_err = DL_OUTSTATE;
1774                 goto failed;
1775         }
1776 
1777         if (MBLKL(mp) < DL_EXCLUSIVE_REQ_SIZE) {
1778                 dl_err = DL_BADPRIM;
1779                 goto failed;
1780         }
1781 
1782         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
1783         ret = dls_exclusive_set(dsp, B_TRUE);
1784         mac_perim_exit(mph);
1785 
1786         if (ret != 0) {
1787                 dl_err = DL_SYSERR;
1788                 goto failed;
1789         }
1790 
1791         dsp->ds_passivestate = DLD_EXCLUSIVE;
1792         dlokack(dsp->ds_wq, mp, DL_EXCLUSIVE_REQ);
1793         return;
1794 failed:
1795         dlerrorack(dsp->ds_wq, mp, DL_EXCLUSIVE_REQ, dl_err, (t_uscalar_t)ret);
1796 }